In [13]:
# Data handling, plotting, and preprocessing dependencies for the notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import warnings

# Use the "fivethirtyeight" matplotlib style for the figures drawn below.
plt.style.use("fivethirtyeight")
# NOTE(review): this silences every warning category, including deprecation
# notices from the libraries used here; consider a narrower filter.
warnings.filterwarnings('ignore')

In [14]:
!pip install openpyxl

**EDA**

In [15]:
# Path of the pumpkin-seed spreadsheet, relative to the notebook's working directory.
DATASET_PATH = "../input/pumpkin-seeds-dataset/Pumpkin_Seeds_Dataset.xlsx"

# Read the spreadsheet into a DataFrame.
data = pd.read_excel(DATASET_PATH)

In [16]:
# Preview the first rows to sanity-check the load.
data.head()

In [17]:
# Preview the last rows of the dataset.
data.tail()

In [18]:
# Column names, dtypes and non-null counts.
data.info()

In [19]:
# Summary statistics of the columns.
data.describe()

In [20]:
# Class balance shown as a pie chart.
class_counts = data['Class'].value_counts()
class_counts.plot(kind="pie")

In [21]:
# Same class counts shown as a bar chart.
samples_per_class = data['Class'].value_counts()
samples_per_class.plot(kind="bar")

In [22]:
# Number of missing values per column.
data.isnull().sum()

In [23]:
# Pairwise correlation between the numeric feature columns.
# The label column holds class-name strings, and DataFrame.corr() raises on
# non-numeric columns in pandas >= 2.0, so restrict to numeric dtypes explicitly
# (older pandas dropped such columns silently, giving the same matrix).
corr = data.select_dtypes(include="number").corr()

In [24]:
# Large annotated heatmap of the correlation matrix, with column names on both axes.
feature_names = corr.columns
fig, ax = plt.subplots(figsize=(17, 17))
sns.heatmap(
    corr,
    annot=True,
    xticklabels=feature_names,
    yticklabels=feature_names,
    ax=ax,
)

In [25]:
# Same correlation heatmap without the per-cell annotations.
sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns)

In [26]:
# Look at the raw rows once more before splitting into features and labels.
data.head()

Splitting and Preprocessing

In [27]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.utils import shuffle

In [28]:
# Separate predictors from the target: every column but the last is a feature,
# the last column is the class label.
x, y = data.iloc[:, :-1], data.iloc[:, -1]

In [29]:
# Preview the feature columns.
x.head()

In [30]:
# Display the label column.
y

In [31]:
# Shuffle features and labels together so rows stay aligned.
# A fixed seed keeps the row order reproducible across "Restart & Run All".
x, y = shuffle(x, y, random_state=42)
# Continue with plain NumPy arrays. np.asarray (unlike .values) also accepts
# arrays, so re-running this cell does not raise AttributeError.
x, y = np.asarray(x), np.asarray(y)

In [32]:
# Min-max scale every feature column.
# NOTE(review): the scaler is fitted on the full dataset, before
# train_test_split is called further down, so statistics of the future test
# rows influence the scaled training features; consider fitting on the
# training split only.
scaler = MinMaxScaler()
# Fit the scaler and transform the features in one step.
x_scaled = scaler.fit_transform(x)

In [22]:
# Dimensions of the feature array.
x.shape

In [34]:
from collections import Counter
# Samples per class before oversampling.
counter_before = Counter(y)
print(counter_before)

In [36]:
# Balance the classes by oversampling with SMOTE.
# A fixed random_state makes the synthetic samples reproducible between runs.
# NOTE(review): resampling happens before train_test_split is called further
# down, so synthetic points derived from rows that later land in the test split
# can end up in the training split; consider resampling the training split only.
from imblearn.over_sampling import SMOTE
oversample = SMOTE(random_state=42)
x_data, y_data = oversample.fit_resample(x_scaled, y)

In [37]:
from collections import Counter
# Samples per class after oversampling.
counter_after = Counter(y_data)
print(counter_after)

In [38]:
# One-hot encode the string labels: index 0 = "Çerçevelik", index 1 = "Ürgüp Sivrisi".
LABEL_TO_ONE_HOT = {
    "Çerçevelik": [1, 0],
    "Ürgüp Sivrisi": [0, 1],
}

X_data, Y_data = [], []
# Loop variables are deliberately not named x / y, so the notebook-level `x`
# and `y` arrays are not overwritten by the last row and label.
for features, label in zip(x_data, y_data):
    one_hot = LABEL_TO_ONE_HOT.get(label)
    if one_hot is None:
        # Rows carrying any other label are left out of both lists.
        continue
    X_data.append(features)
    # Append a fresh list per row so rows never share one mutable object.
    Y_data.append(list(one_hot))

In [39]:
# Turn the accumulated Python lists into NumPy arrays.
X_data, Y_data = map(np.array, (X_data, Y_data))

In [40]:
# Dimensions of the feature array after resampling and encoding.
X_data.shape

In [42]:
# Dimensions of the one-hot label array.
Y_data.shape

In [43]:
# Hold out 20% of the resampled data for testing; the seed makes the split repeatable.
split = train_test_split(
    X_data,
    Y_data,
    test_size=0.2,
    random_state=42,
)
x_train, x_test, y_train, y_test = split

In [44]:
# Append a trailing axis of length 1 to copies of the 2-D feature arrays, giving
# the 3-D layout that the model's Input layer is declared with.
X_train = np.expand_dims(np.array(x_train), axis=-1)
X_test = np.expand_dims(np.array(x_test), axis=-1)

In [45]:
# Report the shapes of the reshaped train/test tensors.
for caption, tensor in (("X Train shape: ", X_train), ("X Test shape: ", X_test)):
    print(caption, tensor.shape)

**Model**

In [47]:
import tensorflow as tf

In [48]:
# 1-D CNN over the feature vector: three Conv1D -> MaxPooling -> Dropout stages,
# then three Dense -> LeakyReLU -> BatchNorm stages and a 2-way softmax output.

def _conv_stage(tensor, n_filters):
    """Apply Conv1D (LeakyReLU activation), then max-pooling, then dropout."""
    tensor = tf.keras.layers.Conv1D(
        filters=n_filters,
        kernel_size=(3),
        padding='same',
        activation=tf.keras.layers.LeakyReLU(alpha=0.001),
    )(tensor)
    tensor = tf.keras.layers.MaxPooling1D(pool_size=(2))(tensor)
    return tf.keras.layers.Dropout(rate=0.2)(tensor)


def _dense_stage(tensor, n_units):
    """Apply a Dense layer, then LeakyReLU, then batch normalisation."""
    tensor = tf.keras.layers.Dense(units=n_units)(tensor)
    tensor = tf.keras.layers.LeakyReLU(alpha=0.001)(tensor)
    return tf.keras.layers.BatchNormalization()(tensor)


inp = tf.keras.layers.Input(shape=(X_train.shape[1], 1))

# Convolutional feature extractor.
hidden = inp
for n_filters in (32, 64, 64):
    hidden = _conv_stage(hidden, n_filters)

# Fully connected head.
hidden = tf.keras.layers.Flatten()(hidden)
for n_units in (512, 256, 128):
    hidden = _dense_stage(hidden, n_units)

# Two output units, one per class, turned into probabilities by softmax.
logits = tf.keras.layers.Dense(units=2)(hidden)
outputs = tf.keras.layers.Activation("softmax")(logits)

model = tf.keras.models.Model(inputs=[inp], outputs=[outputs])

In [49]:
# Print the layer-by-layer summary of the network.
model.summary()

In [50]:
# Training configuration: Adam optimiser, categorical cross-entropy on the
# one-hot targets, accuracy as the reported metric.
compile_options = dict(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.compile(**compile_options)

In [51]:
# Training hyper-parameters: mini-batch size and number of epochs.
BATCH, EPOCHS = 16, 100

In [52]:
# Train the network on the reshaped 3-D tensors (X_train / X_test). These match
# the declared Input shape and are the same arrays passed to model.evaluate and
# model.predict below. Passing the 2-D x_train / x_test instead presumably
# relies on Keras adding the trailing axis implicitly (version-dependent; verify).
# NOTE(review): the held-out test split is also used as validation data, so the
# curves and the final test score come from the same rows.
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=EPOCHS,
    batch_size=BATCH,
)

In [53]:
# Score the trained network on the reshaped test tensors; with the single
# "accuracy" metric configured in compile, this unpacks to (loss, accuracy).
loss, accuracy = model.evaluate(X_test, y_test)

In [54]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch.
fig, ax = plt.subplots()
for curve in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[curve])
ax.legend(["accuracy","val_accuracy"])
ax.set_title('Accuracy Vs Val_Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')

In [55]:
# Training vs. validation loss per epoch.
loss_curves = [history.history[key] for key in ('loss', 'val_loss')]
for series in loss_curves:
    plt.plot(series)
plt.legend(["loss","val_loss"])
plt.title('Loss Vs Val_loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')

In [56]:
# Every recorded metric on one chart, with the y-axis limited to [0, 1].
history_frame = pd.DataFrame(history.history)
ax = history_frame.plot(figsize=(8, 5))
ax.grid(True)
ax.set_ylim(0, 1)
plt.show()

In [58]:
# Class scores from the softmax output for every test sample.
predictions = model.predict(X_test)

In [59]:
# Collapse the per-class scores to predicted class indices.
pred = np.argmax(predictions, axis=1)
# Collapse the one-hot test labels to class indices as well. The ndim guard
# keeps this cell safe to re-run: once y_test is already 1-D, argmax over
# axis=1 would raise.
if np.ndim(y_test) > 1:
    y_test = np.argmax(y_test, axis=1)

In [61]:
from sklearn.metrics import classification_report
# Per-class precision / recall / F1 for the CNN predictions.
print(classification_report(y_test, pred))

In [62]:
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Confusion matrix of true vs. predicted class indices, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, pred)
heatmap_options = dict(annot=True, fmt="d", cmap="YlGnBu")
sns.heatmap(cm, **heatmap_options)

In [63]:
from sklearn.metrics import classification_report
# Keep the CNN classification report as text so it can be printed separately.
class_report=classification_report(y_test, pred)

In [64]:
# Show the stored CNN classification report.
print(class_report)

In [87]:
#  precision    recall  f1-score   support

#            0       0.80      0.96      0.87       268
#            1       0.95      0.74      0.83       252

#     accuracy                           0.86       520
#    macro avg       0.87      0.85      0.85       520
# weighted avg       0.87      0.86      0.85       520

**SVM**

In [65]:
from sklearn.svm import SVC

In [66]:
# Support-vector classifier with a linear kernel and a fixed random_state.
classifier = SVC(kernel='linear', random_state=0)

In [70]:
# Start the SVM pipeline from the raw frame again: features are all columns
# except the last, labels are the last column.
x, y = data.iloc[:, :-1], data.iloc[:, -1]

In [71]:
# Shuffle features and labels together so rows stay aligned.
# A fixed seed keeps the row order reproducible across "Restart & Run All".
x, y = shuffle(x, y, random_state=42)
# Continue with plain NumPy arrays. np.asarray (unlike .values) also accepts
# arrays, so re-running this cell does not raise AttributeError.
x, y = np.asarray(x), np.asarray(y)

In [72]:
# Min-max scale every feature column for the SVM.
# NOTE(review): the scaler is fitted on the full dataset, before
# train_test_split is called further down, so statistics of the future test
# rows influence the scaled training features; consider fitting on the
# training split only.
scaler = MinMaxScaler()
# Fit the scaler and transform the features in one step.
x_scaled = scaler.fit_transform(x)

In [73]:
# Balance the classes by oversampling with SMOTE.
# A fixed random_state makes the synthetic samples reproducible between runs.
# NOTE(review): resampling happens before train_test_split is called further
# down, so synthetic points derived from rows that later land in the test split
# can end up in the training split; consider resampling the training split only.
from imblearn.over_sampling import SMOTE
oversample = SMOTE(random_state=42)
x_data, y_data = oversample.fit_resample(x_scaled, y)

In [74]:
# Integer-encode the string labels: "Çerçevelik" -> 1, "Ürgüp Sivrisi" -> 0.
# NOTE(review): this is the reverse of the class indices produced by the one-hot
# encoding in the CNN section (where "Çerçevelik" sits at index 0), so the
# per-class rows of the two classification reports are not directly comparable.
LABEL_TO_INT = {"Çerçevelik": 1, "Ürgüp Sivrisi": 0}

X_data, Y_data = [], []
# Loop variables are deliberately not named x / y, so the notebook-level `x`
# and `y` arrays are not overwritten by the last row and label.
for features, label in zip(x_data, y_data):
    if label not in LABEL_TO_INT:
        # Rows carrying any other label are left out of both lists.
        continue
    X_data.append(features)
    Y_data.append(LABEL_TO_INT[label])

In [75]:
# Turn the accumulated Python lists into NumPy arrays.
X_data, Y_data = np.array(X_data), np.array(Y_data)

In [76]:
# Hold out 20% of the resampled data for testing; the seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X_data,
    Y_data,
    test_size=0.2,
    random_state=42,
)

In [77]:
# Fit the SVM on the training split.
classifier.fit(x_train, y_train)

In [78]:
# Predict labels for the held-out test split.
y_pred = classifier.predict(x_test)

In [79]:
# Confusion matrix of true vs. predicted labels for the SVM
# (confusion_matrix is imported in the CNN evaluation section above).
cm = confusion_matrix(y_test, y_pred)

In [86]:
# array([[219,  35],
#        [ 34, 232]])

In [81]:
# Draw the SVM confusion matrix as an annotated heatmap with integer counts.
sns.heatmap(cm,  annot=True, fmt="d" ,cmap="YlGnBu")

In [82]:
from sklearn.metrics import classification_report
# Per-class precision / recall / F1 for the SVM predictions, kept as text.
class_report=classification_report(y_test, y_pred)

In [84]:
# Show the stored SVM classification report.
print(class_report)

In [85]:
#  precision    recall  f1-score   support

#            0       0.87      0.86      0.86       254
#            1       0.87      0.87      0.87       266

#     accuracy                           0.87       520
#    macro avg       0.87      0.87      0.87       520
# weighted avg       0.87      0.87      0.87       520