In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [None]:
# Column names supplied explicitly via names=; the last one ("class") is the label.
cols = [
  "fLength", "fWidth", "fSize", "fConc", "fConc1",
  "fAsym", "fM3Long", "fM3Trans", "fAlpha", "fDist",
  "class",
]
# Read the raw data file into a DataFrame and preview the first rows.
df = pd.read_csv("/content/magic04(1).data", names=cols)
df.head()

Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist,class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,g
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,g
2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,g
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,g
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,g


In [None]:
# List the distinct label values present in the data.
pd.unique(df["class"])

array(['g', 'h'], dtype=object)

In [None]:
# Encode the label as an integer: 1 where the class is "g", 0 otherwise.
# The dtype guard keeps the cell idempotent: without it, re-running the cell would
# compare the already-encoded integers against "g" and turn every label into 0.
if not pd.api.types.is_integer_dtype(df["class"]):
  df["class"] = (df["class"] == "g").astype(int)
df.head()

Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist,class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,1
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,1
2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,1
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,1
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,1


In [None]:
# Select the rows of each class once instead of re-filtering on every iteration.
gamma_rows = df[df["class"] == 1]
hadron_rows = df[df["class"] == 0]

# One figure per feature (every column except the trailing label), overlaying
# the normalized histograms of both classes.
for label in df.columns[:-1]:
  plt.hist(gamma_rows[label], label="Gamma", color="red", alpha=0.7, density=True)
  plt.hist(hadron_rows[label], label="Hadron", color="blue", alpha=0.7, density=True)
  plt.xlabel(label)
  plt.ylabel("probability")
  plt.title(label)
  plt.legend()  # the label= arguments above are only rendered when a legend is drawn
  plt.show()

# Train, validate, test

In [None]:
# Shuffle all rows once with a fixed seed so the split is the same on every run.
shuffled = df.sample(frac=1, random_state=42)

# Cut points at 60% and 80% of the rows: 60% train, 20% validation, 20% test.
train_end = int(0.6 * len(df))
valid_end = int(0.8 * len(df))

# Positional slicing yields the same three partitions np.split would, without
# routing a DataFrame through NumPy (which warns about deprecated behaviour in
# recent pandas versions).
train = shuffled.iloc[:train_end]
valid = shuffled.iloc[train_end:valid_end]
test = shuffled.iloc[valid_end:]

In [None]:
# Number of training rows per class: first class 1, then class 0.
for class_value in (1, 0):
  print(len(train[train["class"] == class_value]))

7442
3970


In [None]:
def scale_dataset(dataframe, oversample=False):
  X = dataframe[dataframe.columns[:-1]].values
  y = dataframe[dataframe.columns[-1]].values

  scaler = StandardScaler()
  X = scaler.fit_transform(X)

  if oversample:
    ros = RandomOverSampler()
    X, y = ros.fit_resample(X, y)

  data = np.hstack((X, np.reshape(y, (-1, 1))))

  return data, X, y

In [None]:
train, X_train, y_train = scale_dataset(train, oversample=True)
valid, X_valid, y_valid = scale_dataset(valid, oversample=False)
test, X_test, y_test = scale_dataset(test, oversample=False)

In [None]:
# Sanity check on the training labels after oversampling: total size and the
# count of each class. A notebook cell only displays its last expression, so
# the three values are returned together as one tuple instead of three
# separate statements (two of which would be silently discarded).
len(y_train), int(np.sum(y_train == 1)), int(np.sum(y_train == 0))

7442

# K-Nearest Neighbors

In [None]:
# k-nearest-neighbours classifier with k = 5, fitted on the training arrays.
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X=X_train, y=y_train)

KNeighborsClassifier()

In [None]:
# Predict labels for the held-out test features.
y_pred = knn_model.predict(X=X_test)

In [None]:
# Per-class precision / recall / F1 of the KNN predictions against the true test labels.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.74      0.74      0.74      1361
           1       0.85      0.86      0.86      2443

    accuracy                           0.81      3804
   macro avg       0.80      0.80      0.80      3804
weighted avg       0.81      0.81      0.81      3804



# Naive Bayes


In [None]:
# Gaussian naive Bayes baseline: fit on the training arrays, predict the test set.
NB_model = GaussianNB()
NB_model.fit(X_train, y_train)
y_pred = NB_model.predict(X_test)
# classification_report expects (y_true, y_pred). Passing them the other way
# round swaps precision with recall and reports the support of the predictions
# instead of the support of the true labels.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.41      0.69      0.52       803
           1       0.90      0.73      0.81      3001

    accuracy                           0.72      3804
   macro avg       0.65      0.71      0.66      3804
weighted avg       0.80      0.72      0.75      3804



# Logistic Regression

In [None]:
# Logistic regression with default settings; fit() returns the estimator itself,
# so construction and fitting are chained.
lg_model = LogisticRegression().fit(X_train, y_train)
y_pred = lg_model.predict(X_test)
# Score the test-set predictions against the true labels.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.69      0.72      0.71      1361
           1       0.84      0.82      0.83      2443

    accuracy                           0.79      3804
   macro avg       0.77      0.77      0.77      3804
weighted avg       0.79      0.79      0.79      3804



# Support Vector Machines

In [None]:
# Support vector classifier with default settings; fit() returns the estimator
# itself, so construction and fitting are chained.
svm_model = SVC().fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
# Score the test-set predictions against the true labels.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.82      0.79      0.80      1361
           1       0.88      0.90      0.89      2443

    accuracy                           0.86      3804
   macro avg       0.85      0.84      0.85      3804
weighted avg       0.86      0.86      0.86      3804



# Neural Networks for classification

In [None]:
import tensorflow as tf

In [None]:
def plot_history(history):
  """Plot training vs. validation loss and accuracy side by side.

  Args:
    history: object whose ``history`` attribute is a mapping containing the
      per-epoch series 'loss', 'val_loss', 'accuracy' and 'val_accuracy'
      (the object returned by ``fit`` in ``train_model`` is used this way).
  """
  fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

  # Left panel: loss curves.
  ax1.plot(history.history['loss'], label='loss')
  ax1.plot(history.history['val_loss'], label='val_loss')
  ax1.set_xlabel('Epoch')
  ax1.set_ylabel("Binary crossentropy")
  ax1.legend()
  ax1.grid(True)

  # Right panel: accuracy curves. The training curve is labelled 'accuracy'
  # (it was mislabelled 'loss', which made the legend misleading).
  ax2.plot(history.history['accuracy'], label='accuracy')
  ax2.plot(history.history['val_accuracy'], label='val_accuracy')
  ax2.set_xlabel('Epoch')
  ax2.set_ylabel("accuracy")
  ax2.legend()
  ax2.grid(True)  # keep both panels visually consistent

  plt.show()

In [None]:
def train_model(X_train, y_train, node_nbr, batch_size, dropout_prob, alpha, epochs):
  """Build, compile and fit a two-hidden-layer binary classifier.

  Args:
    X_train: 2-D feature array; its second dimension sets the input width.
    y_train: binary labels matching the rows of ``X_train``.
    node_nbr: number of units in each of the two hidden Dense layers.
    batch_size: mini-batch size passed to ``fit``.
    dropout_prob: dropout rate applied after each hidden layer.
    alpha: learning rate passed to the Adam optimizer.
    epochs: number of training epochs.

  Returns:
    A tuple ``(nn_model, history)``: the fitted Keras model and the object
    returned by ``fit``.
  """
  # Derive the input width from the data instead of hard-coding 10, so the
  # function keeps working if features are added or dropped.
  n_features = X_train.shape[1]

  nn_model = tf.keras.Sequential([
      tf.keras.layers.Dense(node_nbr, activation="relu", input_shape=(n_features,)),
      tf.keras.layers.Dropout(dropout_prob),
      tf.keras.layers.Dense(node_nbr, activation="relu"),
      tf.keras.layers.Dropout(dropout_prob),
      # Single sigmoid unit: the model outputs one probability per row.
      tf.keras.layers.Dense(1, activation="sigmoid"),
  ])
  nn_model.compile(
    optimizer=tf.keras.optimizers.Adam(alpha),
    loss="binary_crossentropy",
    metrics=["accuracy"],
  )

  # NOTE(review): validation_split holds out a fraction of the rows passed in;
  # if X_train was oversampled beforehand, the held-out part may not be
  # representative of the original class balance - confirm this is intended.
  history = nn_model.fit(
    X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, verbose=0
  )
  return nn_model, history

In [None]:
epochs = 100
minimal_loss_value = float('inf')
efficient_model = None

# Every (node_nbr, batch_size, alpha, dropout_prob) combination, enumerated in
# the same order nested loops over these lists would visit them.
param_grid = [
  (node_nbr, batch_size, alpha, dropout_prob)
  for node_nbr in [16, 32, 64]
  for batch_size in [16, 32, 64]
  for alpha in [0.001, 0.005, 0.01]
  for dropout_prob in [0, 0.1, 0.2]
]

for node_nbr, batch_size, alpha, dropout_prob in param_grid:
  print(f" node_nbr= {node_nbr} batch size= {batch_size} learning rate = {alpha} dropout prob = {dropout_prob} ")
  model, history = train_model(X_train, y_train, node_nbr, batch_size, dropout_prob, alpha, epochs)
  #plot_history(history)
  # evaluate() returns the loss first; that value drives the model selection.
  loss_value = model.evaluate(X_valid, y_valid)[0]
  if not (loss_value < minimal_loss_value):
    continue
  # New best validation loss: remember it together with the model that achieved it.
  minimal_loss_value = loss_value
  efficient_model = model

In [None]:
# The network ends in a single sigmoid unit, so predict() returns probabilities
# in a column of shape (n, 1). classification_report needs hard class labels,
# so threshold at 0.5 and flatten to a 1-D array of 0/1 before scoring.
y_pred = (efficient_model.predict(X_test) > 0.5).astype(int).reshape(-1)
print(classification_report(y_test, y_pred))