In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler

### Dataset Preparation
This dataset shows about the readings from a telescope 
In this example we are going to predict whether the rays are going to be gamma or hydron using the Classification principle of Machine Learning

In [2]:
# Deletion of unnamed column
df = pd.read_csv('Datasets/telescope_data.csv', index_col=0)
df.head()

Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist,class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,g
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,g
2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,g
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,g
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,g


Differentiate the classcolumn with num series for "g" and "h"

In [3]:
df["class"] = (df['class'] == "g").astype(int)

In [4]:
df["class"].unique()

array([1, 0])

- Features are the elements passed in our model in order for us to predict the label
- Label which is the class column.
This is actually Supervised Learning

In [5]:
cols = df[df.columns]

In [None]:
for label in cols[:-1]:
    plt.hist(df[df["class"]==1][label], color="red", label ="gamma", alpha= 0.7, density=True )
    plt.hist(df[df["class"]==0][label], color="blue", label ="hadron", alpha= 0.7, density=True )
    plt.title(label)
    plt.ylabel("Probability")
    plt.xlabel(label)
    plt.legend()
    plt.show()

### Training , Validaating and Testing Dataset

In [7]:
train , valid, test =np.split(df.sample(frac=1), (int(0.6*len(df)), int(0.8*len(df))))

  return bound(*args, **kwds)


Scaling the dataset values 


In [8]:
def scaled(df, oversample= False): #scaler used to ensure the same sample size for both of the features data
    x =df[df.columns[:-1]].values
    y =df[df.columns[-1]].values

    scaler = StandardScaler()
    x= scaler.fit_transform(x) # Take x and fit the standard scaler to x and scale the values 
    if oversample:
        ros = RandomOverSampler()
        x , y =ros.fit_resample(x,y) # Take less class and sample fromther eto increase the values of sample

    data =np.hstack((x, np.reshape(y, (-1,1)))) #reshaping the data to a similar array stack

    return data, x, y

In [9]:
train, x_train, y_train = scaled(train, oversample =True)
valid, x_valid, y_valid = scaled(valid, oversample =False)
test, x_test, y_test = scaled(test, oversample =False)

#### kNN- K Nearest Neighbours
- Importation of Kneighbours and classification reports package from sklearn 


In [10]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

In [11]:
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(x_train, y_train)

In [12]:
y_prediction = knn_model.predict(x_test)

In [13]:
print(classification_report(y_test, y_prediction))

              precision    recall  f1-score   support

           0       0.75      0.74      0.75      1324
           1       0.86      0.87      0.87      2480

    accuracy                           0.83      3804
   macro avg       0.81      0.81      0.81      3804
weighted avg       0.82      0.83      0.83      3804



#### Naive Bayes Rule
Classification techniques with the assumption that the prescence of a particular feature in a class is un-related to the prescence of any other feature.
- Applies the Bayes probability technique

In [14]:
# Imprtation of the Bayes package
from sklearn.naive_bayes import GaussianNB

In [16]:
nb_model = GaussianNB()
nb_model = nb_model.fit(x_train, y_train)

In [17]:
y_pred = nb_model.predict(x_test)
print(classification_report(y_test,y_pred ))

              precision    recall  f1-score   support

           0       0.68      0.39      0.49      1324
           1       0.73      0.90      0.81      2480

    accuracy                           0.72      3804
   macro avg       0.71      0.64      0.65      3804
weighted avg       0.71      0.72      0.70      3804



#### Logistic Regression

In [18]:
from sklearn.linear_model import LogisticRegression

In [19]:
lg_model = LogisticRegression()
lg_model = lg_model.fit(x_train,y_train)

In [20]:
y_pred = lg_model.predict(x_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.68      0.72      0.70      1324
           1       0.85      0.82      0.83      2480

    accuracy                           0.78      3804
   macro avg       0.76      0.77      0.76      3804
weighted avg       0.79      0.78      0.78      3804



### Neural Networks


In [2]:
import tensorflow as tf




In [6]:
nn_model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation="relu", input_shape=(10,)),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

nn_model.compile(optimizer=tf.keras.optimizers.Adam(0.001))