# **MAGIC Telescope (K-Neighbors Classifier) Report**

##### This dataset has been cleaned to consist of telescope data including length, width, size, distance and particle classification. Using this data and a predictive modeling method (K-Neighbors classification), machine learning will be utilized to predict the class of the radiation particle (Gamma or Hadron).

#### Link to Dataset: https://archive.ics.uci.edu/ml/datasets/MAGIC+Gamma+Telescope

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import RidgeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import brier_score_loss
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn import neighbors


import numpy as np
import pandas as pd

# Load the cleaned MAGIC telescope table (expected in the notebook's working directory).
dataSet = pd.read_csv('GammaTelescopeDataCleaned.csv')

# Features are every column except the last; the target is that last column
# (the g/h particle label). Selecting the target as "last column" instead of
# a hard-coded position keeps X and y complementary, so the label can never
# end up inside the feature matrix if the column count changes.
X = dataSet.iloc[:, :-1]
y = dataSet.iloc[:, -1]

# Preview the first rows as the cell output.
dataSet.head()



Unnamed: 0,length,width,size,distance,class
0,28.7967,16.0021,2.6449,81.8828,g
1,31.6036,11.7235,2.5185,205.261,g
2,162.052,136.031,4.0612,256.788,g
3,23.8172,9.5728,2.3385,116.737,g
4,75.1362,30.9205,3.1611,356.462,g


### Using 80% of the dataset for training and 20% for testing the model

In [2]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

### Setting up the First K-Neighbors Classifier (k = 2)

In [3]:
# First model: k-nearest neighbours with k = 2, fitted on the training split.
# NOTE(review): an even k allows tied votes between the two classes —
# confirm how ties are resolved, or prefer an odd k.
classifier = KNeighborsClassifier(n_neighbors=2).fit(X_train, y_train)

# Held-out (test) split: hard labels, then per-class probabilities.
y_predicted = classifier.predict(X_test)
y_predicted_proba = classifier.predict_proba(X_test)

# Same two outputs on the training split, used for the train-vs-test comparison.
y_train_predicted = classifier.predict(X_train)
y_train_predicted_proba = classifier.predict_proba(X_train)

### Evaluation Metrics : Test Data

In [7]:
# Per-class precision/recall/F1 for the first model on the test split.
print("Classification Report", metrics.classification_report(y_test, y_predicted), sep="\n")

# Raw counts: rows are the true labels, columns the predicted labels.
print("Confusion Matrix", metrics.confusion_matrix(y_test, y_predicted), sep="\n")

Classification Report
              precision    recall  f1-score   support

           g       0.73      0.92      0.82      2460
           h       0.73      0.38      0.50      1344

    accuracy                           0.73      3804
   macro avg       0.73      0.65      0.66      3804
weighted avg       0.73      0.73      0.71      3804

Confusion Matrix
[[2275  185]
 [ 833  511]]


In [6]:
# Scalar metrics for the first model on the test split.
#
# Sensitivity and specificity are derived from the confusion matrix rather
# than typed in by hand, so they stay correct if the data, the split or the
# model changes. The label order is pinned to ["g", "h"], which makes 'h'
# (hadron) the positive class -- the same convention the hand-written
# numbers used.
tn, fp, fn, tp = metrics.confusion_matrix(y_test, y_predicted, labels=["g", "h"]).ravel()

print("Accuracy = ", accuracy_score(y_test, y_predicted) * 100)
print("Sensitivity = ", tp / (tp + fn))  # recall of 'h'
print("Specificity = ", tn / (tn + fp))  # recall of 'g'
# NOTE: micro-averaged F1 on a single-label problem is the same number as accuracy.
print("f1-score = ", metrics.f1_score(y_test, y_predicted, average="micro"))
print("log loss = ", metrics.log_loss(y_test, y_predicted_proba))

Accuracy =  73.23869610935857
Sensitivity =  0.3802083333333333
Specificity =  0.9247967479674797
f1-score =  0.7323869610935857
log loss =  5.752473288873


### Evaluation Metrics : Training Data

In [8]:
# Per-class precision/recall/F1 for the first model on the training split.
print("Classification Report", metrics.classification_report(y_train, y_train_predicted), sep="\n")

# Raw counts: rows are the true labels, columns the predicted labels.
print("Confusion Matrix", metrics.confusion_matrix(y_train, y_train_predicted), sep="\n")

Classification Report
              precision    recall  f1-score   support

           g       0.80      1.00      0.89      9872
           h       1.00      0.54      0.70      5344

    accuracy                           0.84     15216
   macro avg       0.90      0.77      0.80     15216
weighted avg       0.87      0.84      0.82     15216

Confusion Matrix
[[9872    0]
 [2455 2889]]


In [9]:
# Scalar metrics for the first model on the training split.
#
# Sensitivity and specificity are derived from the confusion matrix rather
# than typed in by hand, so they stay correct if the data, the split or the
# model changes. The label order is pinned to ["g", "h"], which makes 'h'
# (hadron) the positive class -- the same convention the hand-written
# numbers used.
tn, fp, fn, tp = metrics.confusion_matrix(y_train, y_train_predicted, labels=["g", "h"]).ravel()

print("Accuracy = ", accuracy_score(y_train, y_train_predicted) * 100)
print("Sensitivity = ", tp / (tp + fn))  # recall of 'h'
print("Specificity = ", tn / (tn + fp))  # recall of 'g'
# NOTE: micro-averaged F1 on a single-label problem is the same number as accuracy.
print("f1-score = ", metrics.f1_score(y_train, y_train_predicted, average="micro"))
print("log loss = ", metrics.log_loss(y_train, y_train_predicted_proba))

Accuracy =  83.86566771819138
Sensitivity =  0.5406062874251497
Specificity =  1.0
f1-score =  0.8386566771819137
log loss =  0.2161529555571077


### Setting up the Second K-Neighbors Classifier (k = 3)

In [26]:
# Second model: k-nearest neighbours with k = 3, fitted on the training split.
classifier_2 = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Held-out (test) split: hard labels, then per-class probabilities.
y2_predicted = classifier_2.predict(X_test)
y2_predicted_proba = classifier_2.predict_proba(X_test)

# Same two outputs on the training split, used for the train-vs-test comparison.
y2_train_predicted = classifier_2.predict(X_train)
y2_train_predicted_proba = classifier_2.predict_proba(X_train)

### Evaluation Metrics: Test Data

In [11]:
# Per-class precision/recall/F1 for the second model on the test split.
print("Classification Report", metrics.classification_report(y_test, y2_predicted), sep="\n")

# Raw counts: rows are the true labels, columns the predicted labels.
print("Confusion Matrix", metrics.confusion_matrix(y_test, y2_predicted), sep="\n")

Classification Report
              precision    recall  f1-score   support

           g       0.76      0.84      0.80      2460
           h       0.64      0.52      0.57      1344

    accuracy                           0.73      3804
   macro avg       0.70      0.68      0.69      3804
weighted avg       0.72      0.73      0.72      3804

Confusion Matrix
[[2068  392]
 [ 646  698]]


In [18]:
# Scalar metrics for the second model on the test split.
#
# Sensitivity and specificity are derived from the confusion matrix rather
# than typed in by hand, so they stay correct if the data, the split or the
# model changes. The label order is pinned to ["g", "h"], which makes 'h'
# (hadron) the positive class -- the same convention the hand-written
# numbers used.
tn, fp, fn, tp = metrics.confusion_matrix(y_test, y2_predicted, labels=["g", "h"]).ravel()

print("Accuracy = ", accuracy_score(y_test, y2_predicted) * 100)
print("Sensitivity = ", tp / (tp + fn))  # recall of 'h'
print("Specificity = ", tn / (tn + fp))  # recall of 'g'
# NOTE: micro-averaged F1 on a single-label problem is the same number as accuracy.
print("f1-score = ", metrics.f1_score(y_test, y2_predicted, average="micro"))
print("log loss = ", metrics.log_loss(y_test, y2_predicted_proba))

Accuracy =  72.71293375394322
Sensitivity =  0.5193452380952381
Specificity =  0.8406504065040651
f1-score =  0.7271293375394322
log loss =  3.692956713437581


### Evaluation Metrics: Training Data

In [17]:
# Per-class precision/recall/F1 for the second model on the training split.
print("Classification Report", metrics.classification_report(y_train, y2_train_predicted), sep="\n")

# Raw counts: rows are the true labels, columns the predicted labels.
print("Confusion Matrix", metrics.confusion_matrix(y_train, y2_train_predicted), sep="\n")

Classification Report
              precision    recall  f1-score   support

           g       0.85      0.92      0.88      9872
           h       0.83      0.69      0.76      5344

    accuracy                           0.84     15216
   macro avg       0.84      0.81      0.82     15216
weighted avg       0.84      0.84      0.84     15216

Confusion Matrix
[[9127  745]
 [1637 3707]]


In [16]:
# Scalar metrics for the second model on the training split.
#
# Sensitivity and specificity are derived from the confusion matrix rather
# than typed in by hand, so they stay correct if the data, the split or the
# model changes. The label order is pinned to ["g", "h"], which makes 'h'
# (hadron) the positive class -- the same convention the hand-written
# numbers used.
tn, fp, fn, tp = metrics.confusion_matrix(y_train, y2_train_predicted, labels=["g", "h"]).ravel()

print("Accuracy = ", accuracy_score(y_train, y2_train_predicted) * 100)
print("Sensitivity = ", tp / (tp + fn))  # recall of 'h'
print("Specificity = ", tn / (tn + fp))  # recall of 'g'
# NOTE: micro-averaged F1 on a single-label problem is the same number as accuracy.
print("f1-score = ", metrics.f1_score(y_train, y2_train_predicted, average="micro"))
print("log loss = ", metrics.log_loss(y_train, y2_train_predicted_proba))

Accuracy =  84.34542586750788
Sensitivity =  0.6936751497005988
Specificity =  0.9245340356564019
f1-score =  0.8434542586750788
log loss =  0.3029811345338579


### Setting up the Third K-Neighbors Classifier (k = 4)

In [27]:
# Third model: k-nearest neighbours with k = 4, fitted on the training split.
# NOTE(review): an even k allows tied votes between the two classes —
# confirm how ties are resolved, or prefer an odd k.
classifier_3 = KNeighborsClassifier(n_neighbors=4).fit(X_train, y_train)

# Held-out (test) split: hard labels, then per-class probabilities.
y3_predicted = classifier_3.predict(X_test)
y3_predicted_proba = classifier_3.predict_proba(X_test)

# Same two outputs on the training split, used for the train-vs-test comparison.
y3_train_predicted = classifier_3.predict(X_train)
y3_train_predicted_proba = classifier_3.predict_proba(X_train)

### Evaluation Metrics: Test Data

In [28]:
# Per-class precision/recall/F1 for the third model on the test split.
print("Classification Report", metrics.classification_report(y_test, y3_predicted), sep="\n")

# Raw counts: rows are the true labels, columns the predicted labels.
print("Confusion Matrix", metrics.confusion_matrix(y_test, y3_predicted), sep="\n")

Classification Report
              precision    recall  f1-score   support

           g       0.74      0.92      0.82      2460
           h       0.75      0.41      0.53      1344

    accuracy                           0.74      3804
   macro avg       0.74      0.67      0.67      3804
weighted avg       0.74      0.74      0.72      3804

Confusion Matrix
[[2275  185]
 [ 798  546]]


In [33]:
# Scalar metrics for the third model on the test split.
#
# Sensitivity and specificity are derived from the confusion matrix rather
# than typed in by hand, so they stay correct if the data, the split or the
# model changes. The label order is pinned to ["g", "h"], which makes 'h'
# (hadron) the positive class -- the same convention the hand-written
# numbers used.
tn, fp, fn, tp = metrics.confusion_matrix(y_test, y3_predicted, labels=["g", "h"]).ravel()

print("Accuracy = ", accuracy_score(y_test, y3_predicted) * 100)
print("Sensitivity = ", tp / (tp + fn))  # recall of 'h'
print("Specificity = ", tn / (tn + fp))  # recall of 'g'
# NOTE: micro-averaged F1 on a single-label problem is the same number as accuracy.
print("f1-score = ", metrics.f1_score(y_test, y3_predicted, average="micro"))
print("log loss = ", metrics.log_loss(y_test, y3_predicted_proba))

Accuracy =  74.15878023133544
Sensitivity =  0.40625
Specificity =  0.9247967479674797
f1-score =  0.7415878023133544
log loss =  2.7731525241128194


### Evaluation Metrics: Training Data

In [34]:
# Per-class precision/recall/F1 for the third model on the training split.
print("Classification Report", metrics.classification_report(y_train, y3_train_predicted), sep="\n")

# Raw counts: rows are the true labels, columns the predicted labels.
print("Confusion Matrix", metrics.confusion_matrix(y_train, y3_train_predicted), sep="\n")

Classification Report
              precision    recall  f1-score   support

           g       0.79      0.97      0.87      9872
           h       0.90      0.51      0.65      5344

    accuracy                           0.81     15216
   macro avg       0.84      0.74      0.76     15216
weighted avg       0.82      0.81      0.79     15216

Confusion Matrix
[[9553  319]
 [2615 2729]]


In [35]:
# Scalar metrics for the third model on the training split.
#
# Sensitivity and specificity are derived from the confusion matrix rather
# than typed in by hand, so they stay correct if the data, the split or the
# model changes. The label order is pinned to ["g", "h"], which makes 'h'
# (hadron) the positive class -- the same convention the hand-written
# numbers used.
tn, fp, fn, tp = metrics.confusion_matrix(y_train, y3_train_predicted, labels=["g", "h"]).ravel()

print("Accuracy = ", accuracy_score(y_train, y3_train_predicted) * 100)
print("Sensitivity = ", tp / (tp + fn))  # recall of 'h'
print("Specificity = ", tn / (tn + fp))  # recall of 'g'
# NOTE: micro-averaged F1 on a single-label problem is the same number as accuracy.
print("f1-score = ", metrics.f1_score(y_train, y3_train_predicted, average="micro"))
print("log loss = ", metrics.log_loss(y_train, y3_train_predicted_proba))

Accuracy =  80.71766561514195
Sensitivity =  0.5106661676646707
Specificity =  0.9676863857374393
f1-score =  0.8071766561514195
log loss =  0.3480804091228501
