<a href="https://colab.research.google.com/github/A-Burnhard/Major-Atmospheric-Gamma-Imaging-Cherenkov-Telescope-project-MAGIC-/blob/main/MAGIC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np
import pandas as pd
from google.colab import drive
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler


**Dataset**

1. Title of Database: MAGIC gamma telescope data 2004

2. Sources:

   (a) Original owner of the database:

       R. K. Bock
       Major Atmospheric Gamma Imaging Cherenkov Telescope project (MAGIC)
       http://wwwmagic.mppmu.mpg.de
       rkb@mail.cern.ch


In [7]:
# Mount Google Drive at /content/drive so files stored there are readable from the Colab runtime.
drive.mount('/content/drive')
# Path of the MAGIC data file inside the mounted drive; read by the loading cell below.
file_path = '/content/drive/MyDrive//Machine Learning/magic04.data'


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Column names for the data file: ten image features followed by the label.
# They are supplied explicitly via `names=` when the file is read.
cols = [
    "fLength",
    "fWidth",
    "fSize",
    "fConc",
    "fConc1",
    "fAsym",
    "fM3Long",
    "fM3Trans",
    "fAlpha",
    "fDist",
    "class",
]
df = pd.read_csv(file_path, names=cols)
# Show the first rows as a quick sanity check of the parsed columns.
df.head()


In [None]:
# Encode the label as an integer: 1 where the raw label is "g", 0 otherwise.
# The guard makes the cell idempotent: once the column is numeric, comparing it
# with "g" again would silently turn every label into 0 on a re-run.
if not pd.api.types.is_numeric_dtype(df["class"]):
  df["class"] = (df["class"] == "g").astype(int)

df.head()

In [None]:
# Overlay the per-class distribution of every feature (all columns except the label).
# The class masks do not depend on the feature, so they are computed once up front.
is_gamma = df["class"] == 1
is_hadron = df["class"] == 0

for label in cols[:-1]:
  # Plain Series (no trailing comma): the original accidentally wrapped each one in a 1-tuple.
  data_gamma = df.loc[is_gamma, label]
  data_hadron = df.loc[is_hadron, label]

  # density=True normalises each histogram so classes of different size are comparable.
  plt.hist(data_gamma, color='blue', label='gamma', alpha=0.7, density=True, histtype='bar')
  plt.hist(data_hadron, color='red', label='hadron', alpha=0.7, density=True, histtype='bar')
  plt.title(label)
  plt.ylabel("Probability")
  plt.xlabel(label)
  plt.legend()
  plt.show()

**Train, Validation, test datasets**

In [9]:
# Shuffle once, then cut by position: first 60% train, next 20% validation, last 20% test.
# Positional slicing replaces np.split, which on a DataFrame goes through the
# deprecated DataFrame.swapaxes; the fixed seed makes the split reproducible.
SPLIT_SEED = 42
shuffled = df.sample(frac=1, random_state=SPLIT_SEED)
train_end = int(0.6*len(shuffled))
valid_end = int(0.8*len(shuffled))
train = shuffled.iloc[:train_end]
valid = shuffled.iloc[train_end:valid_end]
test = shuffled.iloc[valid_end:]


The shuffled rows are split by position: the first 60% form the training set, the rows between 60% and 80% will go towards validation, and the rows from 80% to 100% will be the test data.

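**Scaling dataset** using standardization (`StandardScaler` rescales each feature to zero mean and unit standard deviation)
`np.hstack` = join the scaled features and the label column side by side into a single 2D NumPy array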

In [10]:
def scale_dataset(dataframe, oversample, scaler=None, random_state=None):
  """Standardize the features of a split and optionally balance its classes.

  The last column of `dataframe` is treated as the label; every other column
  is treated as a feature.

  Args:
    dataframe: DataFrame whose final column holds the label.
    oversample: when truthy, resample with RandomOverSampler so the classes
      are balanced.
    scaler: optional, already-fitted scaler exposing `transform`. When given,
      it is applied as-is, so validation/test data can be scaled with the
      statistics learned on the training data. When None (default), a fresh
      StandardScaler is fitted on this split, as before.
    random_state: optional seed forwarded to RandomOverSampler to make the
      oversampling reproducible. None (default) keeps the unseeded behavior.

  Returns:
    (data, x, y): `x` is the scaled (and possibly oversampled) feature matrix,
    `y` the matching label vector, and `data` the two stacked column-wise
    into one 2D array with the label as the last column.
  """
  x = dataframe[dataframe.columns[:-1]].values
  y = dataframe[dataframe.columns[-1]].values

  if scaler is None:
    # No scaler supplied: learn mean/std from this split itself.
    x = StandardScaler().fit_transform(x)
  else:
    # Reuse the caller's fitted scaler without refitting it.
    x = scaler.transform(x)

  if oversample:
    ros = RandomOverSampler(random_state=random_state)
    x, y = ros.fit_resample(x, y)

  # y is 1-D; reshape it to a column so it can be stacked next to x.
  data = np.hstack((x, np.reshape(y, (-1, 1))))

  return data, x, y

In [11]:
# Scale every split; only the training split is oversampled to balance the classes.
# NOTE: each name on the left is rebound from a DataFrame to the stacked NumPy
# array returned by scale_dataset, which reads `.columns` from its input, so
# re-run the split cell before running this cell a second time.
train, x_train, y_train = scale_dataset(dataframe=train, oversample=True)
valid, x_valid, y_valid = scale_dataset(dataframe=valid, oversample=False)
test, x_test, y_test = scale_dataset(dataframe=test, oversample=False)




**KNN**

In [12]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

In [13]:
# k-nearest-neighbours classifier using a single neighbour (n_neighbors=1).
knn_model = KNeighborsClassifier(n_neighbors=1)
# Fit on the scaled, oversampled training split.
knn_model.fit(x_train, y_train)

In [14]:
# Predict labels for the test features; y_pred is consumed by the report cell that follows.
y_pred = knn_model.predict(x_test)

In [None]:
# Compare the KNN predictions with the true test labels and print the per-class report.
print(classification_report(y_test, y_pred))

**Naive Bayes**

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Gaussian naive Bayes classifier with default settings.
nb_model = GaussianNB()
# fit() result is assigned back to nb_model, so the name refers to the fitted estimator.
nb_model = nb_model.fit(x_train, y_train)

In [None]:
# Evaluate naive Bayes on the test split. Note that y_pred is rebound here and
# no longer holds the previous model's predictions.
y_pred = nb_model.predict(x_test)
print(classification_report(y_test, y_pred))

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression classifier with default settings.
lg_model = LogisticRegression()
# fit() result is assigned back to lg_model, so the name refers to the fitted estimator.
lg_model = lg_model.fit(x_train, y_train)

In [None]:
# Evaluate logistic regression on the test split. Note that y_pred is rebound
# here and no longer holds the previous model's predictions.
y_pred = lg_model.predict(x_test)
print(classification_report(y_test, y_pred))

**Support Vector Machine (SVM)**

In [15]:
from sklearn.svm import SVC

In [16]:
# Support vector classifier with default settings.
svm_model = SVC()
# fit() result is assigned back to svm_model, so the name refers to the fitted estimator.
svm_model = svm_model.fit(x_train,y_train)

In [None]:
# Evaluate the SVM on the test split. Note that y_pred is rebound here and no
# longer holds the previous model's predictions.
y_pred = svm_model.predict(x_test)
print(classification_report(y_test, y_pred))

**Neural Networks (NN)**

In [35]:
import tensorflow as tf

In [34]:
def _plot_history_metric(history, metric, ylabel):
  """Plot one training metric and its validation counterpart against the epoch index.

  Args:
    history: object whose `.history` mapping holds per-epoch lists under the
      keys `metric` and `'val_' + metric`.
    metric: key of the training curve, e.g. 'loss'.
    ylabel: text for the y axis.
  """
  val_metric = 'val_' + metric
  plt.plot(history.history[metric], label=metric)
  plt.plot(history.history[val_metric], label=val_metric)
  plt.xlabel('Epoch')
  plt.ylabel(ylabel)
  plt.legend()
  plt.grid(True)
  plt.show()

def plot_loss(history):
  """Plot training and validation loss ('loss' / 'val_loss') per epoch."""
  _plot_history_metric(history, 'loss', 'Binary Crossentropy')

def plot_accuracy(history):
  """Plot training and validation accuracy ('accuracy' / 'val_accuracy') per epoch."""
  _plot_history_metric(history, 'accuracy', 'Accuracy')

In [30]:
# Feed-forward binary classifier: two hidden ReLU layers of 32 units each on a
# 10-value input, followed by one sigmoid output unit.
nn_model = tf.keras.Sequential()
nn_model.add(tf.keras.layers.Dense(32, activation='relu', input_shape=(10,)))
nn_model.add(tf.keras.layers.Dense(32, activation='relu'))
nn_model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Adam with a step size of 0.001, binary cross-entropy loss, accuracy tracked per epoch.
nn_model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(0.001),
)

In [37]:
# Train for 100 epochs in mini-batches of 32 and keep the per-epoch history.
# Validation uses the dedicated validation split instead of validation_split:
# Keras carves validation_split off the END of the given arrays, and x_train
# has been oversampled, so that slice would largely consist of duplicated rows
# that also appear in the training portion.
history = nn_model.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(x_valid, y_valid),
)


Epoch 1/100


UnimplementedError: ignored