8. MAGIC Gamma Telescope dataset: magic.mat

magic.mat (953.4KB): 19020 samples
the last column is label includes 2 classes

Data are MC generated to simulate registration of high energy gamma particles in an atmospheric Cherenkov telescope

Attribute Information:
1. fLength: continuous # major axis of ellipse [mm]
2. fWidth: continuous # minor axis of ellipse [mm]
3. fSize: continuous # 10-log of sum of content of all pixels [in #phot]
4. fConc: continuous # ratio of sum of two highest pixels over fSize [ratio]
5. fConc1: continuous # ratio of highest pixel over fSize [ratio]
6. fAsym: continuous # distance from highest pixel to center, projected onto major axis [mm]
7. fM3Long: continuous # 3rd root of third moment along major axis [mm]
8. fM3Trans: continuous # 3rd root of third moment along minor axis [mm]
9. fAlpha: continuous # angle of major axis with vector to origin [deg]
10. fDist: continuous # distance from origin to center of ellipse [mm]
11. class: # gamma (0), hadron (1)
g = gamma (0): 12332
h = hadron (1): 6688 

https://archive.ics.uci.edu/ml/datasets/MAGIC+Gamma+Telescope

In [1]:
import scipy.io as sio 
import warnings
warnings.filterwarnings('ignore')

data = sio.loadmat('./magic.mat')
print(data.keys())


dict_keys(['__header__', '__version__', '__globals__', 'magic04'])


In [2]:
import pandas as pd
data = pd.DataFrame(data['magic04'],columns=['fLength','fWidth','fSize','fConc','fConc1','fAsym','fM3Long','fM3Trans','fAlpha','fDist','class'])
data


Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist,class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.0110,-8.2027,40.0920,81.8828,0.0
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.2610,0.0
2,162.0520,136.0310,4.0612,0.0374,0.0187,116.7410,-64.8580,-45.2160,76.9600,256.7880,0.0
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.4490,116.7370,0.0
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.6480,356.4620,0.0
...,...,...,...,...,...,...,...,...,...,...,...
19015,21.3846,10.9170,2.6161,0.5857,0.3934,15.2618,11.5245,2.8766,2.4229,106.8258,1.0
19016,28.9452,6.7020,2.2672,0.5351,0.2784,37.0816,13.1853,-2.9632,86.7975,247.4560,1.0
19017,75.4455,47.5305,3.4483,0.1417,0.0549,-9.3561,41.0562,-9.4662,30.2987,256.5166,1.0
19018,120.5135,76.9018,3.9939,0.0944,0.0683,5.8043,-93.5224,-63.8389,84.6874,408.3166,1.0


In [3]:
X = data.drop('class',axis=1)
y = data['class']

In [4]:
# Use PCA to reduce to 6 dimensions
from sklearn.decomposition import PCA

n_components = 6
X_pca = PCA(n_components = n_components).fit_transform(X)

print('X_pca.shape',X_pca.shape)

X_pca.shape (19020, 6)


In [5]:
X_pca

array([[-117.84756134,   14.90843517,  -12.2354962 ,   14.31010403,
           3.77822012,    8.06011395],
       [ -10.03020362,  -39.38496448,    7.96870364,  -17.10068505,
         -16.46332331,    9.91633182],
       [  58.82204263,  -40.77377882,  136.30468772,  148.41048331,
          39.7182366 ,   47.61503095],
       ...,
       [  63.16087604,  -34.64817335,  -16.0114702 ,   13.53489839,
          10.68839485,    9.45718196],
       [ 211.45336229,  -17.46252333,  131.59820968,   24.61581686,
          62.51028996,   66.68782282],
       [ 187.96313576,  204.38635587,   66.6663764 ,   49.72503797,
           2.11399085,  -28.33853521]])

In [6]:
y.value_counts()

0.0    12332
1.0     6688
Name: class, dtype: int64

In [7]:
# Divide training and test sets
from sklearn.model_selection import train_test_split

x_pca_train,x_pca_test,y_train,y_test = train_test_split(X_pca,y,test_size=0.2,random_state=11)

In [8]:
# One-Hot
import keras

y_train = keras.utils.to_categorical(y_train, 2)
y_test = keras.utils.to_categorical(y_test, 2)

Using TensorFlow backend.


In [9]:
#  Build a full neural network
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop

model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(6,)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               3584      
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 1026      
Total params: 267,266
Trainable params: 267,266
Non-trainable params: 0
_________________________________________________________________


In [10]:
model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

history = model.fit(x_pca_train, y_train,
                    batch_size=32,
                    epochs=10,
                    verbose=1,
                    validation_data=(x_pca_test, y_test))

Train on 15216 samples, validate on 3804 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
score = model.evaluate(x_pca_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.44498291179208727
Test accuracy: 0.8215036988258362
