In [1]:
################ Letter-Recognition #################
# Disable info messages from the tesnorflow
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
################## Import the libraries #####################
import numpy as np
import pandas as pd
import keras
from keras.models import Model, Sequential
from keras.utils import to_categorical
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D,MaxPooling2D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler,scale
from sklearn.metrics import accuracy_score
from scipy.ndimage import zoom

In [4]:
############### Load the dataset ###################
path = 'HIGGS.csv'
classes = 2
data=pd.read_csv(path, header=None)
samples = data.sample(n=700000)
print(samples)

           0         1         2         3         4         5         6   \
8975414   1.0  1.178396 -0.566904 -0.777604  0.545251  1.228478  0.931279   
8663710   0.0  1.457484  0.834637 -1.163233  1.345466 -1.632310  1.105608   
184486    1.0  0.677134 -0.299063  0.848245  2.369470  1.466420  0.761073   
2415959   1.0  0.944876 -1.166869  1.198362  1.028024 -0.829216  1.198864   
2996681   0.0  0.535119 -0.013690  0.738382  1.253240 -0.212996  0.460326   
...       ...       ...       ...       ...       ...       ...       ...   
9301101   1.0  0.488634  0.835611  0.598557  0.729240  0.181285  1.266378   
2471621   1.0  0.536949 -0.711051 -0.694930  0.832150  1.147786  1.776630   
6148509   1.0  0.926026 -1.495096 -0.927417  0.540247  0.557798  0.905171   
10782333  0.0  1.433327  0.100265 -0.394192  1.321262 -1.138788  0.988900   
4982818   1.0  0.706415  0.298954 -0.721563  1.069999  0.048215  0.716094   

                7         8         9   ...        19        20        21  

In [7]:
X =samples.iloc[:,1:]
Y =samples.iloc[:,0]
X = MinMaxScaler().fit_transform(X)
classes = 2
Y = to_categorical(Y,classes)

In [8]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)

(560000, 28) (560000, 2)
(140000, 28) (140000, 2)


In [10]:
dim = X_train.shape[1]
x = int(dim**(1/2)) +1
f = x*x
model = Sequential()
model.add(Dense(f-dim,name='feature', activation='relu',input_shape=(dim,)))
model.add(Dense(classes,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam', metrics=['accuracy'])
# here, inputs and labels are same
model.fit(X_train,Y_train,epochs=10,batch_size=1024,verbose=0)
extract = Model(model.inputs, model.get_layer('feature').output)
# predict whole inputs through it
x1 = extract.predict(X_train)
x2 = extract.predict(X_test)
# concatenate on horizontal axis
X1 = np.concatenate((X_train, x1), axis=1) 
X2 = np.concatenate((X_test, x2), axis=1) 
print(X1.shape,X2.shape)
train = X1.reshape(X1.shape[0],x,x)
test = X2.reshape(X2.shape[0],x,x)
print(train.shape,test.shape)

(560000, 36) (140000, 36)
(560000, 6, 6) (140000, 6, 6)


In [11]:
from PIL import Image
x_train = []
x_test = []
for i in range(len(train)):
    img = Image.fromarray(train[i])
    imgs = img.resize(size=(32, 32))
    imgs = np.array(imgs)
    x_train.append(np.repeat(imgs[:, :, np.newaxis], 3, axis=2))
x_train = np.array(x_train,dtype='float32')
for i in range(len(test)):
    img = Image.fromarray(test[i])
    imgs = img.resize(size=(32, 32))
    imgs = np.array(imgs)
    x_test.append(np.repeat(imgs[:, :, np.newaxis], 3, axis=2))
x_test = np.array(x_test,dtype='float32')
print(x_train.shape,x_test.shape)

(560000, 32, 32, 3) (140000, 32, 32, 3)


In [13]:
############### Feature-extraction ###############
# pretrained model for FE
from keras import callbacks 
pretrained_model = keras.applications.DenseNet121(input_shape=(32,32,3), weights=None, include_top=False)
x = Flatten()(pretrained_model.output)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
prediction = Dense(classes, activation='softmax')(x)
model = Model(inputs=pretrained_model.input, outputs=prediction)
# tell the model what cost and optimization method to use
model.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)
# fit training data and validate on the testing data
#model.fit(x_train,Y_train,batch_size=1024, epochs=20,verbose=1,validation_data=(x_test,Y_test))
earlystopping = callbacks.EarlyStopping(monitor ="val_loss",  mode ="min", patience = 5,  restore_best_weights = True) 
history = model.fit(x_train,Y_train,batch_size=1024, epochs=20,verbose=1,validation_data=(x_test,Y_test), callbacks =[earlystopping]) 

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x26da633e640>

In [14]:
score = model.evaluate(x_test,Y_test)
acc = model.evaluate(x_train,Y_train)
print('training accuracy: ',acc[1])
print('testing accuracy: ',score[1])

training accuracy:  0.8217357397079468
testing accuracy:  0.739128589630127
