In [1]:
################ Forest Cover Type classification (covtype.csv) #################
# Disable info messages from TensorFlow (must be set before TensorFlow/Keras is imported)
import os,math
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
################## Import the libraries #####################
import numpy as np
import pandas as pd
import keras
from keras.models import Model, Sequential
from keras.utils import to_categorical
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D,MaxPooling2D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler,scale
from sklearn.metrics import accuracy_score
from scipy.ndimage import zoom

In [2]:
############### Load the dataset ###################
# The CSV is read without a header row; the first 54 columns are used as
# attributes and the last column as the class label.
path = 'covtype.csv'
classes = 7  # number of target classes
data = pd.read_csv(path, header=None)
attributes = data.iloc[:, :54]
labels = data.iloc[:, -1]
# rescale every attribute with MinMaxScaler
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed in the next cell -- confirm this is intended.
X = MinMaxScaler().fit_transform(attributes)
# integer-encode the labels, then expand them to one-hot rows
Y = to_categorical(LabelEncoder().fit_transform(labels), classes)
print(X.shape,Y.shape)

(581012, 54) (581012, 7)


In [3]:
# Split into train/test as per requirements: exactly 300000 training rows,
# with every remaining row held out for testing.
# random_state pins the shuffle so the same split (and therefore comparable
# results) is obtained on every Restart & Run All.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, train_size=300000, random_state=42
)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)

(300000, 54) (300000, 7)
(281012, 54) (281012, 7)


In [4]:
######## Pad the attributes up to a square grid ###############
# The attribute count (54) is not a perfect square, so the next one above it
# is used: 8*8 = 64. A small dense network with 64-54 = 10 hidden units is
# trained on the labels, and its 10 hidden activations are appended to the
# 54 original attributes so that every row can be reshaped to 8x8.

dim = X_train.shape[1]
x = int(dim ** 0.5) + 1  # side length of the square grid
f = x * x                # total number of cells in the grid
model = Sequential([
    Dense(f - dim, name='feature', activation='relu', input_shape=(dim,)),
    Dense(classes, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# supervised fit: scaled attributes in, one-hot class labels out
model.fit(X_train, Y_train, batch_size=64, epochs=100, verbose=0)
# sub-model that stops at the hidden 'feature' layer
extract = Model(inputs=model.inputs, outputs=model.get_layer('feature').output)
# hidden activations for both partitions
x1, x2 = extract.predict(X_train), extract.predict(X_test)
# append the activations as extra columns
X1 = np.hstack((X_train, x1))
X2 = np.hstack((X_test, x2))
print(X1.shape,X2.shape)
# one x-by-x grid per row
train = X1.reshape(-1, x, x)
test = X2.reshape(-1, x, x)
print(train.shape,test.shape)

(300000, 64) (281012, 64)
(300000, 8, 8) (281012, 8, 8)


In [5]:
# Upscale every 2-D feature grid to 32x32 and replicate it over three
# channels, giving the (32, 32, 3) input shape used by the CNN below.
from PIL import Image


def to_rgb_images(grids, size=(32, 32)):
    """Resize each 2-D grid with PIL and copy it into three identical channels.

    Parameters
    ----------
    grids : array-like of 2-D arrays, indexed along the first axis.
    size : (width, height) tuple forwarded to ``Image.resize``.

    Returns
    -------
    numpy.ndarray of dtype float32 and shape (len(grids), height, width, 3).
    An empty input yields an empty array that still has this 4-D shape.
    """
    width, height = size
    # Preallocate the result instead of collecting a Python list and copying
    # it with np.array(), which would hold two full copies in memory at once.
    images = np.empty((len(grids), height, width, 3), dtype='float32')
    for i, grid in enumerate(grids):
        resized = np.asarray(Image.fromarray(grid).resize(size=size))
        # broadcasting writes the single plane into all three channel slots
        images[i] = resized[:, :, np.newaxis]
    return images


x_train = to_rgb_images(train)
x_test = to_rgb_images(test)
print(x_train.shape,x_test.shape)

(300000, 32, 32, 3) (281012, 32, 32, 3)


In [6]:
############### DenseNet121 classifier ###############
# DenseNet121 convolutional backbone without its classification head.
# NOTE: despite the variable name, weights=None means no pretrained weights
# are loaded -- the backbone starts from random initialisation and is trained
# here together with the new head.
pretrained_model = keras.applications.DenseNet121(input_shape=(32,32,3), weights=None, include_top=False)
# classification head: flatten -> 512-unit ReLU -> dropout -> softmax over classes
x = Flatten()(pretrained_model.output)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
prediction = Dense(classes, activation='softmax')(x)
model = Model(inputs=pretrained_model.input, outputs=prediction)
# tell the model what cost and optimization method to use
model.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)
# Fit on the training images. The data passed as validation_data is the test
# set itself, so the per-epoch val_* numbers are test-set metrics and must not
# be used for model selection or tuning.
# The returned History is kept so the training curves can be inspected later
# (and so the cell does not end with a bare object repr).
history = model.fit(x_train,Y_train,batch_size=128, epochs=20,verbose=1,validation_data=(x_test,Y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1ec8490a430>

In [7]:
# Evaluate the trained classifier on both partitions; evaluate() returns
# [loss, accuracy] because 'accuracy' is the only compiled metric.
score = model.evaluate(x=x_test, y=Y_test)
acc = model.evaluate(x=x_train, y=Y_train)
_, train_accuracy = acc
_, test_accuracy = score
print('training accuracy: ', train_accuracy)
print('testing accuracy: ', test_accuracy)

training accuracy:  0.953790009021759
testing accuracy:  0.9399491548538208
