# Loading the Training and Testing data 

In [1]:
# Importing the important libraries
import math
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import csv

In [2]:
train_data = pd.read_csv(r"C:\Users\alihi\OneDrive\Dokumente\MLL project\csvTrainImages 13440x1024.csv", header= None)
train_label = pd.read_csv(r"C:\Users\alihi\OneDrive\Dokumente\MLL project\csvTrainLabel 13440x1.csv", header= None)
test_data= pd.read_csv(r"C:\Users\alihi\OneDrive\Dokumente\MLL project\csvTestImages 3360x1024.csv", header= None)
test_label= pd.read_csv(r"C:\Users\alihi\OneDrive\Dokumente\MLL project\csvTestLabel 3360x1.csv", header= None)

In [3]:
print (train_label.head(3))
print (test_data.shape)
print(test_label.shape)
print(type(train_label))

   0
0  1
1  1
2  1
(3360, 1024)
(3360, 1)
<class 'pandas.core.frame.DataFrame'>


In [4]:
# Change the target data into 1d array

train_label2= train_label.iloc[:,[0]]
print(train_label2.head())

test_label2= test_label.iloc[:,[0]]
print(test_label2.head())

train_label1= train_label2.values.ravel()
print(train_label1)
test_label1= test_label2.values.ravel()

   0
0  1
1  1
2  1
3  1
4  1
   0
0  1
1  1
2  2
3  2
4  3
[ 1  1  1 ... 28 28 28]


# Baseline Model

In [5]:
# Applying Randomness of the outputs (Number of the Alphabets)

from sklearn.metrics import accuracy_score
import random

random.seed(123)

Random_Train_label= np.random.randint(0, 29, size=13440)
print(Random_Train_label)

Random_Test_label= np.random.randint(0, 29, size=3360)
print(Random_Test_label)

accuracy_score(train_label1, Random_Train_label)

[27  8 11 ... 13 20 19]
[17  8 26 ...  1  2  6]


0.033407738095238095

In [6]:
accuracy_score(test_label1,Random_Test_label)

0.03184523809523809

# Applying KNN from sklearn 

In [76]:
from sklearn import  model_selection
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [80]:
# APPLYING THE MODEL
knn= KNeighborsClassifier(n_neighbors=3)
knn.fit(train_data, (train_label1))
knn.score(train_data,(train_label1))

0.7110863095238096

In [81]:
new_prediction = knn.predict(test_data)
print("Prediction: {}".format(new_prediction))
knn.score(test_data,np.ravel(test_label1))

Prediction: [ 1  1  2 ...  8 28  8]


0.5047619047619047

# Suppot Vector Machine For Handwritten Charachters

In [7]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

In [8]:
# define Pipeline object
pipe = [('scaler', StandardScaler()), ('SVM', SVC(kernel='poly'))]
pipeline = Pipeline(pipe) 

In [9]:
parameters = {'SVM__C':[0.001, 0.1, 100, 10e5], 'SVM__gamma':[10,1,0.1,0.01]}
searcher = GridSearchCV(pipeline, param_grid=parameters, cv=5)

In [None]:
searcher.fit(train_data, train_label1)
print("Best train accuracy =", searcher.best_score_)

In [None]:
y_pred = searcher.predict(test_data)

In [None]:
print(" Best test accuracy =",searcher.score(test_data, test_label1))
print( "best parameters from train data: ", searcher.best_params_)

# Applying CNN Algorithm

In [53]:
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from keras.utils import np_utils

In [54]:
# Converting the data to numpy
train_data = train_data.iloc[:,:].values.astype('float32')
train_label = train_label.iloc[:,:].values.astype('int32')-1
test_data = test_data.iloc[:,:].values.astype('float32')
test_label = test_label.iloc[:,:].values.astype('int32')-1

In [55]:
labelencoder_X = LabelEncoder()
train_label = labelencoder_X.fit_transform(train_label)

  y = column_or_1d(y, warn=True)


In [56]:
# reshape to present in 2D
train_data = train_data.reshape([-1, 32, 32, 1])
test_data = test_data.reshape([-1, 32, 32, 1])

In [57]:
# Specify the ouput as categorical
train_label =np_utils.to_categorical(train_label,28)
test_label =np_utils.to_categorical(test_label,28)

In [58]:
IDG = ImageDataGenerator(rescale=1.0/255.0,
        featurewise_center=False, 
        samplewise_center=False,  
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=10,
        zoom_range = 0.1,  
        width_shift_range=0.1, 
        height_shift_range=0.1,
        horizontal_flip=False,
        vertical_flip=False)

In [59]:
# prepare an iterators to scale images
train_iterator = IDG.flow(train_data, train_label, batch_size=64)
test_iterator = IDG.flow(test_data, test_label, batch_size=64)

In [60]:
#Building the Model

model = Sequential()

model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', activation ='relu', input_shape = (32,32,1)))
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same',  activation ='relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same',  activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))

In [61]:
model.add(Flatten())
model.add(Dense(units = 256, input_dim = 1024, activation = 'relu'))
model.add(Dense(units = 256, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(28, activation = "softmax"))

In [62]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_8 (Conv2D)            (None, 32, 32, 32)        832       
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 32, 32, 32)        25632     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 16, 16, 64)        18496     
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 16, 16, 64)        36928     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 8, 8, 64)          0         
__________

In [63]:
optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

In [64]:
model.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])

In [65]:
model.fit_generator(train_iterator,epochs = 30, verbose = 2, steps_per_epoch=train_data.shape[0] // 100)


Epoch 1/30
 - 162s - loss: 2.6358 - acc: 0.2087
Epoch 2/30
 - 102s - loss: 1.5824 - acc: 0.4839
Epoch 3/30
 - 94s - loss: 1.1159 - acc: 0.6207
Epoch 4/30
 - 95s - loss: 0.8233 - acc: 0.7171
Epoch 5/30
 - 86s - loss: 0.6895 - acc: 0.7732
Epoch 6/30
 - 86s - loss: 0.5591 - acc: 0.8182
Epoch 7/30
 - 72s - loss: 0.4741 - acc: 0.8407
Epoch 8/30
 - 70s - loss: 0.4424 - acc: 0.8574
Epoch 9/30
 - 68s - loss: 0.3814 - acc: 0.8784
Epoch 10/30
 - 66s - loss: 0.3647 - acc: 0.8875
Epoch 11/30
 - 62s - loss: 0.3128 - acc: 0.8967
Epoch 12/30
 - 60s - loss: 0.2938 - acc: 0.9054
Epoch 13/30
 - 60s - loss: 0.2836 - acc: 0.9136
Epoch 14/30
 - 59s - loss: 0.2646 - acc: 0.9199
Epoch 15/30
 - 60s - loss: 0.2410 - acc: 0.9254
Epoch 16/30
 - 61s - loss: 0.2211 - acc: 0.9311
Epoch 17/30
 - 59s - loss: 0.2209 - acc: 0.9347
Epoch 18/30
 - 59s - loss: 0.2204 - acc: 0.9327
Epoch 19/30
 - 59s - loss: 0.2099 - acc: 0.9372
Epoch 20/30
 - 59s - loss: 0.1835 - acc: 0.9426
Epoch 21/30
 - 60s - loss: 0.2040 - acc: 0.9387

<tensorflow.python.keras.callbacks.History at 0x27235ee8b38>

In [66]:
y_pred = model.predict(test_data)
y_pred = np.argmax(y_pred,axis = 1)

In [69]:
model.evaluate_generator(test_iterator, steps=len(test_iterator))

[0.12069274651862326, 0.9651785714285714]