In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn import model_selection
from sklearn.preprocessing import StandardScaler,LabelEncoder, OneHotEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# from sklearn import preprocessing
# from yellowbrick.classifier import ConfusionMatrix

In [5]:
# Load the raw comma-separated data file. It has no header row, so pandas
# assigns integer column labels (0..16) until real names are attached later.
df = pd.read_csv(
    "letter-recognition.data",
    sep=",",
    header=None,
)

In [6]:
# Preview the first ten rows of the freshly loaded frame.
df.head(n=10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10
5,S,4,11,5,8,3,8,8,6,9,5,6,6,0,8,9,7
6,B,4,2,5,4,4,8,7,6,6,7,6,6,2,8,7,10
7,A,1,1,3,2,1,8,2,2,2,8,2,8,1,6,2,7
8,J,2,2,4,4,2,10,6,2,6,12,4,8,1,6,1,7
9,M,11,15,13,9,7,13,2,6,2,12,1,9,8,1,1,8


In [7]:
# Column labels for the 17 columns of the data file, in file order:
# the first labels the letter column, the remaining 16 label the numeric
# feature columns.
names = [
    'letter_Class',
    'x-box', 'y-box', 'width', 'high',
    'onpix', 'x-bar', 'y-bar', 'x2bar',
    'y2bar', 'xybar', 'x2ybr', 'xy2br',
    'x-ege', 'xegvy', 'y-ege', 'yegvx',
]

In [8]:
# Replace the default integer column labels with the descriptive names.
df.columns = pd.Index(names)

In [9]:
# Preview the first ten rows again to confirm the new column names.
df.head(n=10)

Unnamed: 0,letter_Class,x-box,y-box,width,high,onpix,x-bar,y-bar,x2bar,y2bar,xybar,x2ybr,xy2br,x-ege,xegvy,y-ege,yegvx
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10
5,S,4,11,5,8,3,8,8,6,9,5,6,6,0,8,9,7
6,B,4,2,5,4,4,8,7,6,6,7,6,6,2,8,7,10
7,A,1,1,3,2,1,8,2,2,2,8,2,8,1,6,2,7
8,J,2,2,4,4,2,10,6,2,6,12,4,8,1,6,1,7
9,M,11,15,13,9,7,13,2,6,2,12,1,9,8,1,1,8


In [10]:
# Feature matrix: every column after the first, as a NumPy array.
X = df[df.columns[1:]].values
# Target vector: the first column (the letter), as a NumPy array.
y = df[df.columns[0]].values

In [11]:
# Learn the letter -> integer mapping, then replace the letter labels in `y`
# with their integer codes. The fitted encoder is kept so the codes can be
# mapped back to letters later.
label_encoder = LabelEncoder().fit(y)
y = label_encoder.transform(y)

In [12]:
# Display the integer-encoded labels produced by the LabelEncoder cell.
y

array([19,  8,  3, ..., 19, 18,  0])

In [13]:
# One-hot encode the integer labels. The encoder expects a 2-D input, so the
# label vector is first reshaped into a single column; the sparse result is
# then converted to a dense array.
onehot_encoder = OneHotEncoder(categories='auto')
label_column = y.reshape(-1, 1)
y = onehot_encoder.fit_transform(label_column).toarray()


In [14]:
# Display the one-hot encoded label matrix produced by the OneHotEncoder cell.
y

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [15]:
# Standardize the features WITHOUT leaking test-set statistics.
#
# Fitting the scaler on all of X would let the mean/std of the rows that
# later become the test set influence preprocessing. Instead, fit on the
# training rows only, then apply that transform to every row.
#
# train_test_split's shuffle depends only on the number of samples and
# random_state, so splitting the row indices here yields exactly the rows
# that the split cell below assigns to X_train.
# NOTE: test_size / random_state must mirror the values used in that cell.
scaler = StandardScaler()
train_rows, _ = train_test_split(np.arange(len(X)), test_size=0.2, random_state=42)
scaler.fit(X[train_rows])
X = scaler.transform(X)

In [16]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [17]:
# Derive the layer sizes from the data instead of hard-coding them, so the
# network stays in sync with the feature matrix and the one-hot labels.
n_features = X_train.shape[1]   # number of input columns
n_classes = y_train.shape[1]    # number of one-hot label columns

# Feed-forward classifier: two ReLU hidden layers, each followed by 20%
# dropout, and a softmax output with one unit per class.
model = Sequential([
    Dense(64, input_shape=(n_features,), activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(n_classes, activation='softmax'),
])

In [18]:
# Categorical cross-entropy matches the one-hot labels and softmax output;
# accuracy is tracked as an additional metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Train for 200 epochs in mini-batches of 32, reporting loss/accuracy on the
# held-out split after every epoch.
model.fit(
    X_train,
    y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_test, y_test),
)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200

In [None]:
# Evaluate on the held-out split. The result holds the loss first, followed
# by the compiled metric (accuracy).
score = model.evaluate(X_test, y_test)
test_loss, test_accuracy = score[:2]
print(f'Test loss: {test_loss}')
print(f'Test accuracy: {test_accuracy}')

In [None]:
# Collapse predicted probabilities and one-hot targets to integer class
# indices (position of the largest value in each row), then tabulate
# true-vs-predicted counts.
y_pred = model.predict(X_test).argmax(axis=1)
y_true = y_test.argmax(axis=1)
cm = confusion_matrix(y_true, y_pred)
print(cm)

In [None]:
# Letters in encoder order: classes_[i] is the letter encoded as integer i.
# Taking them from the fitted encoder avoids hard-coding the class count.
target_names = label_encoder.classes_
# Pass the label indices explicitly so every report row lines up with its
# name even if some letter is absent from this particular test split.
class_indices = np.arange(len(target_names))
print(classification_report(y_true, y_pred, labels=class_indices, target_names=target_names))

In [None]:
# Feature vector to classify: 16 values in the same column order as the
# training features. These values match row 6 of the df.head(10) preview
# (labelled 'B'), so this is a sanity check rather than truly unseen data.
new_input = [[4,2,5,4,4,8,7,6,6,7,6,6,2,8,7,10]]

# Apply the already-fitted scaler (transform only, no re-fit) so the input
# is on the same scale as the data the network was trained on.
new_input = scaler.transform(new_input)

# Class probabilities: one row per input sample, one column per class.
prediction = model.predict(new_input)

# Index of the most probable class for the single input row.
val = int(np.argmax(prediction[0]))

# Decode with the fitted label encoder rather than assuming that class
# index i is the i-th letter of the alphabet.
print(label_encoder.inverse_transform([val])[0])

In [None]:

# Feature vector to classify: 16 values in the same column order as the
# training features. These values match row 1 of the df.head(10) preview
# (labelled 'I'), so this is a sanity check rather than truly unseen data.
new_input = [[5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10]]

# Apply the already-fitted scaler (transform only, no re-fit) so the input
# is on the same scale as the data the network was trained on.
new_input = scaler.transform(new_input)

# Class probabilities: one row per input sample, one column per class.
prediction = model.predict(new_input)

# Index of the most probable class for the single input row.
val = int(np.argmax(prediction[0]))

# Decode with the fitted label encoder rather than assuming that class
# index i is the i-th letter of the alphabet.
print(label_encoder.inverse_transform([val])[0])