In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import scale

letters = pd.read_csv("letter-recognition.csv")

In [2]:
def hellinger_kernel(X, Y):
    # X /= (X.sum(axis=1, keepdims=True))
    # X = np.sqrt(X)
    # X /= np.linalg.norm(X, axis=1, ord=2)
    return np.sqrt(np.dot(X, Y.T))

In [3]:
def chi_sq_kernel(X, Y):
    sx = (X.sum(axis=1, keepdims=True))
    sy = ((Y).sum(axis=1, keepdims=True))
    print(sx.shape)
    print(sy.shape)
    return 2*np.dot(X, Y.T)/(sx+sy)

In [4]:
def intersection_kernel(X, Y):
    return np.inner(X, Y)

In [5]:
print("Dimensions: ", letters.shape, "\n")
print(letters.info())
letters.head()

Dimensions:  (20000, 17) 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB
None


Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [6]:
letters.columns = ['letter', 'xbox', 'ybox', 'width', 'height', 'onpix', 'xbar', 'ybar', 'x2bar', 'y2bar', 'xybar', 'x2ybar', 'xy2bar', 'xedge', 'xedgey', 'yedge', 'yedgex']
print(letters.columns)

Index(['letter', 'xbox', 'ybox', 'width', 'height', 'onpix', 'xbar', 'ybar',
       'x2bar', 'y2bar', 'xybar', 'x2ybar', 'xy2bar', 'xedge', 'xedgey',
       'yedge', 'yedgex'],
      dtype='object')


In [7]:
letters = letters[letters['letter'].isin(['A', 'B']) == True] 
# letters = letters[(letters['letter'].any('A', 'B'))] 

In [8]:
order = list(np.sort(letters['letter'].unique()))
print(order)

['A', 'B']


In [9]:
letter_means = letters.groupby('letter').mean()
letter_means.head()

Unnamed: 0_level_0,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
letter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
A,3.337136,6.975919,5.12801,5.178707,2.991128,8.851711,3.631179,2.755387,2.043093,7.802281,2.338403,8.465146,2.771863,6.321926,2.875792,7.468948
B,3.98564,6.962141,5.088773,5.169713,4.596606,7.671018,7.062663,5.366841,5.571802,7.954308,5.506527,6.652742,3.117493,7.91906,6.612272,9.100522


In [10]:
round(letters.drop('letter', axis=1).mean(), 2)

xbox      3.66
ybox      6.97
width     5.11
height    5.17
onpix     3.78
xbar      8.27
ybar      5.32
x2bar     4.04
y2bar     3.78
xybar     7.88
x2ybar    3.90
xy2bar    7.57
xedge     2.94
xedgey    7.11
yedge     4.72
yedgex    8.27
dtype: float64

In [11]:
X = letters.drop("letter", axis = 1)
y = letters['letter']

In [12]:
X_scaled = scale(X, with_mean=False)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size = 0.3, random_state = 101)

In [13]:
import random
print("Number of training samples = ", len(X_train))
print("Number of testing samples = ", len(X_test))

Number of training samples =  1088
Number of testing samples =  467


In [14]:
# Linear SVM model

model_linear = SVC(kernel='linear')
model_linear.fit(X_train, y_train)
y_pred = model_linear.predict(X_test)

In [15]:
print("Accuracy linear kernel without Markov Sampling : ", metrics.accuracy_score(y_true=y_test, y_pred=y_pred), "\n")
print(metrics.confusion_matrix(y_true=y_test, y_pred=y_pred))

Accuracy linear kernel without Markov Sampling :  0.9914346895074947 

[[225   1]
 [  3 238]]


In [16]:
# RBF SVM model
non_linear_model = SVC(kernel='rbf')
non_linear_model.fit(X_train, y_train)
y_pred = non_linear_model.predict(X_test)

In [17]:
print("Accuracy rbf kernel without Markov Sampling : ", metrics.accuracy_score(y_true=y_test, y_pred=y_pred), "\n")
print(metrics.confusion_matrix(y_true=y_test, y_pred=y_pred))

Accuracy rbf kernel without Markov Sampling :  0.9978586723768736 

[[225   1]
 [  0 241]]


In [18]:
# Polynomial SVM model
non_linear_model = SVC(kernel='poly')
non_linear_model.fit(X_train, y_train)
y_pred = non_linear_model.predict(X_test)

In [19]:
print("Accuracy polynomial kernel without Markov Sampling : ", metrics.accuracy_score(y_true=y_test, y_pred=y_pred), "\n")
print(metrics.confusion_matrix(y_true=y_test, y_pred=y_pred))

Accuracy polynomial kernel without Markov Sampling :  0.9978586723768736 

[[225   1]
 [  0 241]]


In [20]:
# Hellinger SVM model

model_hell = SVC(kernel=hellinger_kernel)
model_hell.fit(X_train, y_train)
y_pred = model_hell.predict(X_test)


In [21]:
print("Accuracy Hellinger kernel without Markov Sampling : ", metrics.accuracy_score(y_true=y_test, y_pred=y_pred), "\n")
print(metrics.confusion_matrix(y_true=y_test, y_pred=y_pred))

Accuracy Hellinger kernel without Markov Sampling :  0.987152034261242 

[[222   4]
 [  2 239]]


In [22]:
# Chi Squred SVM model

from sklearn.linear_model import SGDClassifier
from sklearn.kernel_approximation import AdditiveChi2Sampler

chi2sampler = AdditiveChi2Sampler(sample_steps=2)
X_transformed = chi2sampler.fit_transform(X_train, y_train)
clf = SGDClassifier(max_iter=100, random_state=0, tol=1e-3)
clf.fit(X_transformed, y_train)

print("Accuracy Chi Squared kernel without Markov Sampling : ", end = '')
clf.score(X_transformed, y_train)

Accuracy Chi Squared kernel without Markov Sampling : 

0.9944852941176471

In [23]:
# Intersection SVM model

model_hell = SVC(kernel=intersection_kernel)
model_hell.fit(X_train, y_train)
y_pred = model_hell.predict(X_test)


In [24]:
# accuracy
print("Accuracy Intersection kernel without Markov Sampling : ", metrics.accuracy_score(y_true=y_test, y_pred=y_pred), "\n")
print(metrics.confusion_matrix(y_true=y_test, y_pred=y_pred))

Accuracy Intersection kernel without Markov Sampling :  0.9914346895074947 

[[225   1]
 [  3 238]]


In [25]:
import math
def markov_samp(X_train, Y_train, k = 5, q = 1.2):

#Taking N1 as 700
    # Init_sampl = np.concatenate((X_train, np.array()), axis = 0)
    Init_sampl = np.concatenate((np.vstack(X_train), np.vstack(Y_train.to_numpy())), axis = 1)
    Dtr = random.sample(list(Init_sampl), 800)

    m = len(Dtr)
    print("M : " , m)
    # mplus =np.count_nonzero(np.array(Dtr[16]) == 'A')
    # mplus = Dtr[16].count('A')
#Initializing mneg and mpos as 0
    mneg = 0
    mplus = 0

#Choosing first zt random sample from chosen training samples
    index = np.random.choice(len(Dtr), 1, replace=False)  
    Dtra = np.array(Dtr)
    print(type(Dtra))
    zt = Dtra[index][0]

    print("zt : ", zt)
    if m%2 == 0:
        if zt[16] == 'A':
            mplus += 1;
        else:
            mneg += 1

#Initializing empty sample holder
    samp = []

#Training SVM for choosing Markov CHain Samples
    model_linear = SVC(kernel='linear')#
    model_linear.fit(X_train, Y_train)#
    
    yzt = model_linear.predict(np.array(zt)[0:16].reshape(1, -1))
#Calculating loss for zt sample
    fxy1 = 1
    if not yzt==zt[16]:
        fxy1 = 2 
    lzt = math.exp(0-fxy1)

    Pd = 0
    Pdd = 0

#Loop for Markov sampling    
    while(mplus < m/2 or mneg < m/2 ):
    #Choosing zstar sample 
        zstar = Dtra[np.random.choice(len(Dtr), 1, replace=False)][0]
        ystar = model_linear.predict(np.array(zstar)[ 0:16].reshape(1, -1)) #
    #Calculating loss for zstar and P ratio
        fxy = 1
        if not ystar==zstar[16]:
            fxy = 2 
        lzstar = math.exp(0-fxy)
        P = math.exp(lzt-lzstar)
        # print(zt)
        yt = zt[16]
        zt = zstar

    #Accept/Reject
        if P == 1:
            if zt[16] == yt:
                r = random.uniform(0.001, 1.0)
                if r <= P:
                    samp.append(zstar) #Pdash = e−y∗f0 /e−ytf0
            else:
                ct = 1 if yt=='A' else -1
                cst = 1 if ystar=='A' else -1
                Pd = math.exp(ct*fxy1-cst*fxy)
                r = random.uniform(0.001, 1.0)
                if r <= Pd:
                    samp.append(zstar) #P

        if len(samp) == k:
            Pdd = q*P
            samp.append(zstar) #Pdd

    #Updating zt and mplus and mneg            
        ztp1 = zstar
        if yt == 'A':
            mplus += 1
        else:
            mneg += 1

        if P > 1 or Pd > 1 or Pdd > 1:
            samp.append(zstar)
    return samp


In [26]:
nsamp = np.array(markov_samp(X_train, y_train))

X_train = nsamp[:, 0:16]
y_train = nsamp[:, 16]
print("Shape of Markov Sample features : ", X_train.shape)
print("Shape of Markov Sample labels : ", y_train.shape)

M :  800
<class 'numpy.ndarray'>
zt :  ['2.3397874294413348' '1.4833209661452773' '2.8806959611043217'
 '1.8228415078652038' '1.8531181991492494' '4.040776535347255'
 '3.1160548445974845' '2.278778335741113' '2.830418084375805'
 '4.116003554693887' '2.7652018417477167' '4.306385225019712'
 '1.2364099785677056' '5.939937945779338' '2.4188414979771236'
 '6.194548585975666' 'B']
Shape of Markov Sample features :  (1466, 16)
Shape of Markov Sample labels :  (1466,)


In [27]:
# print(y_train)
#Converting label to ASCII in list for sklearn as sampling returned ndarray
Y_train = []
for i in y_train:
    Y_train.append(ord(i))
# print(Y_train)

In [28]:
print("", X_test.shape)

 (467, 16)


In [29]:
# print(y_pred)
#Converting label to ASCII in list for sklearn as training is done on ASCII
Y_test = []
for i in y_test:
    Y_test.append(ord(i))
# print(Y_test)

In [30]:
# Linear SVM model

model_linear = SVC(kernel='linear')
model_linear.fit(X_train, Y_train)
y_pred = model_linear.predict(X_test)


In [31]:
# accuracy
print("Accuracy Linear kernel with Markov Sampling : ", metrics.accuracy_score(y_true=Y_test, y_pred=y_pred), "\n")
print(metrics.confusion_matrix(y_true=Y_test, y_pred=y_pred))

Accuracy Linear kernel with Markov Sampling :  0.9914346895074947 

[[224   2]
 [  2 239]]


In [32]:
# RBF SVM model
non_linear_model = SVC(kernel='rbf')
non_linear_model.fit(X_train, Y_train)
y_pred = non_linear_model.predict(X_test)

In [33]:
print("Accuracy rbf kernel with Markov Sampling : ", metrics.accuracy_score(y_true=Y_test, y_pred=y_pred), "\n")
print(metrics.confusion_matrix(y_true=Y_test, y_pred=y_pred))

Accuracy rbf kernel with Markov Sampling :  0.9935760171306209 

[[224   2]
 [  1 240]]


In [34]:
#Polynomial SVM model
non_linear_model = SVC(kernel='poly')
non_linear_model.fit(X_train, Y_train)
y_pred = non_linear_model.predict(X_test)

In [35]:
print("Accuracy polynomial kernel with Markov Sampling : ", metrics.accuracy_score(y_true=Y_test, y_pred=y_pred), "\n")
print(metrics.confusion_matrix(y_true=Y_test, y_pred=y_pred))

Accuracy polynomial kernel with Markov Sampling :  0.9914346895074947 

[[222   4]
 [  0 241]]


In [36]:
# Hellinger SVM model

model_hell = SVC(kernel=hellinger_kernel)
model_hell.fit(X_train, Y_train)
y_pred = model_hell.predict(X_test)


In [37]:
print("Accuracy Hellinger kernel with Markov Sampling : ", metrics.accuracy_score(y_true=Y_test, y_pred=y_pred), "\n")
print(metrics.confusion_matrix(y_true=Y_test, y_pred=y_pred))

Accuracy Hellinger kernel with Markov Sampling :  0.9828693790149893 

[[220   6]
 [  2 239]]


In [38]:
# Chi Squared SVM model
chi2sampler = AdditiveChi2Sampler(sample_steps=2)
X_transformed = chi2sampler.fit_transform(X_train.astype(np.float), pd.Series(y_train))
clf = SGDClassifier(max_iter=100, random_state=0, tol=1e-3)
clf.fit(X_transformed, pd.Series(y_train))

print("Accuracy Chi Squared kernel without Markov Sampling : ", end = '')
clf.score(X_transformed, pd.Series(y_train))

Accuracy Chi Squared kernel without Markov Sampling : 

0.9945429740791268

In [39]:
# Intersection SVM model

model_hell = SVC(kernel=intersection_kernel)
model_hell.fit(X_train, Y_train)
y_pred = model_hell.predict(X_test)

In [40]:
print("Accuracy Intersection kernel with Markov Sampling : ", metrics.accuracy_score(y_true=Y_test, y_pred=y_pred), "\n")
print(metrics.confusion_matrix(y_true=Y_test, y_pred=y_pred))

Accuracy Intersection kernel with Markov Sampling :  0.9914346895074947 

[[224   2]
 [  2 239]]
