In [7]:
import time
import pandas as pd

from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB as nb
from sklearn.naive_bayes import MultinomialNB as mnb
from sklearn.naive_bayes import BernoulliNB as bnb
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.linear_model import LogisticRegression as lr
from sklearn.neural_network import MLPClassifier as mlp
from sklearn.model_selection import train_test_split
import numpy as np
import pickle
import sklearn.metrics as sm


def calc_accuracy(method,label_test,pred):
    """Print accuracy, precision, F1 and recall for one set of predictions.

    Parameters
    ----------
    method : label printed next to every score (e.g. the classifier name).
    label_test : ground-truth labels passed to the ``sklearn.metrics`` scorers.
    pred : predicted labels, compared against ``label_test``.

    Accuracy is printed first; precision, F1 and recall follow in that order
    and are all computed with ``average='macro'``. Nothing is returned.
    """
    overall = sm.accuracy_score(label_test, pred)
    print("accuracy score for ", method, overall)

    # The remaining three scorers share one call signature, so drive them
    # from a table instead of repeating the print statement.
    macro_scorers = (
        ("precision_score for ", sm.precision_score),
        ("f1 score for ", sm.f1_score),
        ("recall score for ", sm.recall_score),
    )
    for caption, scorer in macro_scorers:
        print(caption, method, scorer(label_test, pred, average='macro'))

def predict_svm(X_train, X_test, y_train, y_test):
    """Train a linear-kernel ``SVC`` and print its scores on the test split.

    The classifier is fitted on ``X_train``/``y_train``; its predictions for
    ``X_test`` are passed to ``calc_accuracy`` under the label "SVM".
    Nothing is returned and the fitted model is not persisted.
    """
    classifier = SVC(kernel='linear')
    print("svm started")

    # fit() hands back the estimator itself, so training and prediction chain.
    predictions = classifier.fit(X_train, y_train).predict(X_test)
    calc_accuracy("SVM", y_test, predictions)

def predict_lr(X_train, X_test, y_train, y_test):
    """Train a default ``LogisticRegression`` and print its test-split scores.

    The model is fitted on ``X_train``/``y_train``; predictions for ``X_test``
    are reported through ``calc_accuracy`` as "Logistic regression".
    Nothing is returned and the fitted model is not persisted.
    """
    model = lr()
    print("lr started")

    # fit() hands back the estimator itself, so training and prediction chain.
    predicted = model.fit(X_train, y_train).predict(X_test)
    calc_accuracy("Logistic regression", y_test, predicted)


def predict_nb(X_train, X_test, y_train, y_test):
    """Train a default ``GaussianNB`` and print its test-split scores.

    The model is fitted on ``X_train``/``y_train``; predictions for ``X_test``
    are reported through ``calc_accuracy`` as "Naive Bayes".
    Nothing is returned and the fitted model is not persisted.
    """
    gaussian_nb = nb()
    print("nb started")

    # fit() hands back the estimator itself, so training and prediction chain.
    guesses = gaussian_nb.fit(X_train, y_train).predict(X_test)
    calc_accuracy("Naive Bayes", y_test, guesses)


def predict_knn(X_train, X_test, y_train, y_test):
    """Train an 8-neighbour ``KNeighborsClassifier`` and print its test scores.

    The model is fitted on ``X_train``/``y_train``; predictions for ``X_test``
    are reported through ``calc_accuracy`` as "K nearest neighbours".
    Nothing is returned and the fitted model is not persisted.
    """
    neighbours_model = knn(n_neighbors=8)
    print("knn started")

    # fit() hands back the estimator itself, so training and prediction chain.
    predicted = neighbours_model.fit(X_train, y_train).predict(X_test)
    calc_accuracy("K nearest neighbours", y_test, predicted)

def predict_mlp(X_train, X_test, y_train, y_test):
    """Train a default ``MLPClassifier`` and print its test-split scores.

    The network is fitted on ``X_train``/``y_train``; predictions for
    ``X_test`` are reported through ``calc_accuracy`` as "MLP classifier".
    Nothing is returned and the fitted model is not persisted.
    """
    network = mlp()
    print("mlp started")

    # fit() hands back the estimator itself, so training and prediction chain.
    outputs = network.fit(X_train, y_train).predict(X_test)
    calc_accuracy("MLP classifier", y_test, outputs)


In [4]:

# Load the headerless CSV: every column except the last is used as a feature,
# the last column is used as the target.
# NOTE(review): the file name suggests ORB descriptors of ASL images — confirm.
df = pd.read_csv("asl_dataset_orb.csv", sep=',',header=None)
X = df.iloc[:, :-1]
y = df.iloc[:,-1]

# Pin the shuffle seed so the 55/45 train/test split — and therefore every
# score printed further down — is the same on each Restart & Run All.
RANDOM_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.45, random_state=RANDOM_SEED
)


In [5]:
# Preview the first five rows of the loaded table (5 is the pandas default).
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,141,142,143,144,145,146,147,148,149,150
0,6,0,6,0,3,4,0,1,9,1,...,1,0,1,0,1,0,0,2,0,0
1,9,0,9,0,8,1,0,1,8,3,...,1,0,0,0,1,0,0,1,0,0
2,3,0,1,0,2,1,0,6,4,3,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,2,0,0,6,2,0,0,...,0,2,1,2,0,6,6,0,0,0
4,2,0,4,0,0,1,2,2,0,1,...,1,0,0,2,3,2,0,1,6,0


In [12]:
# Baseline classifier runs, currently disabled. The output recorded below this
# cell shows "svm", "knn", "lr", "nb" and "mlp started", i.e. it was produced
# while all five calls were active.
# NOTE(review): presumably commented out to avoid retraining on every run —
# confirm before re-enabling.
#predict_svm(X_train, X_test,y_train, y_test)
#predict_knn(X_train, X_test,y_train, y_test)
#predict_lr(X_train, X_test,y_train, y_test)
#predict_nb(X_train, X_test,y_train, y_test)
#predict_mlp(X_train, X_test,y_train, y_test)


svm started
accuracy score for  SVM 0.8525109765919305
precision_score for  SVM 0.85381742136407
f1 score for  SVM 0.8534345160139483
recall score for  SVM 0.8544197419782681
knn started
accuracy score for  K nearest neighbours 0.9596788859733444
precision_score for  K nearest neighbours 0.9595805413416535
f1 score for  K nearest neighbours 0.9571984385380874
recall score for  K nearest neighbours 0.9603804244128767
lr started
accuracy score for  Logistic regression 0.8447948871164688
precision_score for  Logistic regression 0.8445991054538576
f1 score for  Logistic regression 0.8454075186678485
recall score for  Logistic regression 0.8467212803658964
nb started
accuracy score for  Naive Bayes 0.7223246992803513
precision_score for  Naive Bayes 0.7511409286967207
f1 score for  Naive Bayes 0.7263522351976367
recall score for  Naive Bayes 0.7243850813664504
mlp started
accuracy score for  MLP classifier 0.961419552622691
precision_score for  MLP classifier 0.9613881542490446
f1 score for

In [12]:
# Run the logistic-regression baseline on the current train/test split.
predict_lr(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)

lr started
accuracy score for  Logistic regression 0.8459639915824478
precision_score for  Logistic regression 0.845931949518512
f1 score for  Logistic regression 0.846800724655216
recall score for  Logistic regression 0.8480992426675107


In [1]:
from sklearn.ensemble import RandomForestClassifier


In [5]:
def predict_rf(X_train, X_test, y_train, y_test):
    """Train a 200-tree ``RandomForestClassifier`` and print its test scores.

    The forest is fitted on ``X_train``/``y_train``; predictions for
    ``X_test`` are reported through ``calc_accuracy`` as "RF classifier".
    Nothing is returned.
    """
    forest = RandomForestClassifier(n_estimators=200)
    print("rf started")

    # fit() hands back the estimator itself, so training and prediction chain.
    votes = forest.fit(X_train, y_train).predict(X_test)
    calc_accuracy("RF classifier", y_test, votes)

In [6]:
# Run the random-forest baseline on the current train/test split.
predict_rf(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)

rf started
accuracy score for  RF classifier 0.9269699410251747
precision_score for  RF classifier 0.927462720984915
f1 score for  RF classifier 0.9273709982169112
recall score for  RF classifier 0.9281534565577367


In [8]:
from sklearn.linear_model import SGDClassifier as SGD

In [13]:
def predict_sgd(X_train, X_test, y_train, y_test):
    """Train an ``SGDClassifier`` with logistic loss and print its test scores.

    The model is fitted on ``X_train``/``y_train``; predictions for ``X_test``
    are reported through ``calc_accuracy`` as "SGD". Nothing is returned and
    the fitted model is not persisted.

    The logistic loss was spelled ``'log'`` before scikit-learn 1.1; that
    spelling was deprecated in 1.1 and removed in 1.3 in favour of
    ``'log_loss'``. The name is chosen from the installed version so the
    function works on both old and current releases.
    """
    # Local imports: only this function needs the version probe.
    import re
    import sklearn

    release = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
    # Fall back to the modern spelling if the version string is unparseable.
    needs_legacy_name = (
        release is not None
        and tuple(int(part) for part in release.groups()) < (1, 1)
    )
    loss_name = 'log' if needs_legacy_name else 'log_loss'

    clf = SGD(loss=loss_name)
    print("sgd started")
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    calc_accuracy("SGD", y_test, y_pred)

In [14]:
# Run the SGD baseline on the current train/test split.
predict_sgd(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)



sgd started
accuracy score for  SGD 0.7718687485386194
precision_score for  SGD 0.8146509524434749
f1 score for  SGD 0.7776519538639254
recall score for  SGD 0.7754874479429061


In [3]:
def predict_mnb(X_train, X_test, y_train, y_test):
    """Train a default ``MultinomialNB`` and print its test-split scores.

    The model is fitted on ``X_train``/``y_train``; predictions for ``X_test``
    are reported through ``calc_accuracy`` as "MNB".
    Nothing is returned and the fitted model is not persisted.
    """
    multinomial = mnb()
    print("mnb started")

    # fit() hands back the estimator itself, so training and prediction chain.
    predicted = multinomial.fit(X_train, y_train).predict(X_test)
    calc_accuracy("MNB", y_test, predicted)

In [6]:
# Run the multinomial naive-Bayes baseline on the current train/test split.
predict_mnb(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)

mnb started
accuracy score for  MNB 0.7858200618326362
precision_score for  MNB 0.8027619019429938
f1 score for  MNB 0.7886405841600639
recall score for  MNB 0.7828850144424185


In [8]:
def predict_bnb(X_train, X_test, y_train, y_test):
    """Train a default ``BernoulliNB`` and print its test-split scores.

    The model is fitted on ``X_train``/``y_train``; predictions for ``X_test``
    are reported through ``calc_accuracy`` as "BNB".
    Nothing is returned and the fitted model is not persisted.
    """
    bernoulli = bnb()
    print("bnb started")

    # fit() hands back the estimator itself, so training and prediction chain.
    predicted = bernoulli.fit(X_train, y_train).predict(X_test)
    calc_accuracy("BNB", y_test, predicted)

In [9]:
# Run the Bernoulli naive-Bayes baseline on the current train/test split.
predict_bnb(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)

bnb started
accuracy score for  BNB 0.6253669689018212
precision_score for  BNB 0.7255378636449721
f1 score for  BNB 0.6513226245646709
recall score for  BNB 0.6289743320157843
