In [3]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split

from sklearn import metrics
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.svm import SVC
from sklearn import neighbors
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV, StratifiedKFold

from sklearn.decomposition import PCA

import joblib

In [4]:
#import file and reading few lines
dataTrain = pd.read_csv('ProfileWhite.csv')
dataEval = pd.read_csv('EvalProfileWhite.csv')

dataTrain = dataTrain.drop("image", axis = 1)
dataEval = dataEval.drop("image", axis = 1)

In [5]:
def ChangeLabel3(dataToEdit , listOfLabels1, label1, listOfLabels2, label2, listOfLabels3, label3):
  data = pd.DataFrame()
  data = data.append(dataToEdit)
  for label in listOfLabels1:
    data["label"].replace({label: label1}, inplace=True)
  for label in listOfLabels2:
    data["label"].replace({label: label2}, inplace=True)
  for label in listOfLabels3:
    data["label"].replace({label: label3}, inplace=True)
  
  ndata = data[data["label"] == label1]
  ndata = ndata.append(data[data["label"] == label2])
  ndata = ndata.append(data[data["label"] == label3])

  return ndata

def ChangeLabel2(dataToEdit , listOfLabels1, label1, listOfLabels2, label2):
  data = pd.DataFrame()
  data = data.append(dataToEdit)
  for label in listOfLabels1:
    data["label"].replace({label: label1}, inplace=True)
  for label in listOfLabels2:
    data["label"].replace({label: label2}, inplace=True)
  
  ndata = data[data["label"] == label1]
  ndata = ndata.append(data[data["label"] == label2])


  return ndata

def ChangeLabel4(dataToEdit , listOfLabels1, label1, listOfLabels2, label2, listOfLabels3, label3, listOfLabels4, label4):
  data = pd.DataFrame()
  data = data.append(dataToEdit)

  for label in listOfLabels1:
    data["label"].replace({label: label1}, inplace=True)
  for label in listOfLabels2:
    data["label"].replace({label: label2}, inplace=True)
  for label in listOfLabels3:
    data["label"].replace({label: label3}, inplace=True)
  for label in listOfLabels4:
    data["label"].replace({label: label4}, inplace=True) 

  ndata = data[data["label"] == label1]
  ndata = ndata.append(data[data["label"] == label2])
  ndata = ndata.append(data[data["label"] == label3])
  ndata = ndata.append(data[data["label"] == label4])

  return ndata

In [6]:
#SVM
def SVM(X_train, y_train, name):
  hyper_param = {'C': [0.1, 1, 10, 100], 
                'gamma': [1, 0.1, 0.01, 0.001], 
                'kernel': ['linear', 'rbf', 'poly']}
  model =SVC(probability = True)
  gs = GridSearchCV(estimator=model,
                      param_grid=hyper_param,
                      cv=StratifiedKFold(5).split(X_train, y_train),
                      scoring='accuracy',
                      refit=True,
                      verbose=3)

  gs.fit(X_train, y_train)

  joblib.dump(gs.best_estimator_, f'{name}.npy')

  return gs.best_estimator_, gs.best_params_, gs.best_score_

#KNN
def KNN(X_train, y_train, name):
  knn = neighbors.KNeighborsClassifier()
  k_range = list(range(1, 11))
  param_grid = dict(n_neighbors=k_range)
    
  # defining parameter range
  gs = GridSearchCV(knn, 
                      param_grid, 
                      cv=StratifiedKFold(5).split(X_train, y_train), 
                      scoring='accuracy',
                      verbose=3)
    
  # fitting the model for grid search
  grid_search=gs.fit(X_train, y_train)

  joblib.dump(gs.best_estimator_, f'{name}.npy')

  return gs.best_estimator_, gs.best_params_, gs.best_score_

#RFC
def RFC(X_train, y_train, name):
  param_grid = {'C': [0.1, 1, 10, 100], 
                'gamma': [1, 0.1, 0.01, 0.001], 
                'kernel': ['linear', 'rbf', 'poly']}

  
  rfc=RandomForestClassifier(random_state=42)
  param_grid = { 
      'n_estimators': [50, 200],
      'max_depth' : [4,5,6,7,8],
      'criterion' :['entropy']
  }
  
  # defining parameter range
  gs = GridSearchCV(estimator=rfc, 
                      param_grid=param_grid, 
                      cv=StratifiedKFold(5).split(X_train, y_train), 
                      scoring='accuracy',
                      verbose=3)

  gs.fit(X_train, y_train)

  joblib.dump(gs.best_estimator_, f'{name}.npy')

  return gs.best_estimator_, gs.best_params_, gs.best_score_

In [7]:
L1Stage1=['T', 'I', 'l', 'i', 't','j', '1', 'J', 'f', '9', 'g', 'q', '4', 'E', 'F', 'H', 'L', 'P', 'p', 'A']
          
L2Stage1=['M', 'N','m', 'n', 'u' ,'W', 'w', 'V', 'v', 'U', 'X', 'x', 'Y', 'y', 'Z', 'z', '7', 'K', 'R', 'k']

L3Stage1=['6', 'e','r', 'b', 'h', 'a', '0', 'O', 'o', 'D', 'Q', 'C', 'G', 'c', '3', 'B', '8', 'd', '2', '5', 's', 'S']

In [8]:
stage1Train = ChangeLabel3(dataTrain, L1Stage1 , 'Group1', L2Stage1, 'Group2', L3Stage1, 'Group3')
stage1Eval  = ChangeLabel3(dataTrain, L1Stage1 , 'Group1', L2Stage1, 'Group2', L3Stage1, 'Group3')

y_trainS1 = stage1Train['label']
y_evalS1 = stage1Eval['label']

X_trainS1 = stage1Train.drop("label", axis = 1)
X_evalS1 = stage1Eval.drop("label", axis = 1)

In [9]:
SVM_stage1, params, score = SVM(X_trainS1, y_trainS1, 'SVM_stage1')

y_predS1 = SVM_stage1.predict(X_evalS1)

print('Best Params of SVM_stage1 with White: '+str(params)+'\n')
print('Best training accuracy of SVM_stage1 with White: '+str(score)+'\n')
print("evaluation accuracy of SVM_stage1", metrics.accuracy_score(y_evalS1, y_predS1), "\n")

# KNN_stage1, params, score = KNN(X_trainS1, y_trainS1, 'KNN_stage1')

# y_predS1 = KNN_stage1.predict(X_evalS1)

# print('Best Params of KNN_stage1 with White: '+str(params)+'\n')
# print('Best training accuracy of KNN_stage1 with White: '+str(score)+'\n')
# print("evaluation accuracy of KNN_stage1", metrics.accuracy_score(y_evalS1, y_predS1), "\n")

# RFC_stage1, params, score = RFC(X_trainS1, y_trainS1, 'RFC_stage1')

# y_predS1 = RFC_stage1.predict(X_evalS1)

# print('Best Params of RFC_stage1 with White: '+str(params)+'\n')
# print('Best training accuracy of RFC_stage1 with White: '+str(score)+'\n')
# print("evaluation accuracy of RFC_stage1", metrics.accuracy_score(y_evalS1, y_predS1), "\n")

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.720 total time=   3.5s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.745 total time=   3.6s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.727 total time=   4.4s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.697 total time=   5.4s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.666 total time=   3.3s
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.355 total time=  17.7s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.355 total time=  15.5s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.355 total time=  15.8s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.354 total time=  15.0s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.354 total time=  13.5s
[CV 1/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.751 total time=   3.0s
[CV 2/5] END .......C=0.1, gamma=1, kernel=poly

In [10]:
L1Stage2 = ['E', 'F', 'H', 'L', 'A', 'I', 'T']
L2Stage2 = ['9', 'g', 'q', '4', 'P', 'p']
L3Stage2 = ['T', 'I', 'l', 'i', 't','j', '1', 'J', 'f']

L4Stage2 = ['M', 'N','m', 'n']
L5Stage2 = ['u' ,'W', 'w', 'V', 'v', 'U']
L6Stage2 = ['X', 'x', 'Y', 'y']
L7Stage2 = ['Z', 'z', '7', 'K', 'R', 'k']

L8Stage2 = ['b', 'h', 'a','3', 'B', '8', 'd', '2', '5', 's', 'S']
L9Stage2 = ['6', 'e','r','Q', 'C', 'G', 'c', '0', 'O', 'o', 'D']


In [11]:
stage2TrainLeft = ChangeLabel3(dataTrain, L1Stage2 , 'Group1', L2Stage2, 'Group2', L3Stage2, 'Group3')
stage2EvalLeft  = ChangeLabel3(dataTrain, L1Stage2 , 'Group1', L2Stage2, 'Group2', L3Stage2, 'Group3')

y_trainS2Left = stage2TrainLeft['label']
y_evalS2Left = stage2EvalLeft['label']

X_trainS2Left = stage2TrainLeft.drop("label", axis = 1)
X_evalS2Left = stage2EvalLeft.drop("label", axis = 1)

In [12]:
SVM_stage2Left, params, score = SVM(X_trainS2Left, y_trainS2Left, 'SVM_stage2Left')

y_predS2 = SVM_stage2Left.predict(X_evalS2Left)

print('Best Params of SVM_stage2Left with White: '+str(params)+'\n')
print('Best training accuracy of SVM_stage2Left with White: '+str(score)+'\n')
print("evaluation accuracy of SVM_stage2Left", metrics.accuracy_score(y_evalS2Left, y_predS2), "\n")

# KNN_stage2Left, params, score = KNN(X_trainS2Left, y_trainS2Left, 'KNN_stage2Left')

# y_predS2 = KNN_stage2Left.predict(X_evalS2Left)

# print('Best Params of KNN_stage2Left with White: '+str(params)+'\n')
# print('Best training accuracy of KNN_stage2Left with White: '+str(score)+'\n')
# print("evaluation accuracy of KNN_stage2Left", metrics.accuracy_score(y_evalS2Left, y_predS2), "\n")

# RFC_stage2Left, params, score = RFC(X_trainS2Left, y_trainS2Left, 'RFC_stage2Left')

# y_predS2 = RFC_stage2Left.predict(X_evalS2Left)

# print('Best Params of RFC_stage2Left with White: '+str(params)+'\n')
# print('Best training accuracy of RFC_stage2Left with White: '+str(score)+'\n')
# print("evaluation accuracy of RFC_stage2Left", metrics.accuracy_score(y_evalS2Left, y_predS2), "\n")

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.619 total time=   0.4s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.824 total time=   0.4s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.841 total time=   0.4s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.784 total time=   0.4s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.773 total time=   0.4s
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.347 total time=   1.3s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.347 total time=   1.3s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.381 total time=   1.2s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.347 total time=   1.2s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.347 total time=   1.2s
[CV 1/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.642 total time=   0.3s
[CV 2/5] END .......C=0.1, gamma=1, kernel=poly

In [13]:
stage2TrainMid = ChangeLabel4(dataTrain, L4Stage2 , 'Group1', L5Stage2, 'Group2', L6Stage2, 'Group3',L7Stage2, 'Group4')
stage2EvalMid  = ChangeLabel4(dataTrain, L4Stage2 , 'Group1', L5Stage2, 'Group2', L6Stage2, 'Group3',L7Stage2, 'Group4')

y_trainS2Mid = stage2TrainMid['label']
y_evalS2Mid = stage2EvalMid['label']

X_trainS2Mid = stage2TrainMid.drop("label", axis = 1)
X_evalS2Mid = stage2EvalMid.drop("label", axis = 1)

In [14]:
SVM_stage2Mid, params, score = SVM(X_trainS2Mid, y_trainS2Mid, 'SVM_stage2Mid')

y_predS2 = SVM_stage2Mid.predict(X_evalS2Mid)

print('Best Params of SVM_stage2Mid with White: '+str(params)+'\n')
print('Best training accuracy of SVM_stage2Mid with White: '+str(score)+'\n')
print("evaluation accuracy of SVM_stage2Mid", metrics.accuracy_score(y_evalS2Mid, y_predS2), "\n")

# KNN_stage2Mid, params, score = KNN(X_trainS2Mid, y_trainS2Mid, 'KNN_stage2Mid')

# y_predS2 = KNN_stage2Mid.predict(X_evalS2Mid)

# print('Best Params of KNN_stage2Mid with White: '+str(params)+'\n')
# print('Best training accuracy of KNN_stage2Mid with White: '+str(score)+'\n')
# print("evaluation accuracy of KNN_stage2Mid", metrics.accuracy_score(y_evalS2Mid, y_predS2), "\n")

# RFC_stage2Mid, params, score = RFC(X_trainS2Mid, y_trainS2Mid, 'RFC_stage2Mid')

# y_predS2 = RFC_stage2Mid.predict(X_evalS2Mid)

# print('Best Params of RFC_stage2Mid with White: '+str(params)+'\n')
# print('Best training accuracy of RFC_stage2Mid with White: '+str(score)+'\n')
# print("evaluation accuracy of RFC_stage2Mid", metrics.accuracy_score(y_evalS2Mid, y_predS2), "\n")

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.915 total time=   0.3s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.983 total time=   0.3s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.966 total time=   0.3s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.972 total time=   0.3s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.898 total time=   0.3s
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.335 total time=   1.4s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.347 total time=   1.4s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.301 total time=   1.5s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.318 total time=   1.4s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.312 total time=   1.5s
[CV 1/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.909 total time=   0.3s
[CV 2/5] END .......C=0.1, gamma=1, kernel=poly

In [15]:
stage2TrainRight = ChangeLabel2(dataTrain, L8Stage2 , 'Group1', L9Stage2, 'Group2')
stage2EvalRight  = ChangeLabel2(dataTrain, L8Stage2 , 'Group1', L9Stage2, 'Group2')

y_trainS2Right = stage2TrainRight['label']
y_evalS2Right = stage2EvalRight['label']

X_trainS2Right = stage2TrainRight.drop("label", axis = 1)
X_evalS2Right = stage2EvalRight.drop("label", axis = 1)

In [16]:
SVM_stage2Right, params, score = SVM(X_trainS2Right, y_trainS2Right, 'SVM_stage2Right')

y_predS2 = SVM_stage2Right.predict(X_evalS2Right)

print('Best Params of SVM_stage2Right with White: '+str(params)+'\n')
print('Best training accuracy of SVM_stage2Right with White: '+str(score)+'\n')
print("evaluation accuracy of SVM_stage2Right", metrics.accuracy_score(y_evalS2Right, y_predS2), "\n")

# KNN_stage2Right, params, score = KNN(X_trainS2Right, y_trainS2Right, 'KNN_stage2Right')

# y_predS2 = KNN_stage2Right.predict(X_evalS2Right)

# print('Best Params of KNN_stage2Right with White: '+str(params)+'\n')
# print('Best training accuracy of KNN_stage2Right with White: '+str(score)+'\n')
# print("evaluation accuracy of KNN_stage2Right", metrics.accuracy_score(y_evalS2Right, y_predS2), "\n")

# RFC_stage2Right, params, score = RFC(X_trainS2Right, y_trainS2Right, 'RFC_stage2Right')

# y_predS2 = RFC_stage2Right.predict(X_evalS2Right)

# print('Best Params of RFC_stage2Right with White: '+str(params)+'\n')
# print('Best training accuracy of RFC_stage2Right with White: '+str(score)+'\n')
# print("evaluation accuracy of RFC_stage2Right", metrics.accuracy_score(y_evalS2Right, y_predS2), "\n")

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.820 total time=   0.4s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.799 total time=   0.4s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.902 total time=   0.4s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.891 total time=   0.4s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.881 total time=   0.4s
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.670 total time=   1.2s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.500 total time=   1.2s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.737 total time=   1.1s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.518 total time=   1.1s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.549 total time=   1.2s
[CV 1/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.928 total time=   0.3s
[CV 2/5] END .......C=0.1, gamma=1, kernel=poly

STAGE 3


In [75]:
L1 = ['E', 'F', 'H', 'L', 'A', 'I', 'T']
L2 = ['9', 'g', 'q', '4', 'P', 'p']
L3 = ['T', 'I', 'l', 'i', 't','j', '1', 'J', 'f']

L4 = ['M', 'N','m', 'n']
L5 = ['u' ,'W', 'w', 'V', 'v', 'U']
L6 = ['X', 'x', 'Y', 'y']
L7 = ['Z', 'z', '7', 'K', 'R', 'k']

# L8 = ['b', 'h', 'a','3', 'B', '8', 'd', '2', '5', 's', 'S']
# L9 = ['6', 'e','r','Q', 'C', 'G', 'c', '0', 'O', 'o', 'D']

L8 = ['b', 'h','3', 'B', '8', 'd', '2', '5', 's', 'S']
L9 = ['6', 'e','r','Q', 'C', 'G', 'c', '0', 'O', 'o', 'D', 'a']


Lists = [L1, L2, L3, L4, L5, L6, L7, L8, L9]

In [76]:
def getFinalStageData(L):
  data1 = pd.DataFrame()
  data1 = data1.append(dataTrain)
  ndata1= data1.loc[data1['label'].isin(L)]

  data2 = pd.DataFrame()
  data2 = data1.append(dataEval)
  ndata2= data2.loc[data2['label'].isin(L)] 

  y_train = ndata1['label']
  X_train = ndata1.drop("label", axis = 1)

  y_eval = ndata2['label']
  X_eval = ndata2.drop("label", axis = 1)

  return X_train, y_train, X_eval, y_eval

In [77]:
i = 1
for L in Lists:
  X_train, y_train, X_eval, y_eval = getFinalStageData(L)

  tempSVC, params, score = SVM(X_train, y_train, f'FinalStageSVM{i}')
  y_pred = tempSVC.predict(X_eval)

  print(f'Best Params of FinalStageSVM{i} with White: '+str(params)+'\n')
  print(f'Best training accuracy of FinalStageSVM{i} with White: '+str(score)+'\n')
  print(f"accuracy of FinalStageSVM{i}", metrics.accuracy_score(y_eval, y_pred), "\n")
  
  # tempKNN, params, score = KNN(X_train, y_train, f'FinalStageKNN{i}')
  # y_pred = tempKNN.predict(X_eval)

  # print(f'Best Params of FinalStageKNN{i} with White: '+str(params)+'\n')
  # print(f'Best training accuracy of FinalStageKNN{i} with White: '+str(score)+'\n')
  # print(f"accuracy of FinalStageKNN{i}", metrics.accuracy_score(y_eval, y_pred), "\n")

  # tempRFC, params, score = RFC(X_train, y_train, f'FinalStageRFC{i}')
  # y_pred = tempRFC.predict(X_eval)

  # print(f'Best Params of FinalStageRFC{i} with White: '+str(params)+'\n')
  # print(f'Best training accuracy of FinalStageRFC{i} with White: '+str(score)+'\n')
  # print(f"accuracy of FinalStageRFC{i}", metrics.accuracy_score(y_eval, y_pred), "\n")

  i=i+1

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=1.000 total time=   0.1s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=1.000 total time=   0.1s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.952 total time=   0.1s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.984 total time=   0.1s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.869 total time=   0.1s
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.129 total time=   0.2s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.129 total time=   0.2s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.129 total time=   0.2s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.180 total time=   0.2s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.197 total time=   0.2s
[CV 1/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.984 total time=   0.1s
[CV 2/5] END .......C=0.1, gamma=1, kernel=poly

In [78]:
#Output of the three models
def Parallel_Ensemble(input, model1, model2, model3):

  y_model1_proba = model1.predict_proba(input)
  y_model1_proba = y_model1_proba[0]
  
  y_model2_proba = model2.predict_proba(input)
  y_model2_proba = y_model2_proba[0]
  
  y_model3_proba = model3.predict_proba(input)
  y_model3_proba = y_model3_proba[0]
  

  final_y_proba = sum = [x + y for (x, y) in zip(y_model1_proba, y_model2_proba)]
  final_y_proba = sum = [x + y for (x, y) in zip(final_y_proba, y_model3_proba)]

  final_y_proba = [value / 3 for value in final_y_proba]
  final_y_proba = [round(member, 2) for member in final_y_proba]

  predictions = ['Group1', 'Group2', 'Group3', 'Group4']

  index = final_y_proba.index(max(final_y_proba))

  toReturn1 = predictions[index]
  toReturn2 = max(final_y_proba)  
  return toReturn1, toReturn2

#Output of the three models
def Final_Parallel_Ensemble(input, model1, model2, model3, listOfCharacters):

  y_model1_proba = model1.predict_proba(input)
  y_model1_proba = y_model1_proba[0]
  
  y_model2_proba = model2.predict_proba(input)
  y_model2_proba = y_model2_proba[0]
  
  y_model3_proba = model3.predict_proba(input)
  y_model3_proba = y_model3_proba[0]
  

  final_y_proba = sum = [x + y for (x, y) in zip(y_model1_proba, y_model2_proba)]
  final_y_proba = sum = [x + y for (x, y) in zip(final_y_proba, y_model3_proba)]

  final_y_proba = [value / 3 for value in final_y_proba]
  final_y_proba = [round(member, 2) for member in final_y_proba]

  index = final_y_proba.index(max(final_y_proba))

  toReturn1 = listOfCharacters[index]
  toReturn2 = max(final_y_proba)  
  return toReturn1, toReturn2

ENSEMBLE

In [79]:
L1 = ['E', 'F', 'H', 'L', 'A', 'I', 'T']
L2 = ['9', 'g', 'q', '4', 'P', 'p']
L3 = ['T', 'I', 'l', 'i', 't','j', '1', 'J', 'f']

L4 = ['M', 'N','m', 'n']
L5 = ['u' ,'W', 'w', 'V', 'v', 'U']
L6 = ['X', 'x', 'Y', 'y']
L7 = ['Z', 'z', '7', 'K', 'R', 'k']

# L8 = ['b', 'h','3', 'B', '8', 'd', '2', '5', 's', 'S']
# L9 = ['6', 'e','r','Q', 'C', 'G', 'c', '0', 'O', 'o', 'D', 'a']


L8 = ['b', 'h','3', 'B', '8', 'd', '2', '5', 's', 'S']
L9 = ['6', 'e','r','Q', 'C', 'G', 'c', '0', 'O', 'o', 'D', 'a']

Lists = [L1, L2, L3, L4, L5, L6, L7, L8, L9]

SVM_stage1 = joblib.load("SVM_stage1.npy")
# KNN_stage1 = joblib.load("KNN_stage1.npy")
# RFC_stage1 = joblib.load("RFC_stage1.npy")

SVM_stage2Left = joblib.load("SVM_stage2Left.npy")
SVM_stage2Mid = joblib.load("SVM_stage2Mid.npy")
SVM_stage2Right = joblib.load("SVM_stage2Right.npy")

# KNN_stage2Left = joblib.load("KNN_stage2Left.npy")
# KNN_stage2Mid = joblib.load("KNN_stage2Mid.npy")
# KNN_stage2Right = joblib.load("KNN_stage2Right.npy")

# RFC_stage2Left = joblib.load("RFC_stage2Left.npy")
# RFC_stage2Mid = joblib.load("RFC_stage2Mid.npy")
# RFC_stage2Right = joblib.load("RFC_stage2Right.npy")

FinalStageSVM1 = joblib.load("FinalStageSVM1.npy")
FinalStageSVM2 = joblib.load("FinalStageSVM2.npy")
FinalStageSVM3 = joblib.load("FinalStageSVM3.npy")
FinalStageSVM4 = joblib.load("FinalStageSVM4.npy")
FinalStageSVM5 = joblib.load("FinalStageSVM5.npy")
FinalStageSVM6 = joblib.load("FinalStageSVM6.npy")
FinalStageSVM7 = joblib.load("FinalStageSVM7.npy")
FinalStageSVM8 = joblib.load("FinalStageSVM8.npy")
FinalStageSVM9 = joblib.load("FinalStageSVM9.npy")

# FinalStageKNN1 = joblib.load("FinalStageKNN1.npy")
# FinalStageKNN2 = joblib.load("FinalStageKNN2.npy")
# FinalStageKNN3 = joblib.load("FinalStageKNN3.npy")
# FinalStageKNN4 = joblib.load("FinalStageKNN4.npy")
# FinalStageKNN5 = joblib.load("FinalStageKNN5.npy")
# FinalStageKNN6 = joblib.load("FinalStageKNN6.npy")
# FinalStageKNN7 = joblib.load("FinalStageKNN7.npy")
# FinalStageKNN8 = joblib.load("FinalStageKNN8.npy")
# FinalStageKNN9 = joblib.load("FinalStageKNN9.npy")

# FinalStageRFC1 = joblib.load("FinalStageRFC1.npy")
# FinalStageRFC2 = joblib.load("FinalStageRFC2.npy")
# FinalStageRFC3 = joblib.load("FinalStageRFC3.npy")
# FinalStageRFC4 = joblib.load("FinalStageRFC4.npy")
# FinalStageRFC5 = joblib.load("FinalStageRFC5.npy")
# FinalStageRFC6 = joblib.load("FinalStageRFC6.npy")
# FinalStageRFC7 = joblib.load("FinalStageRFC7.npy")
# FinalStageRFC8 = joblib.load("FinalStageRFC8.npy")
# FinalStageRFC9 = joblib.load("FinalStageRFC9.npy")

In [80]:
def SequentialModel(input):
  y_stage1, p_y_stage1 = Parallel_Ensemble(input, SVM_stage1, KNN_stage1, RFC_stage1)
  
  if y_stage1 == 'Group1':
    y_stage2, p_y_stage2 = Parallel_Ensemble(input, SVM_stage2Left, KNN_stage2Left, RFC_stage2Left)
    p_y_stage2 *= p_y_stage1
    
    if y_stage2 == 'Group1':
      y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM1, FinalStageKNN1, FinalStageRFC1, L1)
      p_y_stage3 *= p_y_stage2

    elif y_stage2 == 'Group2':
      y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM2, FinalStageKNN2, FinalStageRFC2, L2)
      p_y_stage3 *= p_y_stage2

    else:
      y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM3, FinalStageKNN3, FinalStageRFC3, L3) 
      p_y_stage3 *= p_y_stage2

  elif y_stage1 == 'Group2':
    y_stage2, p_y_stage2 = Parallel_Ensemble(input, SVM_stage2Mid, KNN_stage2Mid, RFC_stage2Mid)
    p_y_stage2 *= p_y_stage1

    if y_stage2 == 'Group1':
      y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM4, FinalStageKNN4, FinalStageRFC4, L4)
      p_y_stage3 *= p_y_stage2

    elif y_stage2 == 'Group2':
      y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM5, FinalStageKNN5, FinalStageRFC5, L5)
      p_y_stage3 *= p_y_stage2

    elif y_stage2 == 'Group3':
      y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM6, FinalStageKNN6, FinalStageRFC6, L6) 
      p_y_stage3 *= p_y_stage2

    else:
      y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM7, FinalStageKNN7, FinalStageRFC7, L7) 
      p_y_stage3 *= p_y_stage2

  else:
    y_stage2, p_y_stage2 = Parallel_Ensemble(input, SVM_stage2Right, KNN_stage2Right, RFC_stage2Right)
    p_y_stage2 *= p_y_stage1

    if y_stage2 == 'Group1':
      y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM8, FinalStageKNN8, FinalStageRFC8, L8)
      p_y_stage3 *= p_y_stage2

    else:
      y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM9, FinalStageKNN9, FinalStageRFC2, L9)
      p_y_stage3 *= p_y_stage2

  return y_stage3

In [81]:
def SequentialModel2(input):
  y_stage1= SVM_stage1.predict(input)
  
  if y_stage1 == 'Group1':
    # y_stage2, p_y_stage2 = Parallel_Ensemble(input, SVM_stage2Left, KNN_stage2Left, RFC_stage2Left)
    y_stage2= SVM_stage2Left.predict(input)
    
    if y_stage2 == 'Group1':
      # y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM1, FinalStageKNN1, FinalStageRFC1, L1)
      # p_y_stage3 *= p_y_stage2
      y_stage3= FinalStageSVM1.predict(input)

    elif y_stage2 == 'Group2':
      # y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM2, FinalStageKNN2, FinalStageRFC2, L2)
      # p_y_stage3 *= p_y_stage2
      y_stage3= FinalStageSVM2.predict(input)

    else:
      # y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM3, FinalStageKNN3, FinalStageRFC3, L3) 
      # p_y_stage3 *= p_y_stage2
      y_stage3 = FinalStageSVM3.predict(input)

  elif y_stage1 == 'Group2':
    # y_stage2, p_y_stage2 = Parallel_Ensemble(input, SVM_stage2Mid, KNN_stage2Mid, RFC_stage2Mid)
    # p_y_stage2 *= p_y_stage1
    y_stage2 = SVM_stage2Mid.predict(input)
    if y_stage2 == 'Group1':
      # y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM4, FinalStageKNN4, FinalStageRFC4, L4)
      # p_y_stage3 *= p_y_stage2
      y_stage3 = FinalStageSVM4.predict(input)

    elif y_stage2 == 'Group2':
      # y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM5, FinalStageKNN5, FinalStageRFC5, L5)
      # p_y_stage3 *= p_y_stage2
      y_stage3 = FinalStageSVM5.predict(input)

    elif y_stage2 == 'Group3':
      # y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM6, FinalStageKNN6, FinalStageRFC6, L6) 
      # p_y_stage3 *= p_y_stage2
      y_stage3 = FinalStageSVM6.predict(input)

    else:
      # y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM7, FinalStageKNN7, FinalStageRFC7, L7) 
      # p_y_stage3 *= p_y_stage2
      y_stage3 = FinalStageSVM7.predict(input)

  else:
    # y_stage2, p_y_stage2 = Parallel_Ensemble(input, SVM_stage2Right, KNN_stage2Right, RFC_stage2Right)
    # p_y_stage2 *= p_y_stage1
    y_stage2 = SVM_stage2Right.predict(input)

    if y_stage2 == 'Group1':
      # y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM8, FinalStageKNN8, FinalStageRFC8, L8)
      # p_y_stage3 *= p_y_stage2
      y_stage3 = FinalStageSVM8.predict(input)

    else:
      # y_stage3, p_y_stage3 = Final_Parallel_Ensemble(input, FinalStageSVM9, FinalStageKNN9, FinalStageRFC2, L9)
      # p_y_stage3 *= p_y_stage2
      y_stage3 = FinalStageSVM9.predict(input)
  return y_stage3

In [30]:
# !zip -r /content/FinalEnsemble2.zip /content/

  adding: content/ (stored 0%)
  adding: content/.config/ (stored 0%)
  adding: content/.config/active_config (stored 0%)
  adding: content/.config/configurations/ (stored 0%)
  adding: content/.config/configurations/config_default (deflated 15%)
  adding: content/.config/config_sentinel (stored 0%)
  adding: content/.config/logs/ (stored 0%)
  adding: content/.config/logs/2022.10.28/ (stored 0%)
  adding: content/.config/logs/2022.10.28/13.35.23.262379.log (deflated 91%)
  adding: content/.config/logs/2022.10.28/13.37.01.262813.log (deflated 53%)
  adding: content/.config/logs/2022.10.28/13.36.19.156679.log (deflated 86%)
  adding: content/.config/logs/2022.10.28/13.37.00.362876.log (deflated 54%)
  adding: content/.config/logs/2022.10.28/13.35.53.106765.log (deflated 53%)
  adding: content/.config/logs/2022.10.28/13.36.34.707746.log (deflated 53%)
  adding: content/.config/gce (stored 0%)
  adding: content/.config/.last_update_check.json (deflated 23%)
  adding: content/.config/.last

In [82]:
def getYEvaluation ():
    df = pd.read_csv('EvalProfileWhite.csv')
    image_names = df.iloc[:,0]
    image_names = image_names.values.tolist()
    print(len(image_names))

    y_output = []

    for image in image_names:
        df_englishCSV = pd.read_csv('english.csv')
        label = df_englishCSV.loc[df_englishCSV['image'] == image]
        label = label.drop(['image'], axis=1)
        label = label.values.tolist()
        y_output.append(label[0][0])

    return y_output


def getFeatureVector():
    df = pd.read_csv('EvalProfileWhite.csv')
    df = df.drop(['image', 'label'], axis=1)
    feature_vectors = df.values.tolist()
    return feature_vectors

import warnings
warnings.filterwarnings("ignore")
predicted_output = []
feature_Vector = getFeatureVector()
actual_output = getYEvaluation()

for idx, image in enumerate(feature_Vector):
    feature = []
    feature.append(image)
    predicted_y = SequentialModel2(feature)
    predicted_output.append(predicted_y)

    print(f'predicted output : {predicted_y} | {actual_output[idx]} :Actual output')



682
predicted output : ['0'] | 0 :Actual output
predicted output : ['O'] | 0 :Actual output
predicted output : ['0'] | 0 :Actual output
predicted output : ['O'] | 0 :Actual output
predicted output : ['O'] | 0 :Actual output
predicted output : ['O'] | 0 :Actual output
predicted output : ['0'] | 0 :Actual output
predicted output : ['o'] | 0 :Actual output
predicted output : ['o'] | 0 :Actual output
predicted output : ['0'] | 0 :Actual output
predicted output : ['O'] | 0 :Actual output
predicted output : ['1'] | 1 :Actual output
predicted output : ['1'] | 1 :Actual output
predicted output : ['1'] | 1 :Actual output
predicted output : ['1'] | 1 :Actual output
predicted output : ['1'] | 1 :Actual output
predicted output : ['I'] | 1 :Actual output
predicted output : ['1'] | 1 :Actual output
predicted output : ['1'] | 1 :Actual output
predicted output : ['1'] | 1 :Actual output
predicted output : ['1'] | 1 :Actual output
predicted output : ['1'] | 1 :Actual output
predicted output : ['2'] | 2

In [83]:
print(metrics.confusion_matrix(actual_output, predicted_output), "\n")
print("accuracy", metrics.accuracy_score(actual_output, predicted_output), "\n")

[[ 4  0  0 ...  0  0  0]
 [ 0 10  0 ...  0  0  0]
 [ 0  0 10 ...  0  0  1]
 ...
 [ 0  0  0 ...  8  0  0]
 [ 0  0  0 ...  0  8  0]
 [ 0  0  0 ...  0  0  6]] 

accuracy 0.8240469208211144 

