# Import Required Packages

In [1]:
import numpy as np
import scipy.io
import sklearn.metrics
import sklearn 
import os
import random
import pandas as pd
import time
random.seed(2021)

# Step 0: set work directories

In [None]:
train_dir = '../data/train_set/'
image_dir = train_dir+"images/"
pt_dir = train_dir+"points/"
label_path = train_dir+"label.csv"

Provide directories for training images. 
Training images and Training fiducial points will be in different subfolders.

# Step 1: set up controls for evaluation experiments

In this chunk, we have a set of controls for the evaluation experiments.

• (T/F) initial feature extraction on training set
• (T/F) initial feature extraction on test set
• (T/F) improved feature extraction on training set
• (T/F) improved feature extraction on test set
• (T/F) SMOTE using improved features on train set


In [61]:
run_feature_train_initial = True
run_feature_test_initial = True

run_feature_train = True # process features for training set
run_feature_test = True # process features for test set

run_feature_train_SMOTE = True

In this chunk,we have a set of controls for model training/testing. 
If true, then we train the model and generate predictions on the test set, and if false, then we skip that model. 
By default all the models are set to run.

In [None]:
run_baseline = True
run_advanced = True

run_baseline_improved = True
run_knn = True
run_random_forest=True
run_svm = True
run_weighted_svm = True
run_stochastic_gradient_descent = True
run_neural_networks = True
run_decision_tree = True

fold cross validation with AUC scoring 

In [None]:
run_cv = False

# Step 2:  import data and train-test split

Fiducial points are stored in matlab format. In this step, we read them and store them in a list.

In [5]:
def read_data(train_dir):
  
  # read labels
  labels = pd.read_csv(train_dir+'/label.csv')
  y= labels['label'].to_numpy()

  # read points
  n = 3000
  for i in range(1,n+1):
    p_path = str(i).zfill(4)+'.mat'
    mat = scipy.io.loadmat(train_dir+'/points/'+p_path)
    if 'faceCoordinatesUnwarped' in mat:
      cords = mat['faceCoordinatesUnwarped'] 
    else:
      cords = mat['faceCoordinates2']

    distance = sklearn.metrics.pairwise_distances(cords)       
          # compute the pairwise distances in each mat
    flatten_distance = distance[np.triu_indices(len(cords[:,0]), k = 1)]    
          # stretch the upper triangle of the symmetric matrix 
          # to a long array with dimension 3003
          # 3003 = (1+77)*78/2
    if i==1:
      distances = np.mat([flatten_distance])
    else:
      distances = np.append(distances, np.mat([flatten_distance]), axis = 0)
  return (distances, y)

In [None]:
read_time_start=time.time()
X, Y = read_data(train_dir)
print("Read the original dataset takes %s seconds" % round((time.time() - read_time_start),3))

In [None]:
X.shape, Y.shape 

Data Preprocessing Imbalanced Dataset (SMOTE)-oversamplig

In [28]:
print('majority class: %d' % np.sum(Y == 0))
print('minority class: %d' % np.sum(Y == 1))
#imbalanced dataset

majority class: 2402
minority class: 598


In [37]:
def data_preprocessing(X, Y, train_dir):

  distances = X
  y = Y

  n = y.shape[0]
  mat_1 = np.add(np.where(y == 1),1)
  n_oversample = (n-sum(y))-sum(y) 
    # how many samples do we need to generate

  for i in range(n_oversample):
    samples_index = random.sample(list(list(mat_1)[0]), 2)
      # pick two random index of class 1 samples. 

    p_path = str(samples_index[0]).zfill(4)+'.mat'
    mat = scipy.io.loadmat(train_dir+'/points/'+p_path)
    if 'faceCoordinatesUnwarped' in mat:
      cords_0 = mat['faceCoordinatesUnwarped'] 
    else:
      cords_0 = mat['faceCoordinates2']
    
    p_path = str(samples_index[1]).zfill(4)+'.mat'
    mat = scipy.io.loadmat(train_dir+'/points/'+p_path)
    if 'faceCoordinatesUnwarped' in mat:
      cords_1 = mat['faceCoordinatesUnwarped'] 
    else:
      cords_1 = mat['faceCoordinates2']

    cords_new = (cords_0 + cords_1) / 2 
        # averaging two sets of cordinates to generate new set of cordinates
    distance = sklearn.metrics.pairwise_distances(cords_new)
        # compute the pairwise distances in each mat
    flatten_distance = distance[np.triu_indices(len(cords_new[:,0]), k = 1)]
        # stretch the upper triangle of the symmetric matrix 
        # to a long array with dimension 3003
        # 3003 = (1+77)*78/2
    
    distances = np.append(distances, np.mat([flatten_distance]), axis = 0)
    y = np.append(y,np.array(1))
        # Append new data to the original dataset

  return (distances, y)

In [38]:
Balanced_X, Blanced_Y = data_preprocessing(X, Y, train_dir)

In [39]:
Balanced_X.shape, Blanced_Y.shape

((4804, 3003), (4804,))

train-test split
since we can see that the dataset is imbalanced, in this chunk we do an 80-20 train-test split

In [42]:
import numpy as np
import matplotlib.pyplot as plt
import smote_variants as sv
import imbalanced_databases as imbd

In [43]:
#split train and test with 80-20
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test= train_test_split(Balanced_X, Blanced_Y,test_size=0.2,random_state=0)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

# Step 3: construct features and responses

GBM model goes here (both original and updated)

• The follow plots show how pairwise distance between fiducial points can work as feature for facial emotion recognition.

    – In the first column, 78 fiducials points of each emotion are marked in order.
    – In the second column distributions of vertical distance between right pupil(1) and right brow peak(21) are shown in histograms. For example, the distance of an angry face tends to be shorter than that of a surprised face.
    – The third column is the distributions of vertical distances between right mouth corner(50) and the midpoint of the upper lip(52). For example, the distance of an happy face tends to be shorter than that of a sad face.

this is step is to identify and improve features 


# # Step 4: GBM

In [None]:
# Save best gbm model
pickle.dump(gbm_best, open("../output/baseline_gbm.p",'wb'))


In [None]:
#Save trained weighted SVM model
pickle.dump(weighted_svm_best, open("../output/baseline_gbm.p",'wb','wb'))


# Step 5: Train a classification model with training features and responses

apply advanced model to fit the training data

# Step 5: Run test on test images

run trained model with test data and evaluate

# SVM 

In [50]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sklearn
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, cross_validate, GridSearchCV,RepeatedStratifiedKFold,cross_val_score
from sklearn.neighbors import KNeighborsClassifier
import scipy.io
import pickle
import os, sys
from scipy.spatial.distance import pdist
import time 
import xlsxwriter
from sklearn.metrics import accuracy_score, classification_report,make_scorer, confusion_matrix,roc_auc_score
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
#from keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from sklearn import ensemble
from sklearn.svm import SVC
import random
from keras.utils import to_categorical
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Input, Dropout
from keras.layers import BatchNormalization
from keras.models import Model
from keras import initializers
from keras.optimizers import Adam
from keras.utils import to_categorical

In [60]:
param_grid = {'C': [0.1,1, 10, 100, 1000], 'gamma': [1,0.1,0.01,0.001,0.0001], 'kernel': ['rbf']} 
grid_svm = GridSearchCV(SVC(),param_grid,refit=True,verbose=3)
grid_svm.fit(X_train,y_train)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END .....................C=0.1, gamma=1, kernel=rbf; total time=  28.5s
[CV 2/5] END .....................C=0.1, gamma=1, kernel=rbf; total time=  24.6s
[CV 3/5] END .....................C=0.1, gamma=1, kernel=rbf; total time=  26.3s
[CV 4/5] END .....................C=0.1, gamma=1, kernel=rbf; total time=  25.7s
[CV 5/5] END .....................C=0.1, gamma=1, kernel=rbf; total time=  27.2s
[CV 1/5] END ...................C=0.1, gamma=0.1, kernel=rbf; total time=  26.8s
[CV 2/5] END ...................C=0.1, gamma=0.1, kernel=rbf; total time=  27.4s
[CV 3/5] END ...................C=0.1, gamma=0.1, kernel=rbf; total time=  26.9s
[CV 4/5] END ...................C=0.1, gamma=0.1, kernel=rbf; total time=  26.2s
[CV 5/5] END ...................C=0.1, gamma=0.1, kernel=rbf; total time=  26.3s
[CV 1/5] END ..................C=0.1, gamma=0.01, kernel=rbf; total time=  27.4s
[CV 2/5] END ..................C=0.1, gamma=0.0

[CV 2/5] END ....................C=1000, gamma=1, kernel=rbf; total time=  24.3s
[CV 3/5] END ....................C=1000, gamma=1, kernel=rbf; total time=  24.1s
[CV 4/5] END ....................C=1000, gamma=1, kernel=rbf; total time=  24.5s
[CV 5/5] END ....................C=1000, gamma=1, kernel=rbf; total time=  24.3s
[CV 1/5] END ..................C=1000, gamma=0.1, kernel=rbf; total time=  24.2s
[CV 2/5] END ..................C=1000, gamma=0.1, kernel=rbf; total time=  24.2s
[CV 3/5] END ..................C=1000, gamma=0.1, kernel=rbf; total time=  24.3s
[CV 4/5] END ..................C=1000, gamma=0.1, kernel=rbf; total time=  24.3s
[CV 5/5] END ..................C=1000, gamma=0.1, kernel=rbf; total time=  24.2s
[CV 1/5] END .................C=1000, gamma=0.01, kernel=rbf; total time=  24.0s
[CV 2/5] END .................C=1000, gamma=0.01, kernel=rbf; total time=  24.0s
[CV 3/5] END .................C=1000, gamma=0.01, kernel=rbf; total time=  23.3s
[CV 4/5] END ...............

GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['rbf']},
             verbose=3)

In [61]:
print(grid_svm.best_params_)
print(grid_svm.best_estimator_)

{'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
SVC(C=10, gamma=0.0001)


In [62]:
#Train SVM using best parameters
svm_best = SVC(C=10,gamma=0.0001,kernel='rbf',probability=True) 
start_time=time.time()
svm_best.fit(X_train, y_train)

print("Training  model takes %s seconds" % round((time.time() - start_time),3))
print('Accuracy of SVM on test set: {:.3f}'.format(svm_best.score(X_test,y_test)))

start = time.time()
svm_pred = svm_best.predict(X_test)
end = time.time()

svm_predprob = svm_best.predict_proba(X_test)[:,1]

print("Predicting test data takes %s seconds" % round((end - start),3))
print('Classification error rate:', np.mean(np.array(y_test)!= svm_pred))
print('Classification report \n', classification_report(y_test, svm_pred))

print('Confusion Matrix \n', confusion_matrix(y_test, svm_pred))
print('AUC is: {:.4f}'.format(roc_auc_score(y_test, svm_predprob)))

Training  model takes 150.439 seconds
Accuracy of SVM on test set: 0.519
Predicting test data takes 12.722 seconds
Classification error rate: 0.48074921956295524
Classification report 
               precision    recall  f1-score   support

           0       0.92      0.05      0.09       484
           1       0.51      1.00      0.67       477

    accuracy                           0.52       961
   macro avg       0.72      0.52      0.38       961
weighted avg       0.72      0.52      0.38       961

Confusion Matrix 
 [[ 24 460]
 [  2 475]]
AUC is: 0.5340


In [76]:
#Save trained SVM model
pickle.dump(svm_best, open("../output/best_svm.p",'wb'))

weighted SVM

In [66]:
weighted_svm = SVC(gamma = 'scale', class_weight = 'balanced')

#CV Weighted SVM 
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate model
scores = cross_val_score(weighted_svm, X, Y, scoring='roc_auc', cv=cv, n_jobs=-1)
# summarize performance
print('Mean ROC AUC: %.3f' % np.mean(scores))

Mean ROC AUC: 0.708


In [68]:
balance = [{0:598.0, 1:2402.0},{0:1,1:100}, {0:1,1:10}, {0:1,1:1}, {0:10,1:1}, {0:100,1:1}]
param_grid = dict(class_weight=balance)

grid_weightedsvm = GridSearchCV(estimator=weighted_svm, param_grid=param_grid, n_jobs=-1, cv=cv, scoring='roc_auc')
grid_weightedsvm.fit(X,Y)
grid_weightedsvm.best_params_
grid_weightedsvm.best_estimator_
grid_weightedsvm.best_params_
grid_weightedsvm.best_estimator_

SVC(class_weight={0: 598.0, 1: 2402.0})

In [70]:
print("Best: %f using %s" % (grid_weightedsvm.best_score_, grid_weightedsvm.best_params_))
# report all configurations
means = grid_weightedsvm.cv_results_['mean_test_score']
stds = grid_weightedsvm.cv_results_['std_test_score']
params = grid_weightedsvm.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.827703 using {'class_weight': {0: 598.0, 1: 2402.0}}
0.827703 (0.030973) with: {'class_weight': {0: 598.0, 1: 2402.0}}
0.792564 (0.035455) with: {'class_weight': {0: 1, 1: 100}}
0.782922 (0.031263) with: {'class_weight': {0: 1, 1: 10}}
0.798206 (0.034647) with: {'class_weight': {0: 1, 1: 1}}
0.796063 (0.030676) with: {'class_weight': {0: 10, 1: 1}}
0.796048 (0.030652) with: {'class_weight': {0: 100, 1: 1}}


In [73]:
weighted_svm_best = SVC(gamma = 'scale', class_weight ={0: 598.0, 1: 2402.0},probability=True)
start_time=time.time()
weighted_svm_best.fit(X, Y)
print("Training  model takes %s seconds" % round((time.time() - start_time),3))

print('Accuracy of weighted SVM on test set: {:.3f}'.format(weighted_svm_best.score(X,Y)))

start = time.time()
weighted_svm_pred = weighted_svm_best.predict(X_test)
end = time.time()
print(weighted_svm_pred[0:5,])

weighted_svm_predprob = weighted_svm_best.predict_proba(X_test)[:,1]

print("Predicting test data takes %s seconds" % round((end - start),3))
print('Classification error rate:', np.mean(np.array(y_test)!= weighted_svm_pred))
print('Classification report \n', classification_report(y_test, weighted_svm_pred))

print('Confusion Matrix \n', confusion_matrix(y_test, weighted_svm_pred))
print('AUC is: {:.4f}'.format(roc_auc_score(y_test, weighted_svm_predprob)))

Training  model takes 62.867 seconds
Accuracy of weighted SVM on test set: 0.863
[0 1 1 0 0]
Predicting test data takes 4.626 seconds
Classification error rate: 0.11446409989594172
Classification report 
               precision    recall  f1-score   support

           0       0.92      0.84      0.88       484
           1       0.85      0.93      0.89       477

    accuracy                           0.89       961
   macro avg       0.89      0.89      0.89       961
weighted avg       0.89      0.89      0.89       961

Confusion Matrix 
 [[408  76]
 [ 34 443]]
AUC is: 0.9465


In [80]:
#Save trained weighted SVM model
pickle.dump(weighted_svm_best, open("../output/best_weighted_svm.p",'wb'))

#Load weighted SVM model
pickle.load(open("../output/best_weighted_svm.p",'rb'))

In [84]:
#Load SVM balanced model
pickle.load(open("../output/best_svm.p",'rb'))


SVC(C=10, gamma=0.0001, probability=True)

# Other models (optional)

In [54]:
#knn
from sklearn.neighbors import NearestCentroid
import numpy as np
from sklearn.metrics import classification_report

clf = NearestCentroid()
start_time=time.time()
clf.fit(X_train, y_train)
NearestCentroid()
print("Training  model takes %s seconds" % round((time.time() - start_time),3))

start = time.time()
pre=clf.predict(X_test)
end = time.time()
print("Predicting test data takes %s seconds" % round((end - start),3))
print(classification_report(y_test,pre))

#Save trained knn model
pickle.dump(weighted_svm_best, open("../output/best_weighted_svm.p",'wb'))

#Load weighted SVM model
pickle.load(open("../output/best_weighted_svm.p",'rb'))

Training  model takes 0.073 seconds
Predicting test data takes 0.018 seconds
              precision    recall  f1-score   support

           0       0.83      0.53      0.65       479
           1       0.24      0.59      0.34       121

    accuracy                           0.54       600
   macro avg       0.54      0.56      0.49       600
weighted avg       0.71      0.54      0.58       600



In [55]:
from sklearn.neighbors import (NeighborhoodComponentsAnalysis,KNeighborsClassifier)
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.pipeline import Pipeline

for k in range(3,10):
    nca = NeighborhoodComponentsAnalysis(random_state=42)
    knn = KNeighborsClassifier(n_neighbors=k)
    nca_pipe = Pipeline([('nca', nca), ('knn', knn)])
    nca_pipe.fit(X_train, y_train)
    pre=nca_pipe.predict(X_test)
    # Pipeline(...)
    print('the score of a model with k = %d is %f' % (k, nca_pipe.score(X_test, y_test)))
    print('the recall of a model with k = %d is %f' % (k, recall_score(y_test, pre)))
    
    

from sklearn.neighbors import (NeighborhoodComponentsAnalysis,KNeighborsClassifier)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
k=3
nca = NeighborhoodComponentsAnalysis(random_state=42)
knn = KNeighborsClassifier(n_neighbors=k)
nca_pipe = Pipeline([('nca', nca), ('knn', knn)])
nca_pipe.fit(X_train, y_train)
# Pipeline(...)
pre=nca_pipe.predict(X_test)
# Pipeline(...)
print('the score of a model with k = %d is %f' % (k, nca_pipe.score(X_test, y_test)))
print('the recall of a model with k = %d is %f' % (k, recall_score(y_test, pre)))


from sklearn.metrics import classification_report

pre=nca_pipe.predict(X_test)
print(classification_report(y_test,pre))

the score of a model with k = 3 is 0.776667
the recall of a model with k = 3 is 0.223140
the score of a model with k = 4 is 0.800000
the recall of a model with k = 4 is 0.090909
the score of a model with k = 5 is 0.786667
the recall of a model with k = 5 is 0.148760
the score of a model with k = 6 is 0.800000
the recall of a model with k = 6 is 0.082645
the score of a model with k = 7 is 0.790000
the recall of a model with k = 7 is 0.115702
the score of a model with k = 8 is 0.810000
the recall of a model with k = 8 is 0.082645
the score of a model with k = 9 is 0.803333
the recall of a model with k = 9 is 0.132231
the score of a model with k = 3 is 0.776667
the recall of a model with k = 3 is 0.223140
              precision    recall  f1-score   support

           0       0.82      0.92      0.87       479
           1       0.40      0.22      0.29       121

    accuracy                           0.78       600
   macro avg       0.61      0.57      0.58       600
weighted avg    

In [70]:
# Stochastic Gradient Descent
# SGD with penalty=l1

from sklearn.linear_model import SGDClassifier
clf = SGDClassifier(loss="log", penalty="l1", max_iter=200, shuffle=True, class_weight='balanced')
start_time=time.time()
clf.fit(X_train, y_train)
print("Training  model takes %s seconds" % round((time.time() - start_time),3))

from sklearn.metrics import classification_report
start = time.time()
pre=clf.predict(X_test)
end = time.time()
print("Predicting test data takes %s seconds" % round((end - start),3))
print(classification_report(y_test,pre))

Training  model takes 4.778 seconds
Predicting test data takes 0.008 seconds
              precision    recall  f1-score   support

           0       0.92      0.64      0.76       479
           1       0.35      0.78      0.49       121

    accuracy                           0.67       600
   macro avg       0.64      0.71      0.62       600
weighted avg       0.81      0.67      0.70       600



In [57]:
# SGD with penalty=12
from sklearn.linear_model import SGDClassifier

clf = SGDClassifier(loss="log", penalty="l2", max_iter=200, shuffle=True, class_weight='balanced')
start_time=time.time()
clf.fit(X_train, y_train)
print("Training  model takes %s seconds" % round((time.time() - start_time),3))

from sklearn.metrics import classification_report
start = time.time()
pre=clf.predict(X_test)
end = time.time()
print("Predicting test data takes %s seconds" % round((end - start),3))
print(classification_report(y_test,pre))

Training  model takes 2.268 seconds
Predicting test data takes 0.005 seconds
              precision    recall  f1-score   support

           0       1.00      0.03      0.06       479
           1       0.21      1.00      0.34       121

    accuracy                           0.23       600
   macro avg       0.60      0.52      0.20       600
weighted avg       0.84      0.23      0.12       600



In [69]:
# Neural Networks
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto',
              beta_1=0.9, beta_2=0.999, early_stopping=False,
              epsilon=1e-08, hidden_layer_sizes=(5,2),
              learning_rate='constant', learning_rate_init=0.001,
              max_iter=200, momentum=0.9, n_iter_no_change=10,
              nesterovs_momentum=True, power_t=0.5, random_state=2,
              shuffle=True, solver='lbfgs', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)
start_time=time.time()
clf.fit(X_train, y_train)
print("Training  model takes %s seconds" % round((time.time() - start_time),3))

from sklearn.metrics import classification_report
start = time.time()
pre=clf.predict(X_test)
end = time.time()
print("Predicting test data takes %s seconds" % round((end - start),3))
print(classification_report(y_test,pre))

Training  model takes 0.296 seconds
Predicting test data takes 0.005 seconds
              precision    recall  f1-score   support

           0       0.80      1.00      0.89       479
           1       0.00      0.00      0.00       121

    accuracy                           0.80       600
   macro avg       0.40      0.50      0.44       600
weighted avg       0.64      0.80      0.71       600



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [71]:
# Decision Tree
from sklearn import tree

clf = tree.DecisionTreeClassifier(max_depth = 30, min_samples_leaf=2, max_leaf_nodes=3, class_weight='balanced')
start_time=time.time()
clf = clf.fit(X_train, y_train)
print("Training  model takes %s seconds" % round((time.time() - start_time),3))

from sklearn.metrics import classification_report
start = time.time()
pre=clf.predict(X_test)
end = time.time()
print("Predicting test data takes %s seconds" % round((end - start),3))
print(classification_report(y_test,pre))

Training  model takes 2.673 seconds
Predicting test data takes 0.008 seconds
              precision    recall  f1-score   support

           0       0.86      0.71      0.77       479
           1       0.31      0.53      0.39       121

    accuracy                           0.67       600
   macro avg       0.58      0.62      0.58       600
weighted avg       0.75      0.67      0.70       600



In [72]:
# Random Forest & Adaboost

from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=300, class_weight='balanced')
start_time=time.time()
clf = clf.fit(X_train, y_train)
print("Training  model takes %s seconds" % round((time.time() - start_time),3))

from sklearn.metrics import classification_report
start = time.time()
pre=clf.predict(X_test)
end = time.time()
print("Predicting test data takes %s seconds" % round((end - start),3))
print(classification_report(y_test,pre))

Training  model takes 29.645 seconds
Predicting test data takes 0.122 seconds
              precision    recall  f1-score   support

           0       0.81      0.99      0.89       479
           1       0.76      0.11      0.19       121

    accuracy                           0.81       600
   macro avg       0.79      0.55      0.54       600
weighted avg       0.80      0.81      0.75       600



In [74]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier

clf = AdaBoostClassifier(n_estimators=200)
start_time=time.time()
clf = clf.fit(X_train, y_train)
print("Training  model takes %s seconds" % round((time.time() - start_time),3))

from sklearn.metrics import classification_report
start_time=time.time()
pre=clf.predict(X_test)
end = time.time()
print("Predicting test data takes %s seconds" % round((end - start),3))
print(classification_report(y_test,pre))

Training  model takes 318.494 seconds
Predicting test data takes 645.113 seconds
              precision    recall  f1-score   support

           0       0.86      0.90      0.88       479
           1       0.53      0.42      0.47       121

    accuracy                           0.81       600
   macro avg       0.69      0.66      0.67       600
weighted avg       0.79      0.81      0.80       600



In [75]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier

clf = AdaBoostClassifier(n_estimators=100)
start_time=time.time()
clf = clf.fit(X_train, y_train)
#scores = cross_val_score(clf, img_set, label, cv=5)
#print('the 5-fold cross validation score for AdaBoost with 100 estimators is %f' % scores.mean())
print("Training  model takes %s seconds" % round((time.time() - start_time),3))

from sklearn.metrics import classification_report
start_time=time.time()
pre=clf.predict(X_test)
end = time.time()
print("Predicting test data takes %s seconds" % round((end - start),3))
print(classification_report(y_test,pre))

Training  model takes 202.349 seconds
Predicting test data takes 848.637 seconds
              precision    recall  f1-score   support

           0       0.84      0.92      0.88       479
           1       0.49      0.32      0.39       121

    accuracy                           0.80       600
   macro avg       0.67      0.62      0.63       600
weighted avg       0.77      0.80      0.78       600



# Model results