<a href="https://colab.research.google.com/github/IdjiotSandwiches/knn-fer/blob/create-model/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install dagshub --quiet
!pip install mlflow --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m252.2/252.2 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.2/13.2 MB[0m [31m51.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m203.2/203.2 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.5/49.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.2/83.2 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.0/74.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.3/27.3 MB[0m [31m60.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [1]:
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import mlflow
import dagshub
import os
import pathlib
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold, KFold

In [2]:
from google.colab import drive
drive.mount('/content/drive')

%cd "/content/drive/MyDrive/"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive


In [3]:
PATH = pathlib.Path('facial-emotion-recognition')
LABELS = os.listdir(PATH)

In [4]:
dagshub.init(repo_owner='IdjiotSandwiches', repo_name='knn-fer', mlflow=True)

In [5]:
K_SIZE = (31,31)
SIGMA = 4
THETA_RANGE = np.arange(0, np.pi, np.pi/32)
LAMBD = 10.0
GAMMA = 0.5
PSI = 0

gabor_params = {
    'ksize': K_SIZE,
    'sigma': SIGMA,
    'lambd': LAMBD,
    'gamma': GAMMA,
    'psi': PSI
}

In [None]:
def gabor_filter(img):
  features = []
  for theta in THETA_RANGE:
    kernel = cv.getGaborKernel(**gabor_params, theta=theta)
    filter = cv.filter2D(img, cv.CV_64F, kernel)
    features.append(filter)

  return features

In [None]:
def open_dataset(dir):
  images, labels = [], []

  for i, label in enumerate(LABELS):
    path = f'{dir}/{label}'
    for j, img_path in enumerate(os.listdir(path)):
      img = cv.imread(f'{path}/{img_path}')
      img = cv.resize(img, (48,48))
      img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
      img = cv.GaussianBlur(img,(5,5),0)
      img = cv.equalizeHist(img)
      img = img / 255.0
      img = gabor_filter(img)
      images.append(img)
      labels.append(i)

  return np.array(images), np.array(labels)

In [None]:
def save_filtered_img(images, labels):
  folder_path = f'/content/drive/MyDrive/gabor-filtered-imgs'
  os.makedirs(folder_path, exist_ok=True)

  np.save(f'{folder_path}/images.npy', images)
  np.save(f'{folder_path}/labels.npy', labels)

  print('Ok!')

In [None]:
images, labels = open_dataset(PATH)
images = images.reshape(images.shape[0], -1)
save_filtered_img(images, labels)

In [6]:
images = np.load('/content/drive/MyDrive/gabor-filtered-imgs/images.npy')
labels = np.load('/content/drive/MyDrive/gabor-filtered-imgs/labels.npy')

In [9]:
TEST_SIZE = 0.3
CALIB_SIZE = 0.5
RANDOM_STATE = 42

split_params = {
    'test_size': TEST_SIZE,
    'random_state': RANDOM_STATE
}

calib_params = {
    'test_size': CALIB_SIZE,
    'random_state': RANDOM_STATE
}

X_train, X_test, y_train, y_test = train_test_split(images, labels, **split_params)
X_test, X_calib, y_test, y_calib = train_test_split(X_test, y_test, **calib_params)

In [7]:
N_NEIGHBORS = 9
WEIGHTS = 'distance'
METRIC = 'manhattan'
ALGORITHM = 'ball_tree'

model_params = {
  'n_neighbors': N_NEIGHBORS,
  'weights': WEIGHTS,
  'metric': METRIC,
  'algorithm': ALGORITHM
}

model = KNeighborsClassifier(**model_params)
scores = cross_val_score(model, images, labels, cv=5)
print(scores)

[0.39719626 0.42429907 0.39906542 0.43271028 0.41121495]


In [None]:
N_NEIGHBORS = 9
WEIGHTS = 'distance'
METRIC = 'manhattan'
ALGORITHM = 'ball_tree'
TEST_SIZE = 0.5
RANDOM_STATE = 42
N_SPLITS = 10
SHUFFLE = True

model_params = {
  'n_neighbors': N_NEIGHBORS,
  'weights': WEIGHTS,
  'metric': METRIC,
  'algorithm': ALGORITHM
}

calib_params = {
  'test_size': TEST_SIZE,
  'random_state': RANDOM_STATE
}

kfold_params = {
    'n_splits': N_SPLITS,
    'shuffle': SHUFFLE
}

skf = StratifiedKFold(**kfold_params)
i = 5

for train_idx, test_idx in skf.split(images, labels):
  X_train, X_test = images[train_idx], images[test_idx]
  y_train, y_test = labels[train_idx], labels[test_idx]

  X_test, X_calib, y_test, y_calib = train_test_split(X_test, y_test, **calib_params)

  model = KNeighborsClassifier(**model_params)
  model.fit(X_train, y_train)

  calib_model = CalibratedClassifierCV(model, cv="prefit")
  calib_model.fit(X_calib, y_calib)

  predict = calib_model.predict(X_test)

  metrics = {
    'accuracy': accuracy_score(y_test, predict),
    'precision': precision_score(y_test, predict, average='macro'),
    'recall': recall_score(y_test, predict, average='macro'),
    'f1': f1_score(y_test, predict, average='macro'),
  }

  print(f"Accuracy: {metrics['accuracy']}\nPrecision: {metrics['precision']}\nRecall: {metrics['recall']}\nF1 Score: {metrics['f1']}")
  print(classification_report(y_test, predict))

  mlflow.set_experiment("KNN_Model StratifiedKFold Calibrated")
  mlflow.set_tracking_uri("https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow")

  with mlflow.start_run(run_name=f'KNN StratifiedKFold={i} Calibrated'):
    mlflow.log_params(gabor_params)
    mlflow.log_params(calib_params)
    mlflow.log_params(model_params)
    mlflow.log_params(kfold_params)
    mlflow.log_param('train_idx', train_idx)
    mlflow.log_param('test_idx', test_idx)
    mlflow.log_metrics(metrics)
    mlflow.sklearn.log_model(
        sk_model=calib_model,
        artifact_path='KNN Model',
        input_example=X_train[:1]
    )
  i = i + 1

Accuracy: 0.45318352059925093
Precision: 0.40570203017162
Recall: 0.3748225077172445
F1 Score: 0.37176120199919416
              precision    recall  f1-score   support

           0       0.61      0.62      0.62        32
           1       0.31      0.21      0.25        19
           2       0.36      0.13      0.20        30
           3       0.36      0.19      0.24        27
           4       0.56      0.73      0.64        78
           5       0.31      0.22      0.25        37
           6       0.33      0.52      0.41        44

    accuracy                           0.45       267
   macro avg       0.41      0.37      0.37       267
weighted avg       0.43      0.45      0.43       267

🏃 View run KNN StratifiedKFold=5 Calibrated at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/10/runs/8a64b302e6da4b79b28ea04ab7b8ac25
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/10
Accuracy: 0.46441947565543074
Precision: 0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.43820224719101125
Precision: 0.33086943568636246
Recall: 0.3521422896422896
F1 Score: 0.3284323844572965
              precision    recall  f1-score   support

           0       0.43      0.69      0.53        32
           1       0.00      0.00      0.00        19
           2       0.36      0.17      0.23        30
           3       0.31      0.19      0.23        27
           4       0.56      0.71      0.62        78
           5       0.25      0.24      0.25        37
           6       0.40      0.48      0.44        44

    accuracy                           0.44       267
   macro avg       0.33      0.35      0.33       267
weighted avg       0.39      0.44      0.40       267

🏃 View run KNN StratifiedKFold=7 Calibrated at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/10/runs/4cb1a7b3d8024aa9bc8458505a21965a
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/10
Accuracy: 0.3970037453183521
Precision: 

In [11]:
N_NEIGHBORS = 9
WEIGHTS = 'distance'
METRIC = 'manhattan'
ALGORITHM = 'ball_tree'

model_params = {
  'n_neighbors': N_NEIGHBORS,
  'weights': WEIGHTS,
  'metric': METRIC,
  'algorithm': ALGORITHM
}

model = KNeighborsClassifier(**model_params)
model.fit(X_train, y_train)

calib_model = CalibratedClassifierCV(model, cv="prefit")
calib_model.fit(X_calib, y_calib)

predict = calib_model.predict(X_test)

metrics = {
  'accuracy': accuracy_score(y_test, predict),
  'precision': precision_score(y_test, predict, average='macro'),
  'recall': recall_score(y_test, predict, average='macro'),
  'f1': f1_score(y_test, predict, average='macro'),
}

print(f"Accuracy: {metrics['accuracy']}\nPrecision: {metrics['precision']}\nRecall: {metrics['recall']}\nF1 Score: {metrics['f1']}")
print(classification_report(y_test, predict))

Accuracy: 0.41271820448877805
Precision: 0.35505141285443115
Recall: 0.33373441164794554
F1 Score: 0.3324115318106068


2024/12/04 03:16:05 INFO mlflow.tracking.fluent: Experiment with name 'KNN_Model using calibration' does not exist. Creating a new experiment.


🏃 View run KNN sigma=5 at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/8/runs/c1b2dcc5917e455fa84e0b57000e5062
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/8


RestException: INVALID_PARAMETER_VALUE: Response: {'error_code': 'INVALID_PARAMETER_VALUE'}

In [14]:
mlflow.set_experiment("KNN_Model using calibration")
mlflow.set_tracking_uri("https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow")

with mlflow.start_run(run_name=f'KNN sigma=5'):
  mlflow.log_params(gabor_params)
  mlflow.log_params(split_params)
  mlflow.log_param('calib_test_size', calib_params['test_size'])
  mlflow.log_param('calib_random_state', calib_params['random_state'])
  mlflow.log_params(model_params)
  mlflow.log_metrics(metrics)
  mlflow.sklearn.log_model(
      sk_model=calib_model,
      artifact_path='KNN Model',
      input_example=X_train[:1]
  )

🏃 View run KNN sigma=5 at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/8/runs/ce454a371af4424b87baacc1c7c55bf9
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/8


In [16]:
for p, y in zip(predict[:20], y_test[:20]):
  print(f'Predict: {LABELS[p]} | Real: {LABELS[y]}')

Predict: happy | Real: surprise
Predict: surprise | Real: surprise
Predict: surprise | Real: fear
Predict: happy | Real: happy
Predict: happy | Real: sad
Predict: surprise | Real: neutral
Predict: neutral | Real: sad
Predict: happy | Real: happy
Predict: angry | Real: neutral
Predict: sad | Real: happy
Predict: happy | Real: angry
Predict: sad | Real: sad
Predict: happy | Real: angry
Predict: sad | Real: angry
Predict: happy | Real: angry
Predict: disgust | Real: angry
Predict: angry | Real: angry
Predict: surprise | Real: fear
Predict: surprise | Real: surprise
Predict: angry | Real: fear


In [None]:
# N_NEIGHBORS = [3,5]
# WEIGHTS = ['uniform', 'distance']
# METRIC = ['minkowski', 'euclidean', 'manhattan']
# ALGORITHM = ['auto', 'ball_tree', 'kd_tree', 'brute']

# for n in N_NEIGHBORS:
#   for w in WEIGHTS:
#     for m in METRIC:
#       for a in ALGORITHM:
#         model_params = {
#             'n_neighbors': n,
#             'weights': w,
#             'metric': m,
#             'algorithm': a
#         }

#         model = KNeighborsClassifier(**model_params)
#         model.fit(X_train, y_train)

#         predict = model.predict(X_test)

#         metrics = {
#           'accuracy': accuracy_score(y_test, predict),
#           'precision': precision_score(y_test, predict, average='macro'),
#           'recall': recall_score(y_test, predict, average='macro'),
#           'f1': f1_score(y_test, predict, average='macro')
#         }

#         print(f"Accuracy: {metrics['accuracy']}\nPrecision: {metrics['precision']}\nRecall: {metrics['recall']}\nF1 Score: {metrics['f1']}")

#         mlflow.set_experiment("KNN_Model Theta=32")
#         mlflow.set_tracking_uri("https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow")

#         with mlflow.start_run(run_name=f'KNN n_neighbors={n}, weights={w}, metric={m}, algorithm={a}'):
#           mlflow.log_params(gabor_params)
#           mlflow.log_params(split_params)
#           mlflow.log_params(model_params)
#           mlflow.log_metrics(metrics)
#           mlflow.sklearn.log_model(
#               sk_model=model,
#               artifact_path='KNN Model',
#               input_example=X_train[:1]
#           )

Accuracy: 0.3074766355140187
Precision: 0.3050938501361262
Recall: 0.29387967874509396
F1 Score: 0.2783373964588805


2024/11/30 10:59:14 INFO mlflow.tracking.fluent: Experiment with name 'KNN_Model Theta=32' does not exist. Creating a new experiment.


🏃 View run KNN n_neighbors=3, weights=uniform, metric=minkowski, algorithm=auto at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/b02f353aaeb3491cb74358282af9a0b4
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4
Accuracy: 0.308411214953271
Precision: 0.30554388973180335
Recall: 0.2947200148795478
F1 Score: 0.27900485072077663




🏃 View run KNN n_neighbors=3, weights=uniform, metric=minkowski, algorithm=ball_tree at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/923932c2679642a08150dd7eb2980d57
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4
Accuracy: 0.308411214953271
Precision: 0.30564408809920407
Recall: 0.2947200148795478
F1 Score: 0.2790072898045715
🏃 View run KNN n_neighbors=3, weights=uniform, metric=minkowski, algorithm=kd_tree at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/aa15c822146048458410e873536155a7
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4
Accuracy: 0.3074766355140187
Precision: 0.3050938501361262
Recall: 0.29387967874509396
F1 Score: 0.2783373964588805
🏃 View run KNN n_neighbors=3, weights=uniform, metric=minkowski, algorithm=brute at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/df424c2fee9e46bd9b4a31f40bc56106
🧪 View exp



Accuracy: 0.308411214953271
Precision: 0.30554388973180335
Recall: 0.2947200148795478
F1 Score: 0.27900485072077663




🏃 View run KNN n_neighbors=3, weights=uniform, metric=euclidean, algorithm=ball_tree at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/eec782be7df14b27a88a49477edf3286
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4
Accuracy: 0.308411214953271
Precision: 0.30564408809920407
Recall: 0.2947200148795478
F1 Score: 0.2790072898045715
🏃 View run KNN n_neighbors=3, weights=uniform, metric=euclidean, algorithm=kd_tree at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/f6a2dd52f9ad4de7928c6899918adb7c
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4
Accuracy: 0.3074766355140187
Precision: 0.3050938501361262
Recall: 0.29387967874509396
F1 Score: 0.2783373964588805




🏃 View run KNN n_neighbors=3, weights=uniform, metric=euclidean, algorithm=brute at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/7da638b7f90b4ffc924ca1bba56e99b3
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4
Accuracy: 0.3233644859813084
Precision: 0.31787422780360425
Recall: 0.30837462993039805
F1 Score: 0.2955184378198163
🏃 View run KNN n_neighbors=3, weights=uniform, metric=manhattan, algorithm=auto at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/c0fe6cc1f71b4d0890cca4f4d474bb7d
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4
Accuracy: 0.3233644859813084
Precision: 0.3177394374501707
Recall: 0.30837462993039805
F1 Score: 0.2954523945289038
🏃 View run KNN n_neighbors=3, weights=uniform, metric=manhattan, algorithm=ball_tree at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/3b8d1d02eaf148a78fbb3c667b6a18ba
🧪 View expe



🏃 View run KNN n_neighbors=3, weights=distance, metric=minkowski, algorithm=ball_tree at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/f055b28fb9714f9fb135bc5babb520f8
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4
Accuracy: 0.34766355140186916
Precision: 0.3287131082820621
Recall: 0.3130277230955814
F1 Score: 0.31473980038813476
🏃 View run KNN n_neighbors=3, weights=distance, metric=minkowski, algorithm=kd_tree at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/149874b20f7a402a9ec2acd812da0472
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4
Accuracy: 0.34579439252336447
Precision: 0.3263228137046797
Recall: 0.3106175282484117
F1 Score: 0.3123337182888574
🏃 View run KNN n_neighbors=3, weights=distance, metric=minkowski, algorithm=brute at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/dfff4fde858b47119d34d5c534636329
🧪 Vie



Accuracy: 0.34766355140186916
Precision: 0.3286418668785575
Recall: 0.3130277230955814
F1 Score: 0.3147611975894672
🏃 View run KNN n_neighbors=3, weights=distance, metric=euclidean, algorithm=ball_tree at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/42485c29c4e0471089b6564a796b58d6
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4




Accuracy: 0.34766355140186916
Precision: 0.3287131082820621
Recall: 0.3130277230955814
F1 Score: 0.31473980038813476
🏃 View run KNN n_neighbors=3, weights=distance, metric=euclidean, algorithm=kd_tree at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/344be1df03114619a9efdab34917c339
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4
Accuracy: 0.34579439252336447
Precision: 0.3263228137046797
Recall: 0.3106175282484117
F1 Score: 0.3123337182888574
🏃 View run KNN n_neighbors=3, weights=distance, metric=euclidean, algorithm=brute at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4/runs/32696a22a213424999b7f229720bcf01
🧪 View experiment at: https://dagshub.com/IdjiotSandwiches/knn-fer.mlflow/#/experiments/4
Accuracy: 0.3663551401869159
Precision: 0.3502625307532238
Recall: 0.3333851627875379
F1 Score: 0.33528941745796753
🏃 View run KNN n_neighbors=3, weights=distance, metric=manhattan, algorithm=auto at: 