<a href="https://colab.research.google.com/github/SreyaJampana/Facial-Emotion-Detection/blob/main/Section2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2
import dlib
import gdown
import pickle
import warnings
import itertools

import numpy as np
import pandas as pd
import seaborn as sns

import urllib.request

from sklearn import metrics
from sklearn.metrics import accuracy_score

from scipy.spatial import distance
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Download the emotion data set (read later as a CSV with pandas) to ./ferdata.csv.
# Lines starting with `!` are notebook shell commands, not Python.
!wget -q --show-progress -O ./ferdata.csv "https://storage.googleapis.com/inspirit-ai-data-bucket-1/Data/AI%20Scholars/Sessions%206%20-%2010%20(Projects)/Project%20-%20Emotion%20Detection/fer2013_5.csv"

# Download the model file for dlib's 68-point facial landmark shape predictor.
!wget -q --show-progress -O ./shape_predictor_68_face_landmarks.dat "https://storage.googleapis.com/inspirit-ai-data-bucket-1/Data/AI%20Scholars/Sessions%206%20-%2010%20(Projects)/Project%20-%20Emotion%20Detection/shape_predictor_68_face_landmarks.dat"

# Download pureX.npy -- presumably the raw pixel rows that preprocess_data() saves under that name.
!wget -q --show-progress -O ./pureX.npy "https://storage.googleapis.com/inspirit-ai-data-bucket-1/Data/AI%20Scholars/Sessions%206%20-%2010%20(Projects)/Project%20-%20Emotion%20Detection/pureX.npy"

# Download dataX.npy -- presumably the landmark-distance features that preprocess_data() saves under that name.
!wget -q --show-progress -O ./dataX.npy "https://storage.googleapis.com/inspirit-ai-data-bucket-1/Data/AI%20Scholars/Sessions%206%20-%2010%20(Projects)/Project%20-%20Emotion%20Detection/dataX.npy"

# Download dataY.npy -- presumably the emotion labels that preprocess_data() saves under that name.
!wget -q --show-progress -O ./dataY.npy "https://storage.googleapis.com/inspirit-ai-data-bucket-1/Data/AI%20Scholars/Sessions%206%20-%2010%20(Projects)/Project%20-%20Emotion%20Detection/dataY.npy"

print ("Data Downloaded!")


Data Downloaded!


# Milestone 1: Understanding the Feature Generation Process

## Distance between Facial Landmarks



Distances measured between specific facial landmarks can serve as valuable input features (X) for a predictive model. In this model, these features are analyzed to determine the corresponding facial expressions or emotions, which are the outputs (Y). By feeding these calculated distances into the model, it can effectively learn to associate particular patterns of facial landmark distances with specific emotional states.


In [None]:
# Emotion name for each integer class id used in the data set
label_map = dict(enumerate(("ANGRY", "HAPPY", "SAD", "SURPRISE", "NEUTRAL")))

# Read the downloaded CSV into a DataFrame and preview its first rows
df = pd.read_csv("./ferdata.csv")
df.head()

In [None]:
# Bar chart of how many rows carry each emotion label.
# Labels and counts are both derived from label_map itself, so every bar's
# name and height always refer to the same class id.
emotion_labels = list(label_map.values())
emotion_counts = [np.sum(df["emotion"] == emotion_id) for emotion_id in label_map]

# One plt.bar call per label so that matplotlib's color cycle gives each bar
# its own color (a single call would draw them all in one color).
for emotion_label, emotion_count in zip(emotion_labels, emotion_counts):
    plt.bar(x=emotion_label, height=emotion_count)

# make the plot interpretable with x and y labels + title
plt.xlabel('EMOTION LABEL')
plt.ylabel('N OBSERVATIONS')
plt.title('A balanced distribution of emotions in our data set', y=1.05);

In [None]:

# dlib's frontal face detector is left commented out: get_landmarks() passes
# a fixed bounding box to the predictor instead of detecting faces.
#frontalface_detector = dlib.get_frontal_face_detector()


# Load the 68-point facial landmark predictor from the downloaded model file.
predictor = dlib.shape_predictor('./shape_predictor_68_face_landmarks.dat')
def get_landmarks(image):
  """Return the facial landmarks predicted for an already-cropped face image.

  No face detection is performed. The whole image, minus a one-pixel
  border, is given to the module-level dlib ``predictor`` as the face
  bounding box; for the 48x48 training images that is the box
  (1, 1)-(47, 47).

  :type image : image array (e.g. a cv2 / numpy image), not a file path
  :rtype : tuple ``(image, landmarks)`` -- the input image unchanged, and a
           list of (x, y) tuples, one per landmark returned by the predictor
  """
  # Derive the box from the image size rather than hard-coding 48x48 so the
  # same one-pixel margin is used for cropped faces of any size.
  height, width = image.shape[:2]
  face_box = dlib.rectangle(left=1, top=1, right=width - 1, bottom=height - 1)

  shape = predictor(image, face_box)
  landmarks = [(point.x, point.y) for point in shape.parts()]
  return image, landmarks

def plot_image_landmarks(image,face_landmarks):
  """Display an image with its facial landmarks drawn on top.

  The landmarks are drawn on a copy, so the caller's image is not modified.

  :type image : image array (cv2 / numpy)
  :type face_landmarks : list of tuples where each tuple represents
                     the x and y co-ordinates of facial keypoints
  :rtype : None
  """
  # cv2.circle takes (img, center, radius, color, thickness); a negative
  # thickness (cv2.FILLED) draws a filled disc instead of an outline.
  radius = 1
  thickness = cv2.FILLED
  color = (255, 0, 0)

  annotated = image.copy()
  for (x, y) in face_landmarks:
    cv2.circle(annotated, (x, y), radius, color, thickness)

  plt.imshow(annotated, interpolation='nearest', cmap='Greys_r')
  plt.xticks([]); plt.yticks([])
  plt.show()


def get_pixels_image(img_pixels, plt_flag, width=48, height=48):
  """Convert a string of pixel values into a 2-D image, optionally showing it.

  :type img_pixels : str of space-separated integer pixel values (0-255)
  :type plt_flag : truthy to display the image with matplotlib
  :type width : int, number of columns (defaults to 48)
  :type height : int, number of rows (defaults to 48)
  :rtype image : numpy uint8 array of shape (height, width)
  :raises ValueError : if the number of values is not height * width
  """
  flat_pixels = np.fromstring(img_pixels, dtype=np.uint8, sep=" ")
  image = flat_pixels.reshape((height, width))

  if plt_flag:
    plt.imshow(image, interpolation='nearest', cmap="Greys_r")
    plt.xticks([]); plt.yticks([])
    plt.show()

  return image


In [None]:
# Pick one row of the data set uniformly at random
i_index = np.random.randint(0, len(df))

# Fetch that row's pixel string
image_pixels = df['pixels'][i_index]

# Decode the string into a 2-D image and display it
image = get_pixels_image(image_pixels, plt_flag=True)

In [None]:
# Predict the landmark coordinates for the sampled face
image, facial_landmarks = get_landmarks(image)

# Show the face with the predicted landmarks drawn over it
plot_image_landmarks(image=image, face_landmarks=facial_landmarks)

In [None]:
# Demo: print every unordered pair of indices drawn from 0..3
for i, j in itertools.combinations(range(4), r=2):
  print(f"{i} {j}")

In [None]:
def get_all_landmarks_euclid_dist(face_landmarks):
    """Return the Euclidean distance between every pair of landmark points.

    Each distance is one model feature. Pairs are visited in the order
    produced by ``itertools.combinations`` -- (0, 1), (0, 2), ..., (n-2, n-1)
    -- so the feature order is the same for every face. ``n`` points give
    ``n * (n - 1) / 2`` features (2278 for 68 landmarks).

    :type face_landmarks : sequence of 2-D points, e.g. (x, y) tuples
    :rtype e_dist : list of distances; empty when fewer than two points
                    are given
    """
    e_dist = [
        distance.euclidean(point_a, point_b)
        for point_a, point_b in itertools.combinations(face_landmarks, 2)
    ]
    return e_dist

In [None]:

def preprocess_data(df):
  """Build raw-pixel rows, landmark-distance features and labels from df.

  For every row, the 'pixels' string is decoded to an image, the image is
  Gaussian-blurred, landmarks are predicted on the blurred image and
  turned into pairwise-distance features. A row that raises at any step
  is reported and skipped as a whole, so the three outputs always have
  the same number of entries and stay aligned row by row.

  The three lists are also saved with np.save under the names "pureX",
  "dataX" and "dataY" (np.save appends ".npy").

  :type df : DataFrame with a 'pixels' and an 'emotion' column
  :rtype : tuple of numpy arrays (X_pixels, X, Y) -- flattened raw pixels,
           landmark-distance features, and emotion labels
  """
  X = []
  Y = []
  X_pixels = []

  for index, row in df.iterrows():

      if index%1000 == 0:
        print (index, "Datapoints Processed")

      try:
          image = get_pixels_image(row['pixels'],0)
          pixels = image.ravel()
          # Landmarks are predicted on a blurred copy; the raw pixels are kept unblurred.
          blurred = cv2.GaussianBlur(image,(5,5),0)

          _,face_landmarks = get_landmarks(blurred)
          features = get_all_landmarks_euclid_dist(face_landmarks) # Using our feature function!
          label = row['emotion']

      except Exception as e:
          # Best effort: report the failing row and move on without recording any part of it.
          print ("An error occured:",e)
          continue

      # Append only after every step succeeded so the three lists never get out of step.
      X_pixels.append(pixels)
      X.append(features)
      Y.append(label)

  #Save the data
  np.save("pureX", X_pixels)
  np.save("dataX", X)
  np.save("dataY", Y)

  return np.array(X_pixels),np.array(X),np.array(Y)

In [None]:
# True  -> load the feature/label arrays that were generated ahead of time
# False -> regenerate them from df with preprocess_data()
preload = True

if not preload:

  # slow path (the original note says 15-20 minutes): recompute everything
  pureX, dataX, dataY = preprocess_data(df)

else:

  # fast path: read the arrays saved by an earlier preprocess_data() run
  # NOTE(review): allow_pickle=True lets np.load unpickle object arrays;
  # only use it on files from a trusted source.
  dataX = np.load('./dataX.npy')
  dataY = np.load('./dataY.npy', allow_pickle=True)

In [None]:
# Hold out 10% of the samples as a test set; stratify on the labels and fix
# the seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    dataX,
    dataY,
    test_size=0.1,
    random_state=42,
    stratify=dataY,
)

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline: default logistic regression fitted on the features as split above;
# the last line evaluates to its accuracy on the test split
model = LogisticRegression().fit(X_train, y_train)
model.score(X_test, y_test)

In [None]:
# Standardize the features.
# The scaler is fitted on the training split only, then applied to both
# splits, so no test-set statistics are used.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Inspect the shape of the training matrix after standardization
X_train.shape

# Milestone 3: Applying Machine Learning to Emotion Detection

In [None]:

# Train and evaluate three classifiers on the standardized features.
# Each section fits on the training split and reports test accuracy.

#-----------------K-Nearest Neighbors--------------#
knn = KNeighborsClassifier(n_neighbors=10)
print ("Training the knn model")
knn.fit(X_train, y_train)

# NOTE: the original notebook warns that this prediction step is slow (5-10 minutes).
print ("Predict for KNN Model")
y_predknn = knn.predict(X_test)
print ("Prediction Completed")
print ("Test Accuracy(KNN):",metrics.accuracy_score(y_test, y_predknn))

#-----------------DecisionTreeClassifier--------------#
dt = DecisionTreeClassifier(max_depth=20)
print ("Training the Decision Tree model")
dt.fit(X_train, y_train)
print ("Completed Decision Tree Training")

print ("Predict for Decision Tree Model")
y_preddt = dt.predict(X_test)
print ("Test Accuracy(DT):",metrics.accuracy_score(y_test, y_preddt))


#-----------------Logistic Regression--------------#
# multi_class is deprecated in recent scikit-learn releases; with the lbfgs
# solver the default already fits a multinomial model on multiclass labels,
# so the argument is dropped rather than passed explicitly.
lr = LogisticRegression(solver='lbfgs')
print ("Training the Logistic Regression model")
lr.fit(X_train, y_train)
print ("Completed LR Training")

print ("Predict for LR Model")
y_predlr = lr.predict(X_test)
print ("Test Accuracy(LR):",metrics.accuracy_score(y_test, y_predlr))

In [None]:
def plot_confusion_matrix(y_true,y_predicted):
  """Plot the confusion matrix of true vs. predicted labels as a heatmap.

  Rows are true labels and columns are predicted labels, both named with
  the values of the module-level ``label_map``. The figure is shown and
  then closed; nothing is written to disk.

  :type y_true : array-like of true class labels
  :type y_predicted : array-like of predicted class labels
  :rtype : None
  """
  cm = metrics.confusion_matrix(y_true, y_predicted)
  print ("Plotting the Confusion Matrix")
  labels = list(label_map.values())
  df_cm = pd.DataFrame(cm,index = labels,columns = labels)

  plt.figure()
  sns.heatmap(df_cm, annot=True,cmap='Blues', fmt='g')
  # Heatmap cell k spans [k, k+1] on the y axis, so k + 0.5 centers the tick
  # label on its row; positions follow the number of labels instead of
  # assuming exactly five.
  tick_positions = [row + 0.5 for row in range(len(labels))]
  plt.yticks(tick_positions, labels,va='center')
  plt.title('Confusion Matrix - TestData')
  plt.ylabel('True label')
  plt.xlabel('Predicted label')
  plt.show()
  plt.close()

In [None]:
# Confusion matrix for the logistic regression predictions on the test split
plot_confusion_matrix(y_true=y_test, y_predicted=y_predlr)

# Milestone 4: Coding Exercise



In [None]:
# Read the raw pixel rows (X) and their labels (Y) from the downloaded .npy files
X, Y = (np.load(npy_path) for npy_path in ('pureX.npy', 'dataY.npy'))

In [None]:
# Repeat the three-classifier comparison, this time on the raw pixel data (X, Y).

#Split Data into Train, Test (90-10)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42,stratify =Y)


# Standardize the data; the scaler is fitted on the training split only.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#-----------------K-Nearest Neighbors--------------#
knn = KNeighborsClassifier(n_neighbors=10)
print ("Training the knn model")
knn.fit(X_train, y_train)

# NOTE: the original notebook warns that this prediction step is slow (5-10 minutes).
print ("Predict for KNN Model")
y_predknn = knn.predict(X_test)
print ("Prediction Completed")
print ("Test Accuracy(KNN):",metrics.accuracy_score(y_test, y_predknn))



#-----------------DecisionTreeClassifier--------------#
dt = DecisionTreeClassifier(max_depth=20)
print ("Training the Decision Tree model")
dt.fit(X_train, y_train)
print ("Completed Decision Tree Training")

print ("Predict for Decision Tree Model")
y_preddt = dt.predict(X_test)
print ("Test Accuracy(DT):",metrics.accuracy_score(y_test, y_preddt))


#-----------------Logistic Regression--------------#
# multi_class is deprecated in recent scikit-learn releases; with the lbfgs
# solver the default already fits a multinomial model on multiclass labels,
# so the argument is dropped rather than passed explicitly.
lr = LogisticRegression(solver='lbfgs')
print ("Training the Logistic Regression model")
lr.fit(X_train, y_train)
print ("Completed LR Training")

print ("Predict for LR Model")
y_predlr = lr.predict(X_test)
print ("Test Accuracy(LR):",metrics.accuracy_score(y_test, y_predlr))

In [None]:
from joblib import dump
# Persist a fitted model to disk with joblib.
# NOTE(review): MODEL_VARIABLE is not defined anywhere in the visible source and
# "MODEL_NAME.joblib" looks like a placeholder file name -- replace both with the
# model to save (e.g. one of knn / dt / lr) and its file name before running.
dump(MODEL_VARIABLE, "MODEL_NAME.joblib")