# Assignment 2: Affect Recognition from Landmarks

**Group 2** : Abhinav Ramalingam & Victoria Van Rillaer

In [1]:
%pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

## Preprocessing

In [13]:
train_data = pd.read_csv("dataset.csv")
test_data = pd.read_csv("test_to_submit.csv")

label_encoder = LabelEncoder()
train_data['emotion'] = label_encoder.fit_transform(train_data['emotion'])


X = train_data.drop(columns=['emotion'])  
y = train_data['emotion'] 


scaler = StandardScaler()
X = scaler.fit_transform(X)
test_to_submit = scaler.transform(test_data) 


X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)


In [14]:
y_train.shape

(812,)

## Selection of the model

### Basic Machine Learning models

In [4]:
models = {
    "Random Forest": RandomForestClassifier(random_state=42, class_weight='balanced'),
    "Support Vector Machine": SVC(kernel='linear', random_state=42, class_weight='balanced'),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=7),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42)
}

results = []

# Open a text file to write results
with open("model_results.txt", "a") as file:
    for model_name, model in models.items():
       
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        
        accuracy = accuracy_score(y_test, y_pred)
        
        result = {
            "Model": model_name,
            "Parameters": model.get_params(),
            "Accuracy": accuracy
        }
        results.append(result)
        
        file.write(f"Model: {model_name}\n")
        file.write(f"Parameters: {result['Parameters']}\n")
        file.write(f"Accuracy: {accuracy:.4f}\n")
        file.write("="*40 + "\n")

results_df = pd.DataFrame(results)
print(results_df)

                    Model                                         Parameters  \
0           Random Forest  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...   
1  Support Vector Machine  {'C': 1.0, 'break_ties': False, 'cache_size': ...   
2     K-Nearest Neighbors  {'algorithm': 'auto', 'leaf_size': 30, 'metric...   
3       Gradient Boosting  {'ccp_alpha': 0.0, 'criterion': 'friedman_mse'...   

   Accuracy  
0  0.600000  
1  0.542857  
2  0.582857  
3  0.600000  


In [7]:
y_train.shape

(812,)

### MLP model

In [6]:
model = Sequential()

model.add(Dense(64, input_dim=X_train.shape[1], activation='relu')) 
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu')) 
model.add(Dense(y_train.shape[1], activation='softmax')) 

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

IndexError: tuple index out of range

In [None]:
# Early stopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, 
                    validation_data=(X_val, y_val), 
                    callbacks=[early_stopping])

## Hyperparameters tuning

In [None]:
neurons = [32, 48, 64, 128]  
hidden_layer_options = [1, 2, 3] 
best_accuracy = 0
accuracies = []

for num_layers in hidden_layer_options:
    for neuron in neurons:
       
        model = Sequential()
        model.add(Dense(neuron, input_dim=X_train.shape[1], activation='relu'))  
        for _ in range(num_layers - 1):  
            model.add(Dense(neuron // 2, activation='relu'))
        model.add(Dense(y_train.shape[1], activation='softmax')) 
        
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        

        history = model.fit(X_train, y_train, epochs=50, batch_size=32, 
                            validation_data=(X_val, y_val), callbacks=[early_stopping])
        

        _, accuracy = model.evaluate(X_test, y_test)
        accuracies.append((num_layers, neuron, accuracy))  

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model

print(f"Best Model Test Accuracy: {best_accuracy * 100:.2f}%")
print("Best Model:", best_model.summary())

accuracies_np = np.array(accuracies)
neurons_list = accuracies_np[:, 1] 
accuracies_per_layers = {layer: [] for layer in hidden_layer_options}

for (num_layers, neuron, accuracy) in accuracies:
    accuracies_per_layers[num_layers].append((neuron, accuracy))

## Analysis and comparison

In [None]:
plt.figure(figsize=(10, 6))
for num_layers, results in accuracies_per_layers.items():
    neurons_vals, accuracies_vals = zip(*results)
    plt.plot(neurons_vals, accuracies_vals, label=f'{num_layers} Layers', marker='o')

plt.xlabel('Number of Neurons in Hidden Layers')
plt.ylabel('Test Accuracy')
plt.title('Effect of Hidden Layers and Neurons on Model Accuracy')
plt.legend()
plt.grid(True)
plt.savefig("analysis.pdf")
plt.show()

## Classification of the samples

In [None]:
test_predictions = best_model.predict(test_to_submit)

predicted_labels = np.argmax(test_predictions, axis=1)

submission = pd.DataFrame(predicted_labels, columns=['emotion'])

emotion_labels = list(y.columns)  
submission['emotion'] = submission['emotion'].map(lambda x: emotion_labels[x])

submission.to_csv('submission.csv', index=False)