In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA

# Load and preprocess the data
data = pd.read_csv('final_dataset.csv')
X = data.iloc[:, :-1].values / 255.0
y = data.iloc[:, -1].values

# Encode the target variable
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Create a pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=0.95)),  # Keep 95% of variance
    ('svm', SVC(random_state=42))
])

# Define hyperparameters for grid search
param_grid = {
    'svm__C': [0.1, 1, 10, 100],
    'svm__gamma': ['scale', 'auto', 0.1, 0.01, 0.001],
    'svm__kernel': ['rbf', 'poly'],
    'svm__degree': [2, 3, 4],  # Only used by poly kernel
    'svm__class_weight': [None, 'balanced'],
}

# Perform grid search
grid_search = GridSearchCV(
    pipeline,
    param_grid,
    cv=5,
    n_jobs=-1,
    verbose=1,
    scoring=['accuracy', 'precision_macro', 'recall_macro'],
    refit='accuracy'
)

# Fit the grid search
grid_search.fit(X_train, y_train)

# Get the best model
best_model = grid_search.best_estimator_

# Perform cross-validation on the best model
cv_scores = cross_val_score(best_model, X_train, y_train, cv=5)
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean CV score: {cv_scores.mean():.4f}")

# Train the best model on the entire training set
best_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = best_model.predict(X_test)

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')

print(f"\nBest parameters: {grid_search.best_params_}")
print(f"Test accuracy: {accuracy:.4f}")
print(f"Test precision: {precision:.4f}")
print(f"Test recall: {recall:.4f}")

# Print classification report and confusion matrix
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Function to predict emotion for new RGB values
def predict_emotion(rgb_values):
    rgb_array = np.array(rgb_values).reshape(1, -1) / 255.0
    prediction_encoded = best_model.predict(rgb_array)
    prediction = label_encoder.inverse_transform(prediction_encoded)
    return prediction[0]

# Example usage
new_rgb = [229, 0, 13, 225, 225, 255, 253, 166, 74]
predicted_emotion = predict_emotion(new_rgb)
print(f"\nPredicted Emotion: {predicted_emotion}")

Fitting 5 folds for each of 240 candidates, totalling 1200 fits
Cross-validation scores: [0.72362326 0.73957797 0.746138   0.73789907 0.73429454]
Mean CV score: 0.7363

Best parameters: {'svm__C': 100, 'svm__class_weight': 'balanced', 'svm__degree': 2, 'svm__gamma': 'auto', 'svm__kernel': 'rbf'}
Test accuracy: 0.7707
Test precision: 0.7765
Test recall: 0.7714

Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.92      0.83        13
           1       0.80      0.57      0.67        14
           2       1.00      0.85      0.92        13
           3       0.64      0.64      0.64        14
           4       0.47      0.62      0.53        13
           5       0.77      0.71      0.74        14
           6       0.83      0.77      0.80        13
           7       0.72      1.00      0.84        13
           8       0.74      1.00      0.85        14
           9       1.00      0.86      0.92        14
          10       