In [2]:
import numpy as np
import pandas as pd

In [3]:
# Location of the static annotations CSV.
# A raw string keeps every Windows backslash literal; the previous literal mixed
# escaped "\\" with a bare "\M", which is an invalid escape sequence that newer
# Python versions warn about. The resulting path value is unchanged.
# NOTE(review): this is an absolute, machine-specific path — consider making it
# configurable so the notebook can run elsewhere.
ANNOTATIONS_CSV = r"C:\Users\Majid\Programming Files\FYP\Datasets\EMO_Dataset\annotations\static_annotations.csv"

# Load the annotations and preview the first rows.
song_info = pd.read_csv(ANNOTATIONS_CSV)
song_info.head(20)

Unnamed: 0,song_id,mean_arousal,std_arousal,mean_valence,std_valence
0,2,3.1,0.99443,3.0,0.66667
1,3,3.5,1.8409,3.3,1.7029
2,4,5.7,1.4944,5.5,1.7159
3,5,4.4,2.1187,5.3,1.9465
4,7,5.8,1.5492,6.4,1.7764
5,8,3.2,1.4757,4.8,1.6193
6,10,4.0,1.7638,4.7,1.9465
7,12,5.5,2.0138,5.8,1.9889
8,13,3.2,1.4757,4.0,1.7638
9,17,4.4,1.8974,6.0,2.1602


In [4]:
# Build the working frame without the standard-deviation columns; only the
# mean arousal/valence ratings are used by the cells shown below.
unused_columns = ['std_arousal', 'std_valence']
static_df = song_info.drop(unused_columns, axis=1)

static_df.head()

Unnamed: 0,song_id,mean_arousal,mean_valence
0,2,3.1,3.0
1,3,3.5,3.3
2,4,5.7,5.5
3,5,4.4,5.3
4,7,5.8,6.4


In [5]:
# Observed lower/upper bounds of each mean rating column.
min_arousal, max_arousal = static_df["mean_arousal"].agg(["min", "max"])
min_valence, max_valence = static_df["mean_valence"].agg(["min", "max"])

# Min-max rescale both ratings onto [-1, 1]:
#   (value - min) / (max - min) maps onto [0, 1]; "* 2 - 1" stretches that to [-1, 1].
static_df["norm_arousal"] = (
    static_df["mean_arousal"]
    .sub(min_arousal)
    .div(max_arousal - min_arousal)
    .mul(2)
    .sub(1)
)
static_df["norm_valence"] = (
    static_df["mean_valence"]
    .sub(min_valence)
    .div(max_valence - min_valence)
    .mul(2)
    .sub(1)
)

In [6]:
def define_emotion(norm_arousal, norm_valence):
    """Map a normalized (arousal, valence) pair to an emotion label.

    Each coordinate is rounded to the nearest integer (-1, 0 or 1) and the
    resulting pair is looked up in a fixed table of eight labels.

    Parameters
    ----------
    norm_arousal : float
        Arousal value, expected in the closed interval [-1, 1].
    norm_valence : float
        Valence value, expected in the closed interval [-1, 1].

    Returns
    -------
    str
        * ``"Invalid values"`` if either input lies outside [-1, 1] or is NaN.
        * ``"Undefined"`` if the rounded pair has no entry in the table; the
          only such pair is (0, 0), i.e. both values round to zero.
        * Otherwise the emotion label for the rounded pair.

    Notes
    -----
    Python's ``round`` rounds halves to the nearest even integer, so exactly
    +/-0.5 rounds to 0.
    """
    # Label for each rounded (arousal, valence) pair; (0, 0) is deliberately
    # absent and falls through to "Undefined".
    emotions = {
        (1, 1): "Happy",
        (1, 0): "Energetic",
        (0, 1): "Peaceful",
        (-1, -1): "Sad",
        (-1, 0): "Relaxed",
        (0, -1): "Calm",
        (1, -1): "Excited",
        (-1, 1): "Serious"
    }

    # Written as "not (in range)" rather than "out of range" so that NaN, for
    # which every comparison is False, is rejected here instead of reaching
    # round(), which raises ValueError on NaN.
    in_range = -1 <= norm_arousal <= 1 and -1 <= norm_valence <= 1
    if not in_range:
        return "Invalid values"

    # Direct dictionary lookup on the rounded pair.
    rounded_pair = (round(norm_arousal), round(norm_valence))
    return emotions.get(rounded_pair, "Undefined")

In [8]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV

# Add 'emotion' column: label every song with the rule-based emotion derived
# from its normalized arousal/valence pair.
static_df['emotion'] = [
    define_emotion(arousal, valence)
    for arousal, valence in zip(static_df['norm_arousal'], static_df['norm_valence'])
]

# Split data into train and test sets (80/20, fixed seed for reproducibility)
X = static_df[['norm_arousal', 'norm_valence']]
y = static_df['emotion']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# NOTE: the target 'emotion' is a deterministic function of the two feature
# columns (it is computed from them just above), so the classifier only has to
# recover the rounding thresholds. A near-perfect accuracy is therefore
# expected and says nothing about predicting emotion from independent data.

# Define hyperparameters to tune
hyperparameters = {
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Create Decision Tree classifier; a fixed random_state makes tie-breaking
# between equally good splits, and hence the whole search, reproducible.
decision_tree = DecisionTreeClassifier(random_state=42)

# Perform grid search to find the best hyperparameters
grid_search = GridSearchCV(decision_tree, hyperparameters, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Get the best hyperparameters. GridSearchCV (refit=True by default) has
# already refitted an estimator with these parameters on the full training
# set, so reuse it instead of constructing and fitting a second tree.
best_params = grid_search.best_params_
decision_tree_best = grid_search.best_estimator_

# Predict the emotions of the test set using the best classifier
song_emotion = decision_tree_best.predict(X_test)

# Create a new DataFrame with the predicted emotions
predicted_df = X_test.copy()
predicted_df['predicted_emotion'] = song_emotion

# Print the updated DataFrame with predicted emotions
print(predicted_df)

# Evaluate the performance of the best classifier
accuracy = accuracy_score(y_test, song_emotion)
print(f"Accuracy: {accuracy}")




     norm_arousal  norm_valence predicted_emotion
609     -0.676471     -0.076923           Relaxed
539     -0.058824      0.292308         Undefined
694     -0.294118      0.200000         Undefined
350     -0.147059      0.169231         Undefined
174     -0.058824     -0.015385         Undefined
..            ...           ...               ...
404     -0.323529     -0.353846         Undefined
69       0.235294     -0.046154         Undefined
712      0.147059      0.076923         Undefined
131      0.382353     -0.415385         Undefined
44       0.411765      0.292308         Undefined

[149 rows x 3 columns]
Accuracy: 1.0


In [9]:
# Number of unique values
# Summarise which emotion labels actually occur in the data set.
emotion_labels = static_df['emotion']

# How many distinct labels there are
num_unique_values = emotion_labels.nunique()
print(f"Number of unique values in 'emotion': {num_unique_values}")

# Which labels they are, in order of first appearance
list_unique_values = emotion_labels.unique()
print(f"Unique values in 'emotion': {list_unique_values}")


Number of unique values in 'emotion': 8
Unique values in 'emotion': ['Sad' 'Undefined' 'Relaxed' 'Peaceful' 'Energetic' 'Calm' 'Happy'
 'Excited']


In [10]:
# Count the test-set rows whose predicted label is 'Undefined'.
is_undefined = predicted_df['predicted_emotion'] == 'Undefined'
undefined_count = len(predicted_df[is_undefined])
print(f"Number of instances classified as 'Undefined': {undefined_count}")


Number of instances classified as 'Undefined': 98
