# Recommender system

## Preparation

### Configuration

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip3 install beautifultable

### Module imports

In [None]:
import os

import pandas as pd
import numpy as np

from copy import deepcopy
from beautifultable import BeautifulTable

from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

### Constants

In [None]:
class AffectiveStates:
  """Quadrants of the valence/arousal plane and their descriptive emotions."""

  # Quadrant identifiers: H/L = high/low, V = valence, A = arousal.
  HVHA, HVLA, LVHA, LVLA = 'HVHA', 'HVLA', 'LVHA', 'LVLA'

  # Thresholds separating "low" from "high" on each axis.
  VALENCE_MID = AROUSAL_MID = 5

  # Human-readable emotions shown to the user for every quadrant.
  AFFECTIVE_STATES_TO_EMOTIONS = dict([
    (HVHA, "happy, excited, astonished, delighted"),
    (HVLA, "content, relaxed, calm, sleepy"),
    (LVHA, "distressed, afraid, angry, annoyed"),
    (LVLA, "sad, depressed, bored, tired"),
  ])

class Paths:
  """Filesystem locations of the CSV files the notebook reads."""

  # Base directory of the project. This placeholder must be replaced with a
  # real path string before the cell is run: os.path.join(None, ...) raises
  # TypeError, so the class body fails while PROJECT_PATH is None.
  PROJECT_PATH = None # to fill
  
  # Signals CSV handed to process_recommender_system.
  RR_PEAKS_DATA_PATH = os.path.join(PROJECT_PATH, 'rr_peaks_eda.csv')

  # Song list CSV read by load_songs.
  VIDEO_LIST = os.path.join(PROJECT_PATH, 'video_list.csv')

class Model:
  """Classifier used by the recommender together with its display name."""

  # Title shown in the header of the metrics table.
  NAME = "Decision Tree"

  # Single shared estimator object; process_recommender_system fits this instance.
  INSTANCE = DecisionTreeClassifier(
    criterion='gini',
    min_samples_leaf=0.02,
    min_samples_split=0.09,
  )

class Labels:
  """Column names, row positions and dictionary keys shared by the notebook."""

  # Valence / arousal columns of the song list, read by load_songs.
  VALENCE_LABEL, AROUSAL_LABEL = 'AVG_Valence', 'AVG_Arousal'

  # Column holding the affective-state label (song list and signals data).
  AFFECTIVE_STATE_LABEL = 'affective_state'

  ARTIST_LABEL, TITLE_LABEL = 'Artist', 'Title'

  # load_songs renames the EXPERIMENT_ID_LABEL column to SONG_ID_LABEL.
  EXPERIMENT_ID_LABEL, SONG_ID_LABEL = 'Experiment_id', 'song_id'

  class Dataset:
    """Positions inside a feature row that convert_string_signal_to_array rewrites."""
    MAXS_LABEL, MEANS_LABEL, MINS_LABEL = 0, 1, 2

  class Results:
    """Keys of the metrics dictionary produced by get_evaluation."""
    TRAIN_ACCURACY_LABEL = 'train_accuracy'
    TEST_ACCURACY_LABEL = 'test_accuracy'
    F1_SCORE_LABEL = 'f1'
    PRECISION_LABEL = 'precision'
    RECALL_LABEL = 'recall'

    # Metrics rendered by create_metrics_table, in display order.
    PRINTABLE_METRICS_LABELS = [
      TRAIN_ACCURACY_LABEL, TEST_ACCURACY_LABEL,
      F1_SCORE_LABEL, PRECISION_LABEL, RECALL_LABEL,
    ]

  class Visualization:
    """Sub-header captions of the metrics table."""
    PARAM_LABEL, VALUE_LABEL = 'param', 'value'

### Functions

In [None]:
def get_emotion_from_valence_and_arousal(valence, arousal):
  """Map a (valence, arousal) pair to one of the four affective-state quadrants.

  A value equal to the corresponding midpoint counts as "high".
  """
  high_valence = valence >= AffectiveStates.VALENCE_MID
  high_arousal = arousal >= AffectiveStates.AROUSAL_MID

  if high_valence:
    return AffectiveStates.HVHA if high_arousal else AffectiveStates.HVLA
  return AffectiveStates.LVHA if high_arousal else AffectiveStates.LVLA

def get_emotion_labels(va_df):
  """Return the affective state of every row of `va_df` as a list.

  Column 0 of each row is used as valence and column 1 as arousal.
  """
  labels = []
  for row in va_df.values:
    labels.append(get_emotion_from_valence_and_arousal(row[0], row[1]))
  return labels

def load_songs(path):
  """Load the song list and label every song with its affective state.

  Args:
    path: location of the song list CSV; it must provide the experiment id,
      artist, title, valence and arousal columns named in `Labels`.

  Returns:
    DataFrame indexed by song id with the artist, title and affective-state
    columns, in that order. Artist and title strings are stripped of
    surrounding whitespace so recommendations are not printed with stray
    spaces.
  """
  def strip_if_text(value):
    # Leave non-string cells (e.g. NaN) untouched.
    return value.strip() if isinstance(value, str) else value

  # rename() returns a new frame, so nothing is mutated in place.
  raw = pd.read_csv(path).rename(columns={Labels.EXPERIMENT_ID_LABEL: Labels.SONG_ID_LABEL})

  songs = raw[[Labels.ARTIST_LABEL, Labels.TITLE_LABEL]].copy()
  for column in (Labels.ARTIST_LABEL, Labels.TITLE_LABEL):
    songs[column] = songs[column].map(strip_if_text)

  songs[Labels.AFFECTIVE_STATE_LABEL] = get_emotion_labels(raw[[Labels.VALENCE_LABEL, Labels.AROUSAL_LABEL]])
  songs.index = raw[Labels.SONG_ID_LABEL]

  return songs

def load_signals_from_csv(path):
  """Read the CSV at `path` into a DataFrame with pandas' default options."""
  signals = pd.read_csv(path)
  return signals

def factorize_labels_in_dataframe(df):
  """Replace the affective-state column with integer codes, in place.

  Args:
    df: DataFrame containing the affective-state column; it is modified in
      place and also returned.

  Returns:
    Tuple `(df, factorization)` where `factorization` maps every integer code
    to the original label it replaced.
  """
  codes, uniques = pd.factorize(df[Labels.AFFECTIVE_STATE_LABEL])
  df[Labels.AFFECTIVE_STATE_LABEL] = codes

  # Code i always stands for uniques[i]. Building the mapping from the position
  # keeps it correct even when a missing label yields the sentinel code -1,
  # which has no entry in the mapping.
  factorization = dict(enumerate(uniques.tolist()))
  return df, factorization

def convert_string_signal_to_array(row):
  """Return a copy of `row` whose three serialized signals are replaced by their means.

  The entries at the maxs, means and mins positions are comma-separated number
  strings; each one is parsed and collapsed to the mean of its values. The
  input row itself is left untouched.
  """
  converted = deepcopy(row)

  for position in (Labels.Dataset.MAXS_LABEL, Labels.Dataset.MEANS_LABEL, Labels.Dataset.MINS_LABEL):
    serialized = row[position]
    # NOTE(review): the slice drops the first character and the last two,
    # presumably the enclosing brackets - confirm against the CSV format.
    samples = np.fromstring(serialized[1:-2], sep=',')
    converted[position] = samples.mean()

  return converted

def get_train_and_test_split(data, test_size=0.2, random_state=None):
  """Build scaled train/test feature matrices and targets from the signals frame.

  Args:
    data: DataFrame whose last column is the target and whose columns from
      position 2 up to (not including) the last one are the features.
    test_size: share of the samples assigned to the test set.
    random_state: optional seed forwarded to both the shuffle and the split;
      pass an integer to get the same split on every run. The default `None`
      keeps the split random.

  Returns:
    Tuple `(X_train, X_test, y_train, y_test)`.
  """
  features = data.columns[2:-1]
  target = data.columns[-1]

  raw_X = data[features].to_numpy()
  y = data[target].to_numpy()
  X = np.asarray([convert_string_signal_to_array(row) for row in raw_X])

  X, y = shuffle(X, y, random_state=random_state)
  X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=random_state
  )

  # The scaler is fitted on the training part only and then applied to the
  # test part, so no test-set statistics reach the training data.
  scaler = MinMaxScaler()
  X_train = scaler.fit_transform(X_train)
  X_test = scaler.transform(X_test)

  return X_train, X_test, y_train, y_test

def get_evaluation(y_train, y_test, y_train_pred, y_test_pred, average='macro'):
  """Compute accuracy on both splits plus F1, precision and recall on the test split.

  F1, precision and recall share one averaging mode so they can be compared
  with each other. Mixing modes (micro F1, macro precision, weighted recall)
  made F1 and recall plain copies of the test accuracy, because micro-averaged
  F1 and weighted recall both equal accuracy for single-label multiclass
  predictions.

  Args:
    y_train, y_test: true targets of the train and test split.
    y_train_pred, y_test_pred: predictions for the train and test split.
    average: averaging mode passed to `f1_score`, `precision_score` and
      `recall_score`.

  Returns:
    Dictionary keyed by the `Labels.Results` metric names.
  """
  return {
      Labels.Results.TRAIN_ACCURACY_LABEL: accuracy_score(y_train, y_train_pred),
      Labels.Results.TEST_ACCURACY_LABEL: accuracy_score(y_test, y_test_pred),
      Labels.Results.F1_SCORE_LABEL: f1_score(y_test, y_test_pred, average=average, zero_division=0),
      Labels.Results.PRECISION_LABEL: precision_score(y_test, y_test_pred, average=average, zero_division=0),
      Labels.Results.RECALL_LABEL: recall_score(y_test, y_test_pred, average=average, zero_division=0),
  }

def create_metrics_table(name, results, iterations):
  """Build a two-column table listing the printable metrics of one classifier.

  Args:
    name: caption placed in the first header cell.
    results: dictionary holding a value for every printable metric label.
    iterations: accepted for compatibility with callers; not used here.

  Returns:
    The populated BeautifulTable.
  """
  sub_headers = [Labels.Visualization.PARAM_LABEL, Labels.Visualization.VALUE_LABEL]
  # The caption occupies the first header cell; the remaining ones stay empty.
  padding = [''] * (len(sub_headers) - 1)

  table = BeautifulTable(precision=5)
  table.columns.header = [name] + padding
  table.rows.append(sub_headers)

  for label in Labels.Results.PRINTABLE_METRICS_LABELS:
    table.rows.append([label, results[label]])

  return table

def print_metrics(clf_name, results):
  """Print the metrics table of a classifier followed by an empty line."""
  table = create_metrics_table(clf_name, results, len(results))
  print(table, end='\n\n')

def get_songs_based_on_affective_state(songs_df, affective_state):
  """Return the rows of `songs_df` whose affective state equals `affective_state`."""
  state_matches = songs_df[Labels.AFFECTIVE_STATE_LABEL] == affective_state
  return songs_df[state_matches]

def make_single_recommendation(songs, predicted_affective_state, true_affective_state):
  """Print the true and predicted affective state and the songs matching the prediction."""
  suggestions = get_songs_based_on_affective_state(songs, predicted_affective_state)

  emotions = AffectiveStates.AFFECTIVE_STATES_TO_EMOTIONS
  true_emotions = emotions[true_affective_state]
  predicted_emotions = emotions[predicted_affective_state]

  intro_lines = (
    f"[{true_affective_state}]",
    f"  You think you feel {true_emotions}...",
    f"[{predicted_affective_state}]",
    f"And I think you feel {predicted_emotions}...",
    "Maybe you would like to listen to...",
  )
  for line in intro_lines:
    print(line)

  for _, song in suggestions.iterrows():
    artist = song[Labels.ARTIST_LABEL]
    title = song[Labels.TITLE_LABEL]
    print(f"    - {artist} - '{title}'?")

  print()

def make_recommendations(clf, X_test, y_test, factorization):
  """Predict the affective state of every sample and print a recommendation for each.

  Args:
    clf: fitted classifier exposing `predict`.
    X_test: feature rows to predict on.
    y_test: true integer codes of those rows.
    factorization: mapping from integer code to affective-state label.
  """
  songs = load_songs(Paths.VIDEO_LIST)
  y_predictions = clf.predict(X_test)

  true_states = [factorization[code] for code in y_test]
  predicted_states = [factorization[code] for code in y_predictions]

  hits = sum(actual == guessed for actual, guessed in zip(true_states, predicted_states))
  total = len(y_predictions)

  print(f"Correct recommendations: {hits}/{total}")

  for number, (actual, guessed) in enumerate(zip(true_states, predicted_states), start=1):
    print(f"Recommendation no. {number}")
    make_single_recommendation(songs, guessed, actual)

def process_recommender_system(path, clf, clf_name, n=1):
  """Train and evaluate `clf` on the signals data, then print `n` sample recommendations.

  Args:
    path: location of the signals CSV.
    clf: classifier exposing `fit` and `predict`; it is fitted here.
    clf_name: caption used for the printed metrics table.
    n: number of randomly chosen test samples to print recommendations for.
  """
  signals = load_signals_from_csv(path)
  # Bind the factorized frame to the name that is used below. The earlier
  # misspelled binding was never read, so the code depended on the helper
  # mutating its argument in place.
  signals, numbers_as_affective_states = factorize_labels_in_dataframe(signals)

  X_train, X_test, y_train, y_test = get_train_and_test_split(signals)
  clf.fit(X_train, y_train)
  y_train_pred = clf.predict(X_train)
  y_test_pred = clf.predict(X_test)

  results = get_evaluation(y_train, y_test, y_train_pred, y_test_pred)
  print_metrics(clf_name, results)

  # Reshuffle so the printed samples are a random pick from the test set.
  X_test, y_test = shuffle(X_test, y_test)
  X_samples, y_samples = X_test[:n], y_test[:n]

  make_recommendations(clf, X_samples, y_samples, numbers_as_affective_states)

## Recommendations

### Overview of songs

In [None]:
# Song list labelled with affective states, indexed by song id.
songs = load_songs(Paths.VIDEO_LIST)

In [None]:
# Songs in the high-valence / high-arousal quadrant.
get_songs_based_on_affective_state(songs, AffectiveStates.HVHA)

Unnamed: 0_level_0,Artist,Title,affective_state
song_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,EmilĂ­ana Torrini,Jungle Drum,HVHA
2,Lustra,Scotty Doesn't Know,HVHA
3,Jackson 5,Blame It On The Boogie,HVHA
4,The B52'S,Love Shack,HVHA
5,Blur,Song 2,HVHA
6,Blink 182,First Date,HVHA
7,Benny Benassi,Satisfaction,HVHA
8,Lily Allen,Fuck You,HVHA
9,Queen,I Want To Break Free,HVHA
10,Rage Against The Machine,Bombtrack,HVHA


In [None]:
# Songs in the high-valence / low-arousal quadrant.
get_songs_based_on_affective_state(songs, AffectiveStates.HVLA)

Unnamed: 0_level_0,Artist,Title,affective_state
song_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
11,Michael Franti & Spearhead,Say Hey (I Love You),HVLA
12,Grand Archives,Miniature Birds,HVLA
13,Bright Eyes,First Day Of My Life,HVLA
14,Jason Mraz,I'm Yours,HVLA
15,Bishop Allen,Butterfly Nets,HVLA
16,The Submarines,Darkest Things,HVLA
17,Air,Moon Safari,HVLA
18,Louis Armstrong,What A Wonderful World,HVLA
19,Manu Chao,Me Gustas Tu,HVLA
20,Taylor Swift,Love Story,HVLA


In [None]:
# Songs in the low-valence / low-arousal quadrant.
get_songs_based_on_affective_state(songs, AffectiveStates.LVLA)

Unnamed: 0_level_0,Artist,Title,affective_state
song_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
21,Diamanda Galas,Gloomy Sunday,LVLA
22,Porcupine Tree,Normal,LVLA
23,Wilco,How To Fight Loneliness,LVLA
24,James Blunt,Goodbye My Lover,LVLA
25,A Fine Frenzy,Goodbye My Almost Lover,LVLA
26,Kings Of Convenience,The Weight Of My Words,LVLA
27,Madonna,Rain,LVLA
28,Sia,Breathe Me,LVLA
29,Christina Aguilera,Hurt,LVLA
30,Enya,May It Be (Saving Private Ryan),LVLA


In [None]:
# Songs in the low-valence / high-arousal quadrant.
get_songs_based_on_affective_state(songs, AffectiveStates.LVHA)

Unnamed: 0_level_0,Artist,Title,affective_state
song_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
31,Mortemia,The One I Once Was,LVHA
32,Marilyn Manson,The Beautiful People,LVHA
33,Dead To Fall,Bastard Set Of Dreams,LVHA
34,Dj Paul Elstak,A Hardcore State Of Mind,LVHA
35,Napalm Death,Procrastination On The Empty Vessel,LVHA
36,Sepultura,Refuse Resist,LVHA
37,Cradle Of Filth,Scorched Earth Erotica,LVHA
38,Gorgoroth,Carving A Giant,LVHA
39,Dark Funeral,My Funeral,LVHA
40,Arch Enemy,My Apocalypse,LVHA


### Getting example recommendations

In [None]:
# Fit and evaluate the configured classifier, then print recommendations for 5 test samples.
process_recommender_system(Paths.RR_PEAKS_DATA_PATH, Model.INSTANCE, Model.NAME, n=5)

+----------------+---------+
| Decision Tree  |         |
+----------------+---------+
|     param      |  value  |
+----------------+---------+
| train_accuracy | 0.40938 |
+----------------+---------+
| test_accuracy  | 0.2875  |
+----------------+---------+
|       f1       | 0.2875  |
+----------------+---------+
|   precision    | 0.28455 |
+----------------+---------+
|     recall     | 0.2875  |
+----------------+---------+

Correct recommendations: 2/5
Recommendation no. 1
[LVHA]
  You think you feel distressed, afraid, angry, annoyed...
[HVHA]
And I think you feel happy, excited, astonished, delighted...
Maybe you would like to listen to...
    - EmilĂ­ana Torrini - 'Jungle Drum'?
    - Lustra - 'Scotty Doesn't Know'?
    - Jackson 5 - 'Blame It On The Boogie'?
    - The B52'S  - 'Love Shack'?
    - Blur  - 'Song 2 '?
    - Blink 182  - 'First Date'?
    - Benny Benassi  - 'Satisfaction '?
    - Lily Allen  - 'Fuck You'?
    - Queen - 'I Want To Break Free'?
    - Rage Against