## Import Required Libraries and Modules

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torcheval.metrics import R2Score

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import joblib

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import math

import json

import sys
sys.path.insert(1, '../utils')
from paths import *

sys.path.insert(1, '../models')
from feedforward_nn_combined import NeuralNetworkCombined
from pprint import pprint

import chromadb

## Load Music Metadata

In [None]:
# Song-level metadata for the two corpora used in this notebook.
# Only the DEAM 2013 metadata is read: the DEAM 2014 metadata.csv is
# skipped because it is corrupted.
deam_2013_metadata_csv = '../../data/DEAM/metadata/metadata_2013.csv'
pmemo_metadata_csv = '../../data/PMEmo/PMEmo2019/metadata.csv'

df_music_deam_2013 = pd.read_csv(deam_2013_metadata_csv)
df_music_pmemo = pd.read_csv(pmemo_metadata_csv)

In [None]:
def strip_values(value):
    """Return *value* with leading/trailing whitespace removed.

    Only text values (``str`` / ``bytes``) are stripped. Anything else --
    for example ``NaN`` from an empty CSV cell, ``None`` or a number -- is
    returned unchanged, so the function can be mapped over every column of
    a DataFrame without raising ``AttributeError`` on non-text cells.

    Args:
        value: A single cell value.

    Returns:
        The stripped text, or ``value`` itself when it is not text.
    """
    if isinstance(value, (str, bytes)):
        return value.strip()
    return value

In [None]:
# Build the cleaned DEAM 2013 metadata table without touching the raw frame:
#   * file_path - location of the song's mp3, derived from the raw song_id
#   * song_id   - raw id prefixed with "deam_"
#   * segment / file name / genre columns dropped, artist and title renamed
# `assign` evaluates its keyword arguments in order, so `file_path` is built
# from the raw id before `song_id` is overwritten.
df_music_deam_2013_copy = (
    df_music_deam_2013
    .assign(
        file_path=lambda frame: frame['song_id'].apply(lambda x: f'../../data/DEAM/MEMD_audio/{x}.mp3'),
        song_id=lambda frame: frame['song_id'].apply(lambda x: f'deam_{x}'),
    )
    .drop(columns=["start of the segment (min.sec)", "end of the segment (min.sec)", "file_name", "Genre"])
    .rename(columns={"Artist": "artist", "Song title": "title"})
)

# Column list of the cleaned table; reused later to align the PMEmo table.
cols = list(df_music_deam_2013_copy.columns)

# Trim surrounding whitespace in every cell, one column at a time.
for col in cols:
    stripped = df_music_deam_2013_copy[col].map(strip_values)
    df_music_deam_2013_copy[col] = stripped

display(df_music_deam_2013_copy)

In [None]:
# Ids of the PMEmo songs that appear in the static annotations file; used
# to filter the PMEmo metadata table.
pmemo_annotations_csv = '../../data/PMEmo/PMEmo2019/processed/annotations/pmemo_static_annotations.csv'
df_pmemo_annotations = pd.read_csv(pmemo_annotations_csv)

pmemo_song_ids = df_pmemo_annotations['song_id'].tolist()
print(pmemo_song_ids)

In [None]:
# Keep only the PMEmo songs whose id is in `pmemo_song_ids`. The explicit
# `.copy()` after filtering makes the result an independent frame, so the
# column assignments below do not write into a slice of `df_music_pmemo`
# (which would trigger pandas' SettingWithCopyWarning).
has_annotation = df_music_pmemo['musicId'].isin(pmemo_song_ids)
df_music_pmemo_copy = df_music_pmemo[has_annotation].copy()

# Both derived columns are computed from the filtered frame itself rather
# than from the unfiltered source, so no index alignment is relied upon.
df_music_pmemo_copy['file_path'] = df_music_pmemo_copy['musicId'].apply(lambda x: f'../../data/PMEmo/PMEmo2019/chorus/{x}.mp3')
df_music_pmemo_copy['song_id'] = df_music_pmemo_copy['musicId'].apply(lambda x: f'pmemo_{x}')
df_music_pmemo_copy = df_music_pmemo_copy.drop(columns=["musicId", "fileName", "album", "duration", "chorus_start_time", "chorus_end_time"])

# Select the same columns, in the same order, as the DEAM table (`cols`) so
# the two tables can be concatenated row-wise.
df_music_pmemo_copy = df_music_pmemo_copy[cols]
display(df_music_pmemo_copy)

In [None]:
# Stack the DEAM and PMEmo metadata tables row-wise into one table.
# Each frame keeps its own row labels (the index is not reset here).
music_frames = [df_music_deam_2013_copy, df_music_pmemo_copy]
df_music = pd.concat(music_frames, axis=0)

display(df_music)

## Load Opensmile Features

In [None]:
# Per-song feature table for the combined dataset (read from the
# "scaled/normalised" features CSV).
features_csv = "../../data/combined/features/scaled/normalised_opensmile_gemaps_features.csv"
df_features = pd.read_csv(features_csv)
display(df_features)

In [None]:
# Attach the feature columns to the song metadata. Inner join on "song_id":
# songs missing from either table are dropped.
df_combined = pd.merge(df_music, df_features, how="inner", on="song_id")
display(df_combined)

In [None]:
# Load the model configuration (feature columns, scaler path, weights path).
# The encoding is given explicitly so decoding the JSON does not depend on
# the platform's locale default.
with open("../models/opensmile_gemaps_normalised/config.json", encoding="utf-8") as file:
    config = json.load(file)

In [None]:
# Pretty-print the loaded model configuration for inspection.
pprint(config)

In [None]:
# Names of the model's input feature columns, taken from the config's
# "columns" entry.
feature_cols = config["columns"]

pprint(feature_cols)

In [None]:
# Load the fitted feature scaler referenced by the config's "scaler" entry.
scaler_fpath = config["scaler"]
# print(scaler_fpath)
# Rewrite the stored path relative to this notebook by replacing the FIRST
# "." in it with "../..".
# NOTE(review): this only yields a valid path when the stored path starts
# with "./" -- otherwise the first dot may be a file-extension dot. Confirm
# the path format used in config.json.
scaler_fpath = scaler_fpath.replace(".", "../..", 1)
# NOTE(review): joblib.load unpickles the file, so it must come from a
# trusted source. The object is presumably a MinMaxScaler (per the original
# inline note); no later cell in this notebook uses `scaler`.
scaler = joblib.load(scaler_fpath) #MinMaxScaler

In [None]:
# Rebuild the network with one input unit per configured feature column and
# load its trained weights.
input_size = len(config["columns"])
print(input_size)
weights_fpath = config["model_weights"]
# Rewrite the stored path relative to this notebook: the first "." in it
# is replaced with "../.." (assumes the stored path starts with "./").
weights_fpath = weights_fpath.replace(".", "../..", 1)

model = NeuralNetworkCombined(input_size)
# The model is only used for inference in this notebook, so switch it to
# evaluation mode (disables dropout / uses running batch-norm statistics,
# if the network has such layers).
model.eval()
# map_location forces the weights onto the CPU, so the checkpoint loads on
# machines without a GPU; later cells call `.numpy()` on the outputs, which
# requires CPU tensors anyway.
model.load_state_dict(torch.load(weights_fpath, map_location=torch.device("cpu")))

In [None]:
# Build the model input from the merged table: one row per song, one column
# per configured feature, as float32.
# NOTE(review): the features are read from a "scaled/normalised" CSV, so they
# are assumed to be scaled already and the loaded `scaler` is NOT re-applied
# here -- confirm this matches how the model was trained.
inputs = torch.tensor(df_combined[feature_cols].to_numpy(dtype="float32"))

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    preds = model(inputs)
print(preds)

In [None]:
# Limit every predicted value to the closed interval [-1, 1].
preds = preds.clamp(min=-1, max=1)

In [None]:
# Store the predictions next to the song metadata: output column 0 goes to
# "valence_pred" and output column 1 to "arousal_pred".
va_pred = preds.detach().numpy()
df_combined["valence_pred"] = va_pred[:, 0]
df_combined["arousal_pred"] = va_pred[:, 1]
display(df_combined)

In [None]:
# Scatter plot of the predictions: valence on the x-axis, arousal on the
# y-axis, one green point per song.
valence_values = df_combined["valence_pred"]
arousal_values = df_combined["arousal_pred"]

plt.scatter(valence_values, arousal_values, c="green")
plt.grid(True)
plt.show()

In [None]:
# Metadata columns are picked by position: the 2nd to 4th columns of
# `df_combined` (the first column is skipped).
metadata = list(df_combined.columns[1:4])

# One {column: value} dict per song, in row order.
metadata_dicts = df_combined[metadata].to_dict(orient='records')

In [None]:
# Song ids as a plain Python list, in the same row order as `df_combined`.
song_ids = df_combined["song_id"].tolist()
print(song_ids)

## Chroma Vector Database

In [None]:
# Create the Chroma client used by the rest of the notebook.
# NOTE(review): `chromadb.Client()` is presumably the non-persistent
# (in-memory) client, given the persistent alternative below -- confirm
# against the installed chromadb version.
client = chromadb.Client()

## Persistent store
# Alternative that keeps the database on disk under ./db/:
# client = chromadb.PersistentClient(path="./db/")

# Quick check that the client responds; the result is shown as the cell output.
client.heartbeat()


In [None]:
# Distance function options:
# https://docs.trychroma.com/usage-guide#changing-the-distance-function
# `get_or_create_collection` keeps this cell re-runnable: `create_collection`
# raises when a collection with this name already exists.
collection = client.get_or_create_collection(
        name="SiTunes_dataset",
        metadata={"hnsw:space": "l2"} # "l2", "ip" or "cosine"
    )

# Populate the collection: one 2-D embedding (valence_pred, arousal_pred) per
# song, keyed by song id, with the per-song metadata dicts attached.
# Without this step the queries below run against an empty collection.
# `upsert` (rather than `add`) makes re-running the cell overwrite existing
# entries instead of inserting duplicates.
# NOTE(review): Chroma requires unique string ids and metadata values of
# type str/int/float/bool -- confirm `song_ids` has no duplicates and the
# metadata columns contain no missing values.
collection.upsert(
    ids=song_ids,
    embeddings=df_combined[["valence_pred", "arousal_pred"]].to_numpy(dtype=float).tolist(),
    metadatas=metadata_dicts,
)

In [None]:
# Number of nearest neighbours each query below returns (passed to
# `collection.query` as `n_results`).
k = 3

In [None]:
# Nearest-neighbour lookup around the point (1, 1), i.e. the upper bound of
# both dimensions after predictions are clamped to [-1, 1].
# NOTE(review): the two dimensions are presumably (valence, arousal) --
# confirm against how the collection is populated.
collection.query(
    query_embeddings=[[1, 1]], # search by embeddings
    n_results=k, # number of results
    # Optional filters (disabled):
    # where={"metadata_field": "is_equal_to_this"}, # filter on metadata, e.g. genre
    # where_document={"$contains":"search_string"} # filter on document text
    # Fields that can be requested; only those listed in `include` are returned:
    # include=["distances", "metadatas", "embeddings", "documents", "uris", "data"]
    include=["distances", "metadatas", "embeddings"]
)

In [None]:
# Nearest-neighbour lookup around the point (-1, -1), i.e. the lower bound
# of both dimensions after predictions are clamped to [-1, 1].
collection.query(
    query_embeddings=[[-1, -1]], # search by embeddings
    n_results=k, # number of results
    include=["distances", "metadatas", "embeddings"]
)

In [None]:
# Nearest-neighbour lookup around the point (-1, 1): lower bound of the
# first dimension, upper bound of the second.
collection.query(
    query_embeddings=[[-1, 1]], # search by embeddings
    n_results=k, # number of results
    include=["distances", "metadatas", "embeddings"]
)

In [None]:
# Nearest-neighbour lookup around the origin (0, 0), the centre of the
# [-1, 1] x [-1, 1] square.
collection.query(
    query_embeddings=[[0, 0]], # search by embeddings
    n_results=k, # number of results
    include=["distances", "metadatas", "embeddings"]
)