### Linear Combination model

##### Inputs: outputs of MFCC and numerical regression models

##### Output: multi-labels: Danceability, Instrumentalness, Speechiness, Acousticness, Energy

In [2]:
import torch
import torchaudio

In [3]:
import os
import requests
import torch

import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# CSV holding one row of features per track; its rows are later split into
# the train/val/test label tables.
labels_file = "Data/tracks_features.csv"
all_tracks = pd.read_csv(filepath_or_buffer=labels_file)

#### ONLY RUN IN GOOGLE COLAB ###

In [None]:
def name_of_file(track_id):
  """Return the Google Drive path of the MP3 audio file for ``track_id``.

  The identifier is interpolated as-is, so any value with a string
  representation is accepted.
  """
  return f"/content/drive/MyDrive/tracks_features_audio/{track_id}_audio.mp3"

In [None]:
# Root of the mounted Google Drive; the precomputed MFCC tensor lives directly in it.
myDrive = "/content/drive/MyDrive/"
tensor_file = os.path.join(myDrive, "mfcc_tensor.pt")

# Load the tensor back and print the shape
# NOTE(review): depending on the torch version / `weights_only` setting,
# torch.load may unpickle arbitrary objects -- only load files you trust.
loaded_mfcc_tensor = torch.load(tensor_file)
print("Loaded MFCC tensor shape:", loaded_mfcc_tensor.shape)

#### Import MFCC model

In [4]:
# Relative locations of the saved model weight files.
cnn_model_path, rnn_model_path, multi_task_model_path = (
  "models/cnn_model_weights.pth",             # CNN weights
  "models/rnn_model_weights.pth",             # RNN weights
  "models/multi_task_rnn_model_weights.pth",  # multi-task RNN weights
)

In [6]:

class AudioDataset(torch.utils.data.Dataset):
  """Dataset pairing standardized MFCC features with a danceability label.

  Args:
    mfcc_tensor: Tensor whose first dimension indexes samples. Row ``i`` is
      paired with the ``i``-th row (by position, not index label) of ``df``.
    df: DataFrame with a numeric ``'danceability'`` column.
    mean: Optional value subtracted from ``mfcc_tensor``. Defaults to the
      global mean of ``mfcc_tensor``. Pass the training split's ``mean`` when
      building validation/test datasets so every split shares one scaling.
    std: Optional value ``mfcc_tensor`` is divided by. Defaults to the global
      standard deviation of ``mfcc_tensor``.

  Attributes set on the instance: ``df``, ``mfcc`` (standardized tensor),
  ``mean``, ``std`` and ``labels`` (float32 tensor of danceability values).
  """

  def __init__(self, mfcc_tensor, df, mean=None, std=None):
    self.df = df
    # Fall back to this tensor's own statistics when none are supplied.
    self.mean = mfcc_tensor.mean() if mean is None else mean
    self.std = mfcc_tensor.std() if std is None else std

    # Standardize MFCC tensor
    self.mfcc = (mfcc_tensor - self.mean) / self.std

    # Materialize the labels once: looking them up with df.iloc[idx][...] in
    # __getitem__ would build a full row Series on every single access.
    self.labels = torch.tensor(
      df['danceability'].to_numpy(dtype='float32'), dtype=torch.float32
    )

  def __len__(self):
    """Return the number of rows in the label DataFrame."""
    return len(self.df)

  def __getitem__(self, idx):
    """Return ``(mfcc, label)`` for the sample at position ``idx``."""
    return self.mfcc[idx], self.labels[idx]


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# Load the MFCC tensor
myDrive = "/content/drive/MyDrive/"
tensor_file = os.path.join(myDrive, "mfcc_tensor.pt")
# NOTE(review): depending on the torch version / `weights_only` setting,
# torch.load may unpickle arbitrary objects -- only load files you trust.
# squeeze(1) drops dimension 1 when it has size 1.
loaded_mfcc_tensor = torch.load(tensor_file).squeeze(1)
print("Loaded MFCC tensor shape:", loaded_mfcc_tensor.shape)

# Split all_tracks and the loaded MFCC tensor together (70% train, then the
# remaining 30% halved into test and validation). Passing both to a single
# train_test_split call applies the same shuffled indices to each and raises
# if their lengths differ, instead of relying on two separate calls with a
# matching random_state to stay aligned.
train_tracks, test_val_tracks, train_mfcc, test_val_mfcc = train_test_split(
  all_tracks, loaded_mfcc_tensor, test_size=0.3, random_state=42
)
test_tracks, val_tracks, test_mfcc, val_mfcc = train_test_split(
  test_val_tracks, test_val_mfcc, test_size=0.5, random_state=42
)

# Create the datasets
train_dataset = AudioDataset(train_mfcc, train_tracks)
val_dataset = AudioDataset(val_mfcc, val_tracks)
test_dataset = AudioDataset(test_mfcc, test_tracks)

# Create the data loaders (only the training data is shuffled)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)
