In [None]:
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# from sklearn.linear_model import Ridge
from reservoirpy.nodes import Reservoir, Ridge, ESN
from sklearn.model_selection import train_test_split

In [2]:
# Read the three input tag tables from the training folder.
(
    input_genres_tags_data,
    input_instruments_tags_data,
    input_moods_tags_data,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/train/input_genres_tags_data.csv",
        "../data/train/input_instruments_tags_data.csv",
        "../data/train/input_moods_tags_data.csv",
    )
)

# Read the three category tables.
(
    genres_categories_data,
    instruments_categories_data,
    moods_categories_data,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/train/genres_categories_data.csv",
        "../data/train/instruments_categories_data.csv",
        "../data/train/moods_categories_data.csv",
    )
)

# Read the three output tag tables (used as training targets further down).
(
    output_genres_tags_data,
    output_instruments_tags_data,
    output_moods_tags_data,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/train/output_genres_tags_data.csv",
        "../data/train/output_instruments_tags_data.csv",
        "../data/train/output_moods_tags_data.csv",
    )
)

In [3]:
# Show the first rows of every loaded table as a quick sanity check.
# Each entry pairs the heading to print with the frame it describes.
preview_sections = (
    ("Input Genres Tags Data:", input_genres_tags_data),
    ("\nInput Instruments Tags Data:", input_instruments_tags_data),
    ("\nInput Moods Tags Data:", input_moods_tags_data),
    ("\nGenres Categories Data:", genres_categories_data),
    ("\nInstruments Categories Data:", instruments_categories_data),
    ("\nMoods Categories Data:", moods_categories_data),
    ("\nOutput Genres Tags Data:", output_genres_tags_data),
    ("\nOutput Instruments Tags Data:", output_instruments_tags_data),
    ("\nOutput Moods Tags Data:", output_moods_tags_data),
)

for heading, table in preview_sections:
    print(heading)
    print(table.head())

Input Genres Tags Data:
   ChallengeID  acid-jazz  alternative-rock  americana  blaxpoitation  \
0            0   0.000023          0.006455   0.001339       0.000159   
1            1   0.000069          0.056431   0.008977       0.000347   
2            2   0.000039          0.017784   0.001067       0.000290   
3            3   0.000047          0.004563   0.000289       0.000520   
4            4   0.000022          0.006412   0.001309       0.000204   

   bluegrass  blues-rock  bossa-nova  breakbeat  chill-out  ...    techno  \
0   0.000045    0.000191    0.000087   0.014584   0.007135  ...  0.013838   
1   0.000536    0.001447    0.000093   0.047316   0.019435  ...  0.003960   
2   0.000021    0.001512    0.000033   0.109496   0.008511  ...  0.031038   
3   0.000013    0.000264    0.000026   0.265472   0.004216  ...  0.085144   
4   0.000025    0.001302    0.000039   0.008771   0.001732  ...  0.016197   

   traditional-folk    trance      trap  trip-hop  uptown-soul  vintage-ja

In [3]:
# Hold out 20% of the rows of each tag family for testing.
# Every split uses the same test fraction and the same random seed.
split_options = {"test_size": 0.2, "random_state": 42}

# Genres
X_genres_train, X_genres_test, y_genres_train, y_genres_test = train_test_split(
    input_genres_tags_data, output_genres_tags_data, **split_options
)

# Instruments
(
    X_instruments_train,
    X_instruments_test,
    y_instruments_train,
    y_instruments_test,
) = train_test_split(
    input_instruments_tags_data, output_instruments_tags_data, **split_options
)

# Moods
X_moods_train, X_moods_test, y_moods_train, y_moods_test = train_test_split(
    input_moods_tags_data, output_moods_tags_data, **split_options
)

# Report the resulting shapes so the split can be checked by eye.
shape_report = [
    f"Genres training data shape: {X_genres_train.shape}, {y_genres_train.shape}",
    f"Genres testing data shape: {X_genres_test.shape}, {y_genres_test.shape}",
    f"Instruments training data shape: {X_instruments_train.shape}, {y_instruments_train.shape}",
    f"Instruments testing data shape: {X_instruments_test.shape}, {y_instruments_test.shape}",
    f"Moods training data shape: {X_moods_train.shape}, {y_moods_train.shape}",
    f"Moods testing data shape: {X_moods_test.shape}, {y_moods_test.shape}",
]
for report_line in shape_report:
    print(report_line)


Genres training data shape: (88683, 91), (88683, 91)
Genres testing data shape: (22171, 91), (22171, 91)
Instruments training data shape: (88683, 113), (88683, 113)
Instruments testing data shape: (22171, 113), (22171, 113)
Moods training data shape: (88683, 47), (88683, 47)
Moods testing data shape: (22171, 47), (22171, 47)


In [4]:
# Prepare the data for training: remove the ChallengeID column from every
# split so only the tag columns are left as model inputs and targets.
#
# The frames are rebound to the same names, so without errors="ignore" a
# second run of this cell would raise KeyError (the column is already gone).
# With it, re-running the cell is a harmless no-op.
ID_COLUMN = "ChallengeID"

X_genres_train = X_genres_train.drop(columns=[ID_COLUMN], errors="ignore")
X_instruments_train = X_instruments_train.drop(columns=[ID_COLUMN], errors="ignore")
X_moods_train = X_moods_train.drop(columns=[ID_COLUMN], errors="ignore")
y_genres_train = y_genres_train.drop(columns=[ID_COLUMN], errors="ignore")
y_instruments_train = y_instruments_train.drop(columns=[ID_COLUMN], errors="ignore")
y_moods_train = y_moods_train.drop(columns=[ID_COLUMN], errors="ignore")

X_genres_test = X_genres_test.drop(columns=[ID_COLUMN], errors="ignore")
X_instruments_test = X_instruments_test.drop(columns=[ID_COLUMN], errors="ignore")
X_moods_test = X_moods_test.drop(columns=[ID_COLUMN], errors="ignore")
y_genres_test = y_genres_test.drop(columns=[ID_COLUMN], errors="ignore")
y_instruments_test = y_instruments_test.drop(columns=[ID_COLUMN], errors="ignore")
y_moods_test = y_moods_test.drop(columns=[ID_COLUMN], errors="ignore")

In [None]:
# Define the reservoir and readout models
RESERVOIR_SEED = 42  # fixed seed so the random reservoir weights are reproducible


def build_esn(units=50, sr=0, lr=1, ridge=1e-4, seed=RESERVOIR_SEED, **reservoir_options):
    """Create a reservoir, a ridge readout, and the ESN that combines them.

    Parameters
    ----------
    units : int
        Number of neurons in the reservoir.
    sr : float
        Spectral radius of the reservoir weight matrix.
        NOTE(review): sr=0 presumably zeroes the recurrent weights, leaving
        no memory between steps - confirm this is intended.
    lr : float
        Leak rate.
    ridge : float
        Regularization strength of the readout (penalizes overfitting).
    seed : int or None
        Seed forwarded to ``Reservoir`` for its random initialization.
    **reservoir_options
        Extra keyword arguments forwarded unchanged to ``Reservoir``
        (for example ``input_scaling``).

    Returns
    -------
    tuple
        ``(reservoir, readout, model)``.
    """
    reservoir = Reservoir(units=units, sr=sr, lr=lr, seed=seed, **reservoir_options)
    readout = Ridge(ridge=ridge)
    model = ESN(reservoir=reservoir, readout=readout, workers=-1)
    return reservoir, readout, model


# One independent model per tag family. The genre reservoir keeps its explicit
# input scaling factor; the other two use the library default.
reservoir_Genre, readout_Genre, model_Genre = build_esn(input_scaling=1.0)
reservoir_Instrument, readout_Instrument, model_Instrument = build_esn()
reservoir_Mood, readout_Mood, model_Mood = build_esn()

In [None]:
# Give every training row its own length-one sequence axis:
# (rows, columns) -> (rows, 1, columns).
X_genres_train_reshaped = np.reshape(
    X_genres_train.values, (-1, 1, X_genres_train.shape[1])
)
X_instruments_train_reshaped = np.reshape(
    X_instruments_train.values, (-1, 1, X_instruments_train.shape[1])
)
X_moods_train_reshaped = np.reshape(
    X_moods_train.values, (-1, 1, X_moods_train.shape[1])
)

y_genres_train_reshaped = np.reshape(
    y_genres_train.values, (-1, 1, y_genres_train.shape[1])
)
y_instruments_train_reshaped = np.reshape(
    y_instruments_train.values, (-1, 1, y_instruments_train.shape[1])
)
y_moods_train_reshaped = np.reshape(
    y_moods_train.values, (-1, 1, y_moods_train.shape[1])
)

# Print the shapes before and after reshaping, in that order.
training_shape_report = [
    f"Genres training data shape: {X_genres_train.shape}, {y_genres_train.shape}",
    f"Instruments training data shape: {X_instruments_train.shape}, {y_instruments_train.shape}",
    f"Moods training data shape: {X_moods_train.shape}, {y_moods_train.shape}",
    f"Genres training data reshape: {X_genres_train_reshaped.shape}, {y_genres_train_reshaped.shape}",
    f"Instruments training data reshape: {X_instruments_train_reshaped.shape}, {y_instruments_train_reshaped.shape}",
    f"Moods training data reshape: {X_moods_train_reshaped.shape}, {y_moods_train_reshaped.shape}",
]
for report_line in training_shape_report:
    print(report_line)


Genres training data shape: (88683, 90), (88683, 90)
Instruments training data shape: (88683, 112), (88683, 112)
Moods training data shape: (88683, 46), (88683, 46)
Genres training data reshape: (88683, 1, 90), (88683, 1, 90)
Instruments training data reshape: (88683, 1, 112), (88683, 1, 112)
Moods training data reshape: (88683, 1, 46), (88683, 1, 46)


In [None]:
# Fit each model on its own reshaped inputs and targets
# (every sample is a sequence with a single timestep).
training_jobs = (
    (model_Genre, X_genres_train_reshaped, y_genres_train_reshaped),
    (model_Instrument, X_instruments_train_reshaped, y_instruments_train_reshaped),
    (model_Mood, X_moods_train_reshaped, y_moods_train_reshaped),
)
for esn_model, train_inputs, train_targets in training_jobs:
    esn_model.fit(train_inputs, train_targets)

print("Models trained successfully.")

RuntimeError: Impossible to initialize Ridge-10: output dimension was not specified at creation, and no teacher vector was given.

In [None]:
# Reshape the held-out inputs the same way as the training inputs:
# (rows, columns) -> (rows, 1, columns).
X_genres_test_reshaped = np.reshape(
    X_genres_test.values, (-1, 1, X_genres_test.shape[1])
)
X_instruments_test_reshaped = np.reshape(
    X_instruments_test.values, (-1, 1, X_instruments_test.shape[1])
)
X_moods_test_reshaped = np.reshape(
    X_moods_test.values, (-1, 1, X_moods_test.shape[1])
)

# Report the reshaped test shapes.
testing_shape_report = [
    f"Genres testing data shape: {X_genres_test_reshaped.shape}",
    f"Instruments testing data shape: {X_instruments_test_reshaped.shape}",
    f"Moods testing data shape: {X_moods_test_reshaped.shape}",
]
for report_line in testing_shape_report:
    print(report_line)


Genres testing data shape: (22171, 1, 90)
Instruments testing data shape: (22171, 1, 112)
Moods testing data shape: (22171, 1, 46)


In [66]:
# Run each model on its reshaped test inputs (genres, instruments, moods, in that order).
y_genres_pred, y_instruments_pred, y_moods_pred = [
    esn_model.run(test_inputs)
    for esn_model, test_inputs in (
        (model_Genre, X_genres_test_reshaped),
        (model_Instrument, X_instruments_test_reshaped),
        (model_Mood, X_moods_test_reshaped),
    )
]

Running ESN-5: 100%|██████████| 22171/22171 [00:00<00:00, 37488.35it/s]
Running ESN-6: 100%|██████████| 22171/22171 [00:00<00:00, 34534.81it/s]
Running ESN-7: 100%|██████████| 22171/22171 [00:00<00:00, 46451.60it/s]


In [None]:
def save_predictions_to_csv(predictions_list, filename=None):
    """Flatten predictions to 2-D, write them to a CSV file, and return the frame.

    Parameters
    ----------
    predictions_list : array-like
        Predictions whose last axis indexes the output columns, for example a
        list of ``(1, outputs)`` arrays. All leading axes are collapsed into
        rows.
    filename : str, optional
        File stem (without ``.csv``) to write under ``../data/predictions``.
        When omitted, the stem ``"predictions"`` is used. A function cannot
        see the caller's variable name, and interpolating the array itself
        into the path (the previous fallback) produced an unusable file name.

    Returns
    -------
    pandas.DataFrame
        The 2-D predictions, one row per flattened sample.
    """
    # Collapse every leading axis so the frame has one row per sample.
    predictions_array = np.asarray(predictions_list)
    predictions_reshaped = predictions_array.reshape(-1, predictions_array.shape[-1])
    predictions_df = pd.DataFrame(predictions_reshaped)

    stem = filename if filename else "predictions"
    csv_path = Path("../data/predictions") / f"{stem}.csv"
    # Create the output folder on first use instead of failing inside to_csv.
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    predictions_df.to_csv(csv_path, index=False)

    # Display the DataFrame
    print(predictions_df)

    return predictions_df


# Write each prediction set to its own CSV and keep the returned frames.
y_genres_pred_df, y_instruments_pred_df, y_moods_pred_df = (
    save_predictions_to_csv(predictions, file_stem)
    for predictions, file_stem in (
        (y_genres_pred, "y_genres_pred"),
        (y_instruments_pred, "y_instruments_pred"),
        (y_moods_pred, "y_moods_pred"),
    )
)


             0         1         2         3         4         5         6   \
0      0.031810 -0.002292 -0.000780  0.006762  0.005843 -0.005215  0.006930   
1      0.003467  0.055604 -0.014927 -0.002351 -0.011058 -0.028373  0.012738   
2     -0.001942 -0.008158 -0.008340  0.000022  0.004744  0.012558 -0.011670   
3      0.004938 -0.006553  0.002530 -0.004025 -0.006637  0.007771  0.000060   
4     -0.027347  0.012869  0.016300  0.011433  0.008072  0.036095  0.070647   
...         ...       ...       ...       ...       ...       ...       ...   
22166  0.001899  0.406027  0.016106 -0.005562  0.003009 -0.001883 -0.013927   
22167  0.003813  0.504271  0.013052  0.001288  0.004430 -0.009569 -0.008167   
22168 -0.000159  0.006254  0.051075 -0.001362  0.004734 -0.004923 -0.007143   
22169  0.007891  0.018443  0.000194 -0.013288  0.006330  0.019331 -0.004334   
22170 -0.018725  0.005785  0.037541  0.003188  0.021589  0.037125  0.008199   

             7         8         9   ...        80 

In [None]:
# Visualize the predictions
def plot_predictions(y_pred, title):
    """Draw the predictions as line plots on a single 10x5 inch figure.

    Parameters
    ----------
    y_pred : array-like or pandas.DataFrame
        Values to plot. For 2-D input, matplotlib draws one line per column.
    title : str
        Figure title.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    lines = ax.plot(y_pred)
    # Passing label="Predicted" to plot() would give every column the same
    # label and produce one identical legend entry per line. Label only the
    # first line so the legend shows a single "Predicted" entry.
    if lines:
        lines[0].set_label("Predicted")
    ax.set_title(title)
    ax.set_xlabel("Row index")
    ax.set_ylabel("Predicted value")
    ax.legend()
    plt.show()


In [None]:
# Plot only the first 100 rows of each prediction frame.
for predictions_frame, figure_title in (
    (y_genres_pred_df, "Genres Predictions"),
    (y_instruments_pred_df, "Instruments Predictions"),
    (y_moods_pred_df, "Moods Predictions"),
):
    plot_predictions(predictions_frame.head(100), figure_title)

In [None]:
# Save the models in the folder 'models' using pickle.
# `pickle` and `Path` come from the import cell at the top of the notebook.
MODELS_DIR = Path("../models")
MODELS_DIR.mkdir(parents=True, exist_ok=True)  # create the folder on first run

# Target file name -> model object to serialize.
models_to_save = {
    "esn_genre.pkl": model_Genre,
    "esn_instrument.pkl": model_Instrument,
    "esn_mood.pkl": model_Mood,
}

for model_file_name, esn_model in models_to_save.items():
    with open(MODELS_DIR / model_file_name, "wb") as f:
        pickle.dump(esn_model, f)

print("Models saved successfully.")


Models saved successfully.


In [83]:
# Read the pickled genre model back from ../models/esn_genre.pkl.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load files that this project produced itself.
with open("../models/esn_genre.pkl", "rb") as f:
    saved_model_genre = pickle.load(f)