## Imports & Path Setup

In [None]:
import pandas as pd
import numpy as np
import os

# Input CSV location and output directories (paths are relative).
RAW_DATA_PATH = "../data/raw/spotify_music.csv"
PROCESSED_DIR = "../data/processed"
MODEL_DIR = "../model"

# The labeled dataset is written inside PROCESSED_DIR.
PROCESSED_DATA_PATH = os.path.join(PROCESSED_DIR, "spotify_labeled.csv")

# Create both output directories up front; exist_ok avoids an error on re-runs.
for output_dir in (PROCESSED_DIR, MODEL_DIR):
    os.makedirs(output_dir, exist_ok=True)


## Load Dataset

In [None]:
# Read the raw CSV at RAW_DATA_PATH into a DataFrame.
df = pd.read_csv(RAW_DATA_PATH)
# Preview the first rows (shown as the cell output when run in a notebook).
df.head()


## Select & Clean Columns

In [None]:
# Columns to keep from the raw dataset; every other column is discarded.
columns_needed = [
    "track_name", "artist_name",
    "danceability", "energy", "valence", "tempo",
    "loudness", "acousticness", "speechiness", "instrumentalness",
]

# Keep only the needed columns, drop rows with any missing value, and
# renumber the index from 0 so it is contiguous after the drop.
df = df[columns_needed].dropna().reset_index(drop=True)

# Print a summary of the cleaned frame.
df.info()


## Mood Labeling Function (Core)

In [None]:
def label_mood(row):
    """Return a mood label derived from a row's energy and valence.

    Rules, checked in order:
      * energy < 0.4 and valence < 0.4    -> "Sad"
      * energy < 0.4 and valence >= 0.4   -> "Calm"
      * energy >= 0.6 and valence >= 0.6  -> "Happy"
      * anything else                     -> "Energetic"

    Args:
        row: A mapping-like object supporting ``row["energy"]`` and
            ``row["valence"]``.

    Returns:
        One of "Sad", "Calm", "Happy" or "Energetic".
    """
    energy = row["energy"]

    if energy < 0.4:
        valence = row["valence"]
        if valence < 0.4:
            return "Sad"
        if valence >= 0.4:
            return "Calm"
        # A valence that satisfies neither comparison (e.g. NaN) falls
        # through to the default label below.
    elif energy >= 0.6 and row["valence"] >= 0.6:
        return "Happy"

    return "Energetic"


## Apply Labeling

In [None]:
# axis=1 hands each row to label_mood; the results become the "mood" column.
mood_labels = df.apply(label_mood, axis=1)
df["mood"] = mood_labels

# Number of rows per mood label (shown as the cell output in a notebook).
df["mood"].value_counts()


## Save Processed Dataset

In [None]:
# Write the labeled dataset to PROCESSED_DATA_PATH, omitting the index column.
df.to_csv(PROCESSED_DATA_PATH, index=False)
print(f"Dataset saved to {PROCESSED_DATA_PATH}")


## Train Model

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Numeric audio-feature columns used as model inputs.
feature_columns = [
    "danceability", "energy", "valence", "tempo",
    "loudness", "acousticness", "speechiness", "instrumentalness",
]

X = df[feature_columns]  # feature matrix
y = df["mood"]           # target labels

# Hold out 20% of the rows for testing; stratify on y so the split keeps the
# label proportions, and fix the seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


## Model Training

In [None]:
# Random forest hyperparameters: 200 estimators and a fixed seed.
forest_settings = {"n_estimators": 200, "random_state": 42}
model = RandomForestClassifier(**forest_settings)

# Fit on the training split.
model.fit(X_train, y_train)


## Evaluation

In [None]:
# Predict labels for the held-out test split.
y_pred = model.predict(X_test)

# Print each heading followed by its metric, computed on the test labels
# versus the predictions.
for heading, metric in (
    ("Classification Report:", classification_report),
    ("Confusion Matrix:", confusion_matrix),
):
    print(heading)
    print(metric(y_test, y_pred))


## Save Model

In [None]:
# Store the model directly inside MODEL_DIR. The previous "model/" prefix
# pointed at a nested MODEL_DIR/model/ directory that is never created, so
# joblib.dump failed with FileNotFoundError.
MODEL_PATH = os.path.join(MODEL_DIR, "mood_model.pkl")

# Serialize the fitted classifier to disk.
joblib.dump(model, MODEL_PATH)

print(f"Model saved to {MODEL_PATH}")
