In [31]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Load dataset (replace with actual file path if needed)
df = pd.read_csv("Last.fm_data.csv")

# Check dataset structure
print(df.head())

# Create 'repeated_play' target column
df['repeated_play'] = df.duplicated(subset=['Username', 'Track'], keep=False).astype(int)

# Encode categorical features
label_encoders = {}
categorical_features = ['Artist', 'Track', 'Album']
for col in categorical_features:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Selecting features (excluding non-numeric and target column)
features = [col for col in df.columns if col not in ['repeated_play', 'Username', 'Date', 'Time']]
target = "repeated_play"

# Splitting data
X_train, X_test, y_train, y_test = train_test_split(df[features], df[target], test_size=0.2, random_state=42, stratify=df[target])

# Training RandomForest model
model = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Generating song recommendations
df["replay_probability"] = model.predict_proba(df[features])[:, 1]
top_songs = df.sort_values("replay_probability", ascending=False)[["Track", "replay_probability"]].head(10)
print("Top Recommended Songs:")
print(top_songs)


   Unnamed: 0 Username           Artist                          Track  \
0           0  Babs_05  Isobel Campbell     The Circus Is Leaving Town   
1           1  Babs_05  Isobel Campbell                   Dusty Wreath   
2           2  Babs_05  Isobel Campbell     Honey Child What Can I Do?   
3           3  Babs_05  Isobel Campbell  It's Hard To Kill A Bad Thing   
4           4  Babs_05  Isobel Campbell                Saturday's Gone   

                       Album         Date    Time  
0  Ballad of the Broken Seas  31 Jan 2021   23:36  
1  Ballad of the Broken Seas  31 Jan 2021   23:32  
2  Ballad of the Broken Seas  31 Jan 2021   23:28  
3  Ballad of the Broken Seas  31 Jan 2021   23:25  
4  Ballad of the Broken Seas  31 Jan 2021   23:21  
Accuracy: 0.838795100959947
              precision    recall  f1-score   support

           0       0.84      0.92      0.88     20645
           1       0.85      0.70      0.77     12586

    accuracy                           0.84     332