In [1]:
import pandas as pd
#Used for easy handling of tabular data, like the Spotify tracks dataset.

In [3]:
import seaborn as sns
#to explore distribution and outliers

In [5]:
import matplotlib.pyplot as plt
#to display the visualizations or  charts.

In [7]:
from sklearn.preprocessing import StandardScaler
#To ensure all features have the same scale.

In [9]:
from sklearn.preprocessing import LabelEncoder
#To convert text into numeric codes.

In [13]:
from sklearn.model_selection import train_test_split
#To evaluate how well the model generalizes to unseen data.

In [15]:
from sklearn.ensemble import RandomForestClassifier
#to predict the genre based on audio features.

In [17]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
#accuracy_score:To show overall correctness
#confusion_matrix:To show true vs. predicted values
#classification_report: includes precision, recall, and F1-score

In [33]:
#Loading Data
df = pd.read_csv("SpotifyFeatures.csv")

In [35]:
#Selecting features
features = ['danceability', 'energy', 'acousticness', 'instrumentalness',
            'loudness', 'tempo']
df = df.dropna(subset=features + ['genre'])
X = df[features]
y = df['genre']

In [37]:
#Encoding target
le = LabelEncoder()
y = le.fit_transform(y)

In [39]:
#Scaling features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [41]:
#Train-test split and model
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, stratify=y, test_size=0.2, random_state=42)
clf = RandomForestClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [42]:
#Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=le.classes_))

Accuracy: 0.24354925341067785
Confusion Matrix:
 [[   0    0    0    0    0    0    0    1    1    0    0    0    0    1
     1    5   10    0    1    0    0    2    0    0    0    0    2]
 [   0   29   35   44    2  725    1    3   45   58   49   47   55  115
    13    5    2   46  102   73   22   17  189   78   42    5   51]
 [   0   23  489   48   19   31  102    7  127   56  127   28    4   28
    74   26   32   15   13    4   15   36   18  157   22  209   77]
 [   0   44   53  332   44   35    9   26   95   32   80  166   15   42
   154   59   15   10   30    4   45   37  100  105  174   17   82]
 [   0    2   12   42  397    0   15   22   39    9   32   51   33    9
    55   91   20   15   21   20   66   54    5    2   45   10   14]
 [   0  737   23   30    4    8    1    2   33   48   21   51   63  192
    12    3    1   45  121   51   12    7  231   51   56    3   65]
 [   0   12   44   22    9    5  925    4    5    1    8   16    0    4
    31   33  304    1    2    0    2   

In [None]:
 #In this task, we built a classification model to predict song genres using audio features like tempo, energy, and danceability. 
#By preprocessing the data, visualizing feature relationships, and training models like Random Forest,
#we demonstrated how machine learning can effectively classify music genres based on acoustic characteristics.