In [12]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

## Preparing the data

# Load the contents of music.csv into a DataFrame.
music_data = pd.read_csv(filepath_or_buffer="music.csv")

# Leave the frame as the last expression so the notebook renders it.
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


In [13]:
# Split the data (input/Output sets)

# X: every column except "genre" (the model inputs).
X = music_data.drop("genre", axis="columns")
# Y: the "genre" column on its own (the value to predict).
Y = music_data.loc[:, "genre"]

# Display the target column as the cell output.
Y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

In [14]:
## Learning and Prediction

# A fixed seed makes the fitted tree reproducible across re-runs of this cell.
model = DecisionTreeClassifier(random_state=0)
model.fit(X, Y) #Train the model

# Predicting genre for a 21-year-old male and a 22-year-old female
# (assumes gender is encoded as 1 = male, 0 = female -- confirm against the dataset).
# The samples are wrapped in a DataFrame that reuses the training column names,
# so predict() receives the same feature names the model was fitted with;
# passing a bare list of lists makes scikit-learn warn about missing feature names.
samples = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
predictions = model.predict(samples)
predictions



array(['HipHop', 'Dance'], dtype=object)

In [15]:
## Calculating accuracy

from sklearn.model_selection import train_test_split #helpful for splitting the data into training and testing sets

# test_size=0.2 means 20% of the data will be used for testing and 80% for training.
# random_state pins the shuffle, so the same rows are held out on every run.
# train_test_split returns a list of four elements, unpacked here as
# X_train, X_test, Y_train, Y_test.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

print(X_test) #Print the test data

# Note: this refits the existing `model` object on the training rows only.
model.fit(X_train, Y_train) #Train the model with training data

predictions = model.predict(X_test) #Predicting the genre for the test data

predictions




    age  gender
11   25       0
6    31       1
12   26       0
14   30       0


array(['Dance', 'Classical', 'Acoustic', 'Acoustic'], dtype=object)

In [None]:
from sklearn.metrics import accuracy_score

# Compare the actual genres of the held-out rows with the predicted ones.
score = accuracy_score(y_true=Y_test, y_pred=predictions)
score

# Accuracy increases with more data

1.0

In [None]:
## Storing the model as a file
# This allows you to reuse the model without retraining it every time.

!pip install joblib

import joblib

joblib.dump(model, 'music_recommender.joblib') #Saving the model as a file




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


['music_recommender.joblib']