In [142]:
import pandas as pd

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset and separate the input features from the target column.
music_data = pd.read_csv("music.csv")
X = music_data.drop(columns=["genre"])
y = music_data["genre"]

# Hold out 20% of the rows for testing. The seed is fixed so that the split,
# and therefore the reported score, is identical on every
# "Restart Kernel -> Run All".
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# The tree is seeded as well: scikit-learn permutes the features at every
# split, so tied splits could otherwise be resolved differently between runs.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Measure the accuracy on the held-out test split.
score = accuracy_score(y_test, predictions)
score

1.0

In [146]:
# Persistence: we save the trained model to disk because, in real-world projects, training can take hours.

# Retraining the model every time we need a prediction would therefore be very inefficient.

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib


# Load the dataset and separate the input features from the target column.
music_data = pd.read_csv("music.csv")
X = music_data.drop(columns=["genre"])
y = music_data["genre"]

# Train on the complete dataset (no hold-out split in this cell). The seed
# makes the persisted tree reproducible: scikit-learn permutes the features at
# every split, so tied splits could otherwise differ between runs.
model = DecisionTreeClassifier(random_state=42)
model.fit(X, y)

# Serialize the fitted model to disk; joblib.dump returns the list of files
# written, which is what the cell displays.
joblib.dump(model, "music-recommender.joblib")

['music-recommender.joblib']

In [147]:
# Load the persisted model from disk and make a prediction without retraining

# Restore the previously trained model from disk instead of retraining it.
# NOTE: joblib files are pickle-based; only load files you produced yourself.
model = joblib.load("music-recommender.joblib")

# The model was fitted on a DataFrame, so it recorded its column names.
# Passing a bare nested list makes scikit-learn (>= 1.0) warn that
# "X does not have valid feature names"; a one-row DataFrame that reuses the
# names stored on the model avoids that and keeps the column order explicit.
sample = pd.DataFrame([[21, 1]], columns=model.feature_names_in_)
predictions = model.predict(sample)
predictions




array(['HipHop'], dtype=object)

In [148]:
# Export the decision tree to a Graphviz DOT file so it can be visualized

from sklearn import tree


# Write the fitted tree to a Graphviz DOT file.
# The class labels are read from the model itself (`classes_` holds them in the
# ascending order export_graphviz expects) rather than from a `y` variable left
# over from an earlier cell, so this cell also works on a fresh kernel that
# only loaded the persisted model.
tree.export_graphviz(
    model,
    out_file="music-recommender.dot",
    feature_names=["age", "gender"],
    class_names=[str(label) for label in model.classes_],
    label="all",
    rounded=True,
    filled=True,
)