In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
from sklearn import tree

In [2]:
# 1. import data
# Load the dataset from the notebook's working directory, then preview the
# first five rows as the cell output.

df = pd.read_csv(filepath_or_buffer='music.csv')
df.head(n=5)

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz


In [3]:
# 2. data preparation - split to input and output dataframes
# Inputs are every column except 'genre'; the output (target) is 'genre'.

X = df.drop(columns=['genre'])
y = df['genre']

# 3. split into training and test sets
# Hold out 20% of the rows for evaluation. random_state pins the shuffle so a
# Restart & Run All produces the same train/test partition every time.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [4]:
# 3. split into training and test sets (already done in the previous cell)
# 4. build a model
# 5. train the model
# 6. make predictions

# random_state makes the tree construction deterministic: scikit-learn shuffles
# the candidate features at each split, so equally good splits would otherwise
# be broken differently from run to run.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict the genre for the held-out rows; shown as the cell output.
predictions = model.predict(X_test)
predictions

array(['Dance', 'Jazz', 'Acoustic', 'Dance'], dtype=object)

In [5]:
# 7. accuracy of the model
# Compare the predicted genres with the true labels of the held-out rows.

score = accuracy_score(y_true=y_test, y_pred=predictions)
score

0.5

# Persistence

In [6]:
# for saving and loading models
# Refit on the complete dataset (no hold-out) and persist the fitted estimator
# to disk so it can be reloaded without retraining.

X2 = df.drop(columns=['genre'])
y2 = df['genre']

# NOTE: this rebinds `model`, replacing the estimator fitted on the training
# split. random_state keeps the persisted tree identical across runs.
model = DecisionTreeClassifier(random_state=42)
model.fit(X2, y2)

# joblib.dump returns the list of written file names, shown as the cell output.
joblib.dump(model, 'music-recommender.joblib')

['music-recommender.joblib']

In [7]:
# Reload the persisted estimator. joblib.load unpickles the file, so only load
# artifacts from a trusted source.
model2 = joblib.load('music-recommender.joblib')

# The estimator was fitted on a DataFrame, so query it with a DataFrame that
# carries the same column names. A bare nested list makes scikit-learn emit
# "X does not have valid feature names" and silently relies on column order.
sample = pd.DataFrame([[21, 1]], columns=['age', 'gender'])
predictions2 = model2.predict(sample)
predictions2



array(['HipHop'], dtype=object)

In [8]:
# visualization of decision tree
# Write the fitted tree to a Graphviz .dot file.
# class_names must be listed in the estimator's own class order, so read them
# from model2.classes_ rather than from a target Series defined in another cell.

tree.export_graphviz(model2, out_file='music-recommender.dot',
                     feature_names=['age', 'gender'],
                     class_names=[str(label) for label in model2.classes_],
                     label='all',
                     rounded=True,
                     filled=True)