In [61]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import joblib
from sklearn import tree

## Step1 : Import Data 

In [51]:
# Read the training data from a CSV in the working directory.
music_df = pd.read_csv(filepath_or_buffer='music.csv')
# Bare last expression -> rich DataFrame display as the cell output.
music_df

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


## Step2 : Clean Data

In [52]:
# The 'genre' column is the prediction target: keep it on its own as `y`
# (lowercase by convention) and use every remaining column as the feature
# matrix `X` (uppercase by convention).
y = music_df['genre']
X = music_df.drop('genre', axis=1)

## Step3 : split data

In [53]:
# Hold out 20% of the rows for evaluation. random_state pins the shuffle so the
# split (and therefore the accuracy computed from it) is identical on every
# Restart & Run All; without it each run trains and evaluates on different rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## Step4 : build a model

In [54]:
# Seed the estimator: scikit-learn permutes the features at every split, so ties
# between equally good splits are broken randomly unless random_state is fixed.
model = DecisionTreeClassifier(random_state=42)

## Step5 : train the model

In [55]:
# Learn the decision rules from the training portion only.
model.fit(X=X_train, y=y_train)

## Step6 : make predictions

In [56]:
# Predict a genre for every held-out row.
predictions = model.predict(X=X_test)

## Step7 : evaluate the model

In [57]:
# Fraction of held-out rows whose predicted genre equals the true genre.
score = accuracy_score(y_true=y_test, y_pred=predictions)
print(f'model accuracy = {score}')

model accuracy = 0.75


### Dump the model to avoid retraining it each time
### This creates a joblib file (`music-recommender.joblib`)

In [58]:
# Persist the fitted classifier to disk; the cell output is the list of files written.
joblib.dump(value=model, filename='music-recommender.joblib')

['music-recommender.joblib']

### Load the model

In [60]:
# Restore the persisted classifier from disk instead of retraining.
# NOTE: joblib files are pickle-based -- only load files you created or trust.
model_loaded = joblib.load(filename='music-recommender.joblib')

# Predict on the held-out rows with the reloaded model and show the labels.
predictions = model_loaded.predict(X=X_test)
print(predictions)

['Dance' 'Classical' 'Classical' 'HipHop']


### Visualize the decision tree

In [62]:
# Export the fitted tree as a Graphviz DOT file.
# Names are taken from the fitted model / training frame rather than hard-coded:
#   - feature_names must match the columns the model was trained on, in order;
#   - class_names must follow model.classes_, which only holds the genres that
#     were present in y_train. A random split can leave a genre out of training,
#     in which case sorted(y.unique()) would be misaligned and mislabel leaves.
tree.export_graphviz(
    model,
    out_file='music-recommender.dot',
    feature_names=list(X_train.columns),
    class_names=[str(label) for label in model.classes_],
    label='all',
    rounded=True,  # rounded node corners
    filled=True,   # colour each node by its majority class
)

In [6]:
# NOTE(review): pydot is imported here rather than in the import cell at the
# top of the notebook, so this dependency is easy to miss on a fresh run.
import pydot

# Parse the DOT file written by export_graphviz. pydot returns a list of graphs;
# the file holds the single exported tree, so the first entry is the one to
# render. The PNG is written to the working directory as 'output.png'.
graphs = pydot.graph_from_dot_file('music-recommender.dot')
graph = graphs[0]
graph.write_png('output.png')