### Import all the libraries first.

In [113]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
from sklearn import tree

### Inspect the csv dataset

In [114]:
music_data = pd.read_csv('music.csv')
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


### Prepare two arrays, one for input and one for the expected output / what we want the model to predict

### Here X will be our input array, while y will be our expected output array

In [115]:
X = music_data.drop(columns=['genre'])
X

Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1
3,26,1
4,29,1
5,30,1
6,31,1
7,33,1
8,37,1
9,20,0


In [116]:
y = music_data['genre']
y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

### Use the DecisionTreeClassifier model, takes in 2 arguments (The input and expected output arrays)

In [117]:
model = DecisionTreeClassifier()
model.fit(X, y)

DecisionTreeClassifier()

### Pretty straight-forward, give an array with what input you want the model to predict the output of

In [118]:
predictions = model.predict([ [21, 1], [22, 0] ])
predictions

array(['HipHop', 'Dance'], dtype=object)

### It's better if we split the dataset into training and testing arrays. The test_size determines how much (in percentage) of the dataset you want to test the model with

In [119]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8)

### Now let's use the arrays for split testing we sut up above

In [120]:
model.fit(X_train, y_train)
predictions = model.predict(X_test)
predictions

array(['Classical', 'Classical', 'Classical', 'HipHop', 'Dance',
       'Classical', 'Classical', 'HipHop', 'Dance', 'Classical', 'Dance',
       'Dance', 'Classical', 'Classical', 'HipHop'], dtype=object)

In [121]:
score = accuracy_score(y_test, predictions)
score

0.6

In [122]:
joblib.dump(model, 'music-recommender.joblib')

['music-recommender.joblib']

### Let's use the generated model

In [123]:
model = joblib.load('music-recommender.joblib')
predictions = model.predict([ [21, 1] ])
predictions

array(['HipHop'], dtype=object)

In [124]:
tree.export_graphviz(model, out_file='music-recommender.dot', feature_names=['age', 'gender'], class_names=sorted(y.unique()), label='all', rounded=True, filled=True)