In [1]:
# Importing libraries and Inspecting the data
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Read the data set from disk and display it for a first look.
music_data = pd.read_csv(filepath_or_buffer='music.csv')
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


In [2]:
# Cleaning/Preparing Data
# The data is already clean, so no cleaning step is needed.

In [3]:
# Split the data set into model inputs (features) and output (target).
# Inputs: every column except the 'genre' target.
X = music_data.drop('genre', axis=1)
X

Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1
3,26,1
4,29,1
5,30,1
6,31,1
7,33,1
8,37,1
9,20,0


In [4]:
# Output: the 'genre' column, i.e. the label the model learns to predict.
Y = music_data.loc[:, 'genre']
Y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

In [5]:
# Build a decision-tree classifier and train it on the full data set.
# fit() returns the estimator itself, so creation and training can be chained.
model = DecisionTreeClassifier().fit(X, Y)
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


#### What kind of music do a 21-year-old male and a 22-year-old female like?

In [None]:
# Query the trained model for two people. Each row is [age, gender];
# going by the heading above, gender 1 is male and 0 is female.
# The model was fitted on a DataFrame, so the queries are passed as a
# DataFrame with the same column names: scikit-learn >= 1.0 warns when
# predict() receives unnamed input (such as a plain list) in that case.
samples = pd.DataFrame([[21, 1], [22, 0]], columns=['age', 'gender'])
predictions = model.predict(samples)
predictions

### How to measure the accuracy of the model?

In [7]:
# To measure accuracy we split the data into two sets:
# one for training and the other for testing (20% held out here),
# so the score is computed on rows the model never saw while training.

# A fixed seed makes the split and the fitted tree reproducible. Without it,
# the reported accuracy changes from run to run, which is very noticeable
# on a data set this small.
SEED = 42

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=SEED
)
model = DecisionTreeClassifier(random_state=SEED)
model.fit(X_train, Y_train)
predictions = model.predict(X_test)

# Fraction of held-out rows whose predicted genre matches the true genre
score = accuracy_score(Y_test, predictions)
score

1.0

### Persisting Models

In [8]:
# We only build and train the model once in a while, and then save it to a file.
# The next time we need predictions, we simply load the model from that file and ask it to make them.
# The loaded model is already trained.

In [10]:
# Storing trained model in a file

import pandas as pd
from  sklearn.tree import DecisionTreeClassifier
import joblib

# Train on the complete data set, then persist the fitted estimator so it
# can be reused later without retraining.
music_data = pd.read_csv(filepath_or_buffer='music.csv')
X, Y = music_data.drop('genre', axis=1), music_data.loc[:, 'genre']

model = DecisionTreeClassifier().fit(X, Y)

# dump() returns the list of file names it wrote, which this cell displays
joblib.dump(value=model, filename='music-recommender.joblib')

['music-recommender.joblib']

In [12]:
# Load the previously saved, already-trained model instead of retraining.
# NOTE: joblib files are pickle-based and can execute code when loaded,
# so only load model files that come from a trusted source.
model = joblib.load('music-recommender.joblib')

# Pass the query as a DataFrame with the training column names; a bare list
# makes scikit-learn >= 1.0 warn about missing feature names.
sample = pd.DataFrame([[21, 1]], columns=['age', 'gender'])
predictions = model.predict(sample)
predictions

array(['HipHop'], dtype=object)

### Visualizing  Decision Trees

In [15]:
# Rebuild the model from scratch:
#   1. load the data set
#   2. separate the target column from the input columns
#   3. create the classifier and train it
music_data = pd.read_csv(filepath_or_buffer='music.csv')
Y = music_data.loc[:, 'genre']
X = music_data.drop('genre', axis=1)
model = DecisionTreeClassifier()
model.fit(X, Y)

DecisionTreeClassifier()

In [16]:
import pandas as pd
from  sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Export the trained tree in Graphviz DOT format for visualization.
# Class names are the distinct genres in ascending order.
genre_labels = sorted(Y.unique())

tree.export_graphviz(
    decision_tree=model,
    out_file='music-recommender.dot',
    filled=True,
    rounded=True,
    label='all',
    class_names=genre_labels,
    feature_names=['age', 'gender'],
)
