In [1]:
import pandas as pd
# Load the listener data set; the path is relative to the notebook's working directory.
music_data = pd.read_csv('music.csv')
# Display the loaded table (columns: age, gender, genre).
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


In [2]:
# (rows, columns) of the loaded table.
music_data.shape

(18, 3)

In [3]:
# exploring the data: summary statistics (count, mean, std, min, quartiles, max)
# of the numeric columns -- a quick check for odd values before preparing the data
music_data.describe()

Unnamed: 0,age,gender
count,18.0,18.0
mean,27.944444,0.5
std,5.12746,0.514496
min,20.0,0.0
25%,25.0,0.0
50%,28.0,0.5
75%,31.0,1.0
max,37.0,1.0


In [4]:
# preparing data: separate the table into two sets
#   1. input set  (X): every column except the label, i.e. age and gender
#   2. output set (y): the genre label
# drop() returns a new frame, so music_data itself keeps its genre column.
X = music_data.drop('genre', axis='columns')
X


Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1
3,26,1
4,29,1
5,30,1
6,31,1
7,33,1
8,37,1
9,20,0


In [5]:
# 2. output set (y): the genre column, i.e. the label the model learns to predict
y = music_data['genre']
y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

In [6]:
# Learning and predicting with a decision tree
from sklearn.tree import DecisionTreeClassifier


# Train on the full data set. random_state pins how ties between equally good
# splits are broken, so re-running the cell rebuilds the same tree.
model = DecisionTreeClassifier(random_state=0)
model.fit(X, y)

# The model was fitted on a DataFrame, so query it with a frame that carries the
# same column names; a bare nested list makes scikit-learn warn that the input
# "does not have valid feature names".
new_listeners = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
predictions = model.predict(new_listeners)



In [7]:
# Display the genres predicted for the two sample listeners.
predictions

array(['HipHop', 'Dance'], dtype=object)

In [8]:

# Calculating the accuracy
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. Fixing random_state makes the shuffle
# repeatable, so the split -- and the accuracy computed from it -- is the same
# on every Restart & Run All instead of changing with each execution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [9]:
# Fit a fresh tree on the training split only, then predict the held-out rows.
# random_state pins how ties between equally good splits are broken, so the
# fitted tree is identical across runs for the same training data.
model = DecisionTreeClassifier(random_state=0)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
predictions

array(['Jazz', 'HipHop', 'HipHop', 'Classical'], dtype=object)

In [10]:
# Sanity check: training features and labels must have the same number of rows.
print(X_train.shape, y_train.shape)

(14, 2) (14,)


In [11]:
# Inspect the feature rows selected for training.
X_train

Unnamed: 0,age,gender
3,26,1
8,37,1
15,31,0
12,26,0
0,20,1
14,30,0
10,21,0
4,29,1
7,33,1
11,25,0


In [12]:
# Inspect the genre labels paired with the training rows.
y_train

3          Jazz
8     Classical
15    Classical
12     Acoustic
0        HipHop
14     Acoustic
10        Dance
4          Jazz
7     Classical
11        Dance
9         Dance
6     Classical
13     Acoustic
17    Classical
Name: genre, dtype: object

In [13]:
# Inspect the held-out feature rows the model is evaluated on.
X_test

Unnamed: 0,age,gender
5,30,1
2,25,1
1,23,1
16,34,0


In [14]:
# True genres for the held-out rows; these are what `predictions` is scored against.
y_test

5          Jazz
2        HipHop
1        HipHop
16    Classical
Name: genre, dtype: object

In [15]:
# Sanity check: test features and labels must have the same number of rows.
print(X_test.shape, y_test.shape)

(4, 2) (4,)


In [16]:
# Re-display the predictions made on X_test for comparison with y_test.
predictions

array(['Jazz', 'HipHop', 'HipHop', 'Classical'], dtype=object)

In [17]:
# Accuracy: share of held-out rows whose predicted genre equals the true genre
from sklearn.metrics import accuracy_score

score = accuracy_score(y_true=y_test, y_pred=predictions)
# Shown as a percentage
100 * score

100.0

In [18]:
# Persisting Models
# joblib is imported as a standalone package; the `sklearn.externals.joblib`
# alias used by older tutorials is no longer shipped with scikit-learn.
from pathlib import Path

import joblib

# Export the trained model when no saved copy exists yet, so the load in the
# next cell also works on a fresh checkout. An existing file is left untouched;
# delete it to force a re-export of the current `model`.
if not Path('music-recommender.joblib').exists():
    joblib.dump(model, 'music-recommender.joblib')

In [19]:
# Reload the persisted classifier. This rebinds `model`, replacing the tree
# trained earlier in the notebook.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
model = joblib.load('music-recommender.joblib')
# Predict the genre for a single listener; the row is presumably [age, gender],
# matching the columns of X -- confirm against how the saved model was trained.
predictions = model.predict([ [21, 0] ])



In [20]:
# Genre predicted by the reloaded model for the single sample.
predictions

array(['Dance'], dtype=object)

In [21]:
# Visualizing a Decision Tree
from sklearn import tree

# Write the fitted tree to a Graphviz .dot file that can be rendered externally.
tree.export_graphviz(
    model,
    out_file='music-recommender.dot',
    feature_names=['age', 'gender'],
    # Take the labels from the model being exported rather than from `y`:
    # `classes_` lists exactly the classes this tree was trained on, in the
    # order the tree indexes them, so node labels stay aligned even when the
    # model (e.g. one loaded from disk) never saw every genre present in `y`.
    class_names=list(model.classes_),
    label='all',
    rounded=True,
    filled=True,
)

In [22]:
# Alphabetically sorted list of the distinct genres in the label column.
sorted(y.unique())

['Acoustic', 'Classical', 'Dance', 'HipHop', 'Jazz']

In [23]:
# Re-display the original table; no earlier cell modifies music_data in place.
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance
