# Machine Learning

# 1: Importing the dataset

In [76]:
import pandas as pd

# Load the dataset from music.csv; the path is relative to the notebook's
# working directory.
music_data = pd.read_csv('music.csv')
# Bare last expression: Jupyter renders the DataFrame as this cell's output.
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


# 2: Preparing the data (split into input features and output labels)

In [3]:
# Re-read the CSV so this cell can run on its own.
music_data = pd.read_csv('music.csv')

# Input features: every column except the target column 'genre'.
# Named `X` (uppercase) because the model-fitting cells in this notebook
# refer to the feature matrix as `X`; a lowercase `x` here would leave two
# different names for the same thing in the kernel.
X = music_data.drop(columns=['genre'])

# Output labels: the genre column the model will learn to predict.
y = music_data['genre']
y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

# 3: Learning and predicting (model implementation)

In [7]:
from sklearn.tree import DecisionTreeClassifier

# Load the data and separate the target column from the input features.
music_data = pd.read_csv('music.csv')
y = music_data['genre']
X = music_data.drop(columns=['genre'])

# Fit on the raw NumPy values (no column names) so that predicting on plain
# nested lists below uses the same kind of input the model was trained on.
model = DecisionTreeClassifier()
model.fit(X.values, y)

# Each inner list is one sample, in the same column order as X.
predictions = model.predict([[21, 1], [25, 0], [30, 1]])
print(predictions)

['HipHop' 'Dance' 'Jazz']


# 4: Evaluating accuracy of model

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

music_data = pd.read_csv('music.csv')

# Bind the features to `X` -- the same name passed to train_test_split below.
# Previously this line assigned to lowercase `x`, so the split silently used
# whatever `X` an earlier cell had left in the kernel (NameError on a fresh
# kernel, stale data otherwise).
X = music_data.drop(columns=['genre'])
y = music_data['genre']

# Hold out 20% of the rows for testing; the remaining 80% are used to train.
# No random_state is given, so the rows are shuffled differently on each run
# and the printed score can vary between executions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train only on the training portion ...
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# ... and predict on rows the model has not seen.
predictions = model.predict(X_test)

# Fraction of held-out rows whose predicted genre matches the true genre.
score = accuracy_score(y_test, predictions)
print(score)

1.0


# 5: Persisting models part 1 (Storing trained model in a file)

In [9]:
import joblib

music_data = pd.read_csv('music.csv')

# Bind the features to `X` -- the name used in model.fit below. Previously
# this line assigned to lowercase `x`, so the fit relied on an `X` left over
# from an earlier cell instead of the data loaded here.
X = music_data.drop(columns=['genre'])
y = music_data['genre']

# Train on the full dataset; this is the model that gets saved.
model = DecisionTreeClassifier()
model.fit(X.values, y)

# Serialize the trained model to a binary file in the current working
# directory. joblib.dump returns the list of filenames it wrote, which
# Jupyter shows as the cell output.
joblib.dump(model, 'music-recommender.joblib')

['music-recommender.joblib']

# 5: Persisting models part 2 (Loading the trained model back with only 2 lines)

In [10]:
# Restore the already-trained classifier from the binary file written by
# joblib.dump -- no retraining needed.
# NOTE: joblib files are pickle-based; only load files you created or trust.
model = joblib.load('music-recommender.joblib') # Load the previously created binary file
# One sample; values must be in the same column order the model was trained
# on (the dataset's feature columns are age, gender).
predictions = model.predict([[25, 1]])
# Bare last expression: Jupyter displays the predicted label array.
predictions

array(['HipHop'], dtype=object)

# 6: Visualizing the decision tree (exporting it to an external .dot file)

In [11]:
from sklearn import tree

music_data = pd.read_csv('music.csv')

# Bind the features to `X` -- the name used in model.fit below. Previously
# this line assigned to lowercase `x`, so the fit relied on an `X` left over
# from an earlier cell instead of the data loaded here.
X = music_data.drop(columns=['genre'])
y = music_data['genre']

model = DecisionTreeClassifier()
model.fit(X.values, y)

# Write the fitted tree in Graphviz DOT format to music-recommender.dot in
# the current working directory.
tree.export_graphviz(model, out_file='music-recommender.dot',
                     # Labels for the split conditions, in training column order.
                     feature_names=['age', 'gender'],
                     # export_graphviz expects class names in ascending order,
                     # hence the sort of the unique genre labels.
                     class_names=sorted(y.unique()),
                     label='all',
                     rounded=True,
                     filled=True)

# To view the tree:
#   1. Install the 'Graphviz Interactive Preview' extension from the
#      VS Code extensions panel.
#   2. Open music-recommender.dot in VS Code (e.g. drag and drop the file
#      into the editor) and open the preview.