In [91]:
# Taking a spreadsheet of which age-group, which gender, what type of music they listen to,
# I evaluate the accuracy of what the machine learning algorythm predicted

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


music_data = pd.read_csv('music.csv')
X = music_data.drop(columns=['genre'])
y = music_data['genre']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = DecisionTreeClassifier()
model.fit(X_train, y_train)
predictions = model.predict(X_test)

score = accuracy_score(y_test, predictions)
score

0.75

In [99]:
# Taking the same spreadsheet, first I dump it into a file, using joblib from sklearn,
# then recall it from that and make a predicion based on a manually selected input
# Unfortunately due to the updates to joblib, this prediction is now different than what it should have been

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib

# music_data = pd.read_csv('music.csv')
# X = music_data.drop(columns=['genre'])
# y = music_data['genre']

# model = DecisionTreeClassifier()
# model.fit(X, y)

# joblib.dump(mode, 'music-recommender.joblib')
model = joblib.load('music-recommender.joblib')
predicitions = model.predict([ [21, 1] ])
predictions

array(['Jazz', 'HipHop', 'Dance', 'Classical'], dtype=object)

In [100]:
# Taking the same spreadsheet, again, I export the decision tree in a visual format
# Using the .dot format, it can be viewed in VSCode using a .dot visualizer

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

music_data = pd.read_csv('music.csv')
X = music_data.drop(columns=['genre'])
y = music_data['genre']

model = DecisionTreeClassifier()
model.fit(X, y)

tree.export_graphviz(model, out_file='music-recommender.dot', feature_names=['age', 'gender'],
                     class_names=sorted(y.unique()), label='all', rounded=True, filled=True)