<h1 style="color: orange; text-align: center;"> Machine Learning - Basic Predictions </h1>

<h1 style="color: white;"> Imports </h1>

In [4]:
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
# Raise pandas' display.max_rows option to 500 for frames shown in this notebook.
pd.set_option('display.max_rows', 500)

<h1 style= "color: orange; font-weight: bold; "> Import Data </h1>

In [5]:
# Load the music-preference data set.
# Encoding of the `gender` column: 1 representing male, 0 representing female.
filename: str = 'music.csv'
df: pd.DataFrame = pd.read_csv(filename)

# Quick sanity check: (rows, columns) followed by the rendered frame.
print(df.shape)
display(df)

(18, 3)


Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


<h1 style="color:orange; font-weight: bold;"> Clean Data </h1>
<p> our data is already pretty clean... </p>

In [6]:
## Data columns: age, gender, genre

# Features (model inputs): everything except the target column -> age, gender
X: pd.DataFrame = df.drop('genre', axis='columns')

# Labels (model output): what remains once the feature columns are removed -> genre
# Kept as a one-column DataFrame so later cells can index it with y['genre'].
y: pd.DataFrame = df.drop(['age', 'gender'], axis='columns')

print("Features: ")
display(X)

print("Labels: ")
display(y)

Features: 


Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1
3,26,1
4,29,1
5,30,1
6,31,1
7,33,1
8,37,1
9,20,0


Labels: 


Unnamed: 0,genre
0,HipHop
1,HipHop
2,HipHop
3,Jazz
4,Jazz
5,Jazz
6,Classical
7,Classical
8,Classical
9,Dance


<h1 style="color:orange; font-weight: bold;"> Split the data into training/testing sets </h1>

In [7]:
%%time
# Total data in X => [][][][][],  X_train => [x][x][x][x][],  ....[] the last 20% is used for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
display(X_train.sort_index())


Unnamed: 0,age,gender
0,20,1
1,23,1
3,26,1
4,29,1
5,30,1
7,33,1
8,37,1
9,20,0
10,21,0
11,25,0


Wall time: 5.99 ms


<h1 style="color:orange; font-weight: bold;"> Create a Model and Train the Model </h1>

In [8]:
modelname: str = 'music-recommender.joblib'
trainModel: bool = False

# Reuse a previously saved model when one exists; otherwise train a new one.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files that you created yourself.
try:
    loadModel = joblib.load(modelname)
except FileNotFoundError:
    print(f"Model {modelname} not found! Training model {modelname} ...")
    trainModel = True

if trainModel:
    model = tree.DecisionTreeClassifier()
    # Fit on the underlying NumPy arrays (.values) so the model is trained
    # without column names and can later predict on plain lists/arrays.
    model.fit(X_train.values, y_train.values)

    # Persist only a freshly trained model; a model that was just loaded is
    # already on disk under the same name, so re-writing it is pointless.
    joblib.dump(model, modelname)
else:
    model = loadModel

['music-recommender.joblib']

<h1 style="color:orange; font-weight: bold;"> Make a Prediction </h1>

In [9]:
# Predict the music choice of a 21yr male   -> [21, 1]
# Predict the music choice of a 35yr female -> [35, 0]
predictions: np.ndarray = model.predict([[21, 1], [35, 0]])
print(f"{type(predictions)}")
print(predictions)



# Random 80 - 20 data split; random_state pins the shuffle so the accuracy
# printed below is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The model (re)trains on the 80% split. This matters when the model was loaded
# from disk: it may have been fitted on rows that are in this test set.
model.fit(X_train.values, y_train.values)

# The 20% feature split is used to make predictions. Pass .values to match how
# the model was fitted; passing the DataFrame triggers a feature-names warning.
predictions_on_test = model.predict(X_test.values)

# The 20% label split is used to check those predictions
score = accuracy_score(y_test, predictions_on_test)

# Fraction of test rows whose genre was predicted correctly
print(score)

<class 'numpy.ndarray'>
['HipHop' 'Classical']
0.75




<h1 style="color:orange; font-weight: bold;"> Evaluate Predictions </h1>

In [10]:
# Export the fitted decision tree to a Graphviz .dot file for visual inspection.
# class_names must be listed in the same ascending order the classifier uses
# internally (model.classes_). y['genre'].unique() returns the genres in order
# of first appearance instead, which would put the wrong genre on the nodes.
tree.export_graphviz(
    model,
    out_file='music-recommender.dot',
    feature_names=['age', 'gender'],
    class_names=list(model.classes_),
    label='all',
    rounded=True,
    filled=True,
)
# Open music-recommender.dot in vscode and use the shortcut: ' ctrl + k, v ' to view the decision tree