In [8]:
# The idea behind persisting models: training on larger datasets may take minutes or hours, so we don't want to build and train a model every time the program runs. The ideal practice is to build and train a model once in a while, save it, and later load the saved model to make predictions in the program.

import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
# --------------------------------------------------------------------------------------------------------------------
try:
    # Standalone joblib is the supported import; sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in 0.23.
    import joblib
except ImportError:
    # Fallback for old environments that only ship the copy vendored inside scikit-learn.
    from sklearn.externals import joblib
# joblib object has methods for saving and loading models
# --------------------------------------------------------------------------------------------------------------------

# Import the data.
music_data = pd.read_csv('music.csv')

# Prepare the data: X (input set) is every column except 'genre'; y (output set) is the 'genre' column.
# NOTE: no cleaning step is applied; per the original author the dataset has no null values or duplicates.
X = music_data.drop(columns=['genre'])
y = music_data['genre']

# Split the input and output sets into training and testing parts: 20% of the rows are kept for testing,
# the remaining 80% are used for training. More training data generally gives more accurate predictions.
# No random_state is passed, so the split (and therefore the score below) may differ between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Create a model using a machine learning algorithm (a decision tree here; the choice of algorithm
# depends on its performance and accuracy), then train it with the input and output training sets.
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# --------------------------------------------------------------------------------------------------------------------
# After training the model, save it as a .joblib file.
joblib.dump(model, 'recommender.joblib')
# --------------------------------------------------------------------------------------------------------------------

# --------------------------------------------------------------------------------------------------------------------
# Load the trained model back from disk.
# NOTE: joblib files are pickle-based; only load files that come from a trusted source (here, the file written above).
model = joblib.load('recommender.joblib')
# --------------------------------------------------------------------------------------------------------------------

# Make predictions with the input test dataset.
predictions = model.predict(X_test)
# The model was fitted on a DataFrame, so the ad-hoc samples are wrapped in a DataFrame with the same
# column names; passing a bare list makes newer scikit-learn versions warn about missing feature names.
predictions2 = model.predict(pd.DataFrame([[21, 1], [39, 0]], columns=X.columns))
# e.g. array(['HipHop', 'Classical'], dtype=object)
print(predictions)
print(predictions2)

# Fraction of test rows whose predicted genre matches the actual genre.
score = accuracy_score(y_test, predictions)
score


['Jazz' 'Classical' 'HipHop' 'Acoustic']
['HipHop' 'Classical']


1.0