<a href="https://colab.research.google.com/github/Swayms-stack/ML-Projects/blob/main/SIMPLE_MUSIC_RECOMMENDATION_SYSTEM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
'''STEPS IN MACHINE LEARNING'''
# 1. IMPORT THE DATA
# 2. CLEAN THE DATA
# 3. SPLIT THE DATA INTO TRAINING & TEST SETS
# 4. CREATE A MODEL
# 5. TRAIN THE MODEL
# 6. MAKE PREDICTIONS
# 7. EVALUATION AND IMPROVEMENT (TO OPTIMIZE THE ACCURACY)

'STEPS IN MACHINE LEARNING'

In [2]:
# Importing libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Import the file into Google Colab drive
from google.colab import files 
# Import io, which opens the file from the Colab drive
import io
%matplotlib inline
print ('Modules Imported')

Modules Imported


***IMPORTING THE DATASET***

In [3]:
# Upload the dataset from the local machine.
# `uploaded` is looked up by file name in the next cell to obtain the file's bytes.
uploaded = files.upload()

Saving music.csv to music.csv


In [4]:
# Loading the dataset
# `uploaded` maps each uploaded file name to that file's raw bytes.
# Index with [] instead of .get(): if the file was saved under a different name
# (e.g. Colab renames a re-upload to "music (1).csv"), [] raises a clear KeyError,
# whereas .get() returns None, io.BytesIO(None) becomes an empty buffer and
# pandas then fails with an unrelated-looking EmptyDataError.
df = pd.read_csv(io.BytesIO(uploaded['music.csv']))
df

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


***PREPARING THE DATA***

In [5]:
# Number of missing values in each column
df.isna().sum()

age       0
gender    0
genre     0
dtype: int64

In [6]:
# Shape of the dataset as a (number of rows, number of columns) tuple
df.shape 

(18, 3)

In [7]:
# Column names of the DataFrame, as a plain Python list
df.columns.tolist()

['age', 'gender', 'genre']

In [8]:
# Concise summary: index range, column dtypes, non-null counts and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   age     18 non-null     int64 
 1   gender  18 non-null     int64 
 2   genre   18 non-null     object
dtypes: int64(2), object(1)
memory usage: 560.0+ bytes


In [9]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

Unnamed: 0,age,gender
count,18.0,18.0
mean,27.944444,0.5
std,5.12746,0.514496
min,20.0,0.0
25%,25.0,0.0
50%,28.0,0.5
75%,31.0,1.0
max,37.0,1.0


In [10]:
# Separate the DataFrame into model inputs and the expected output.
# X: the features (every column except genre, i.e. age and gender)
X = df.drop('genre', axis='columns')
# Y: the target the model should learn to predict (genre)
Y = df.loc[:, 'genre']
Y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

***Learning and Predicting***

In [11]:
# The decision tree algorithm falls under the category of supervised learning.
# It can be used to solve both regression and classification problems;
# here it is used as a classifier (the target, genre, is a label).
from sklearn.tree import DecisionTreeClassifier
# Create a new classifier instance.
# scikit-learn randomly permutes the features at each split, so random_state is
# fixed to make the fitted tree identical on every run of the notebook.
model = DecisionTreeClassifier(random_state=42)
# Train the model to identify patterns in the data
model.fit(X, Y)  # fit takes the two datasets: inputs (X) and expected outputs (Y)
print('Model Trained')

Model Trained


In [12]:
# Make predictions and store them in a variable called predictions
# Q. What kind of music do a 21-year-old male and a 22-year-old female like?
# (Male - 1 & Female - 0)
# The model was fitted on a DataFrame, so the new samples are passed as a DataFrame
# with the same column names; a bare list of lists makes scikit-learn warn that
# "X does not have valid feature names".
new_listeners = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
predictions = model.predict(new_listeners)
predictions

array(['HipHop', 'Dance'], dtype=object)

*We can see that the model predicted that a 21-year-old male likes HipHop and a 22-year-old female likes Dance*

***Measuring Accuracy***

In [13]:
# SPLIT THE DATA INTO TRAINING (80 %) & TEST (20 %) SETS
from sklearn.model_selection import train_test_split
# Module to be imported for measuring the accuracy
from sklearn.metrics import accuracy_score
# train_test_split returns, for every array passed in, its training part followed by
# its test part, i.e. (X_train, X_test, Y_train, Y_test). Unpacking in any other
# order pairs inputs with inputs instead of inputs with labels.
# test_size = 0.2 allocates 20 % of the data for testing; random_state fixes the
# shuffle so the reported score is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
# X_train, X_test : inputs (age, gender); Y_train, Y_test : outputs (genre)

# Train on the training rows only
model.fit(X_train, Y_train)
# Predict on the held-out rows the model has not seen
predictions = model.predict(X_test)  # X_test contains input values for testing
# For checking the accuracy, compare the predicted values with the actual values.
# The score is the fraction of correct predictions, a value between 0 and 1.
score = accuracy_score(Y_test, predictions)
# Y_test contains actual values & predictions contains predicted values
# Display the score on the console
print(score)

Use Ctrl + / to comment out the entire code

In [14]:
# Code For Google Colab: self-contained load -> split -> train -> evaluate script
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
df = pd.read_csv('music.csv')
X = df.drop(columns=['genre'])  # inputs: every column except genre
Y = df['genre']                 # output: genre
# train_test_split returns (X_train, X_test, Y_train, Y_test), in that order.
# random_state fixes the shuffle so the score is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
model = DecisionTreeClassifier(random_state=42)
# Fit on the training rows only and score on the held-out rows. Fitting and scoring
# on the same full dataset only measures how well the tree memorised its training
# data, not how well it predicts unseen listeners.
model.fit(X_train, Y_train)
predictions = model.predict(X_test)
score = accuracy_score(Y_test, predictions)
score

1.0

***Persisting Models***

In [15]:
# Model persistence saves the trained model into a file,
# so that the model does not need to be trained again.

# Joblib is a set of tools to provide lightweight pipelining in Python.
# It provides dump / load functions for saving & loading models.
# Import joblib directly: the sklearn.externals.joblib shim was deprecated in
# scikit-learn 0.21 and removed in 0.23. joblib is a dependency of scikit-learn,
# so it is available wherever scikit-learn is installed.
import joblib
df = pd.read_csv('music.csv')
X = df.drop(columns=['genre'])
Y = df['genre']

# random_state makes the saved tree reproducible across runs
model = DecisionTreeClassifier(random_state=42)
model.fit(X, Y)
# music-recommender.joblib is the name of the file where the model is saved.
# NOTE: joblib files are pickle-based; only joblib.load() files you trust.
joblib.dump(model, 'music-recommender.joblib')




['music-recommender.joblib']

***Visualizing a Decision Tree***

In [17]:
# Exporting the model in visual format
from sklearn import tree
df = pd.read_csv('music.csv')
X = df.drop(columns=['genre'])
Y = df['genre']
# random_state makes the exported tree the same on every run
model = DecisionTreeClassifier(random_state=42)
model.fit(X, Y)
tree.export_graphviz(
    model,
    out_file='music-recommender.dot',   # dot format (graph description language)
    # Take the feature names from the columns the model was actually fitted on,
    # so the labels cannot drift from the data if columns are added or renamed.
    feature_names=list(X.columns),
    # model.classes_ is the label order the fitted tree uses internally;
    # converted to a plain list of str for export_graphviz.
    class_names=list(model.classes_),
    label='all',
    rounded=True,   # rounded edges
    filled=True,    # fill every box with a different colour
)

In [None]:
'''For visalization : '''
# How to view the exported music-recommender.dot file:
# 1. Install VS Code
# 2. Drag and drop the music-recommender.dot file into VS Code to open it
# 3. Install the Graphviz extension for VS Code (the version by Joao Pinto).
# 4. Click on the 3 dots in the top-right corner of the editor and open the preview.