# Load Required Library

In [1]:
# pandas supplies the DataFrame used throughout this notebook
import pandas as pd

# Read the music dataset from its .csv file into a DataFrame and display it
csv_path = r'C:\Users\USER\Desktop\music.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


In [2]:
# Separate the input set (the 'age' and 'gender' columns) from the output set ('genre')
X, y = df[['age', 'gender']], df['genre']

In [3]:
# Display the output set (the 'genre' column selected above) to check it
y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

In [4]:
# Display the input set (the 'age' and 'gender' columns selected above) to check it
X

Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1
3,26,1
4,29,1
5,30,1
6,31,1
7,33,1
8,37,1
9,20,0


In [5]:
# Alternative way to build the input set: keep every column except the 'genre' target
X = df.drop('genre', axis='columns')
X

Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1
3,26,1
4,29,1
5,30,1
6,31,1
7,33,1
8,37,1
9,20,0


In [9]:
# Take the 'genre' column as the output set and display it
y = df.loc[:, 'genre']
y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

# Learning and Prediction

In [10]:
# import required libraries
import pandas as pd
from sklearn.tree import DecisionTreeClassifier # implement decision tree algorithm
from sklearn.model_selection import train_test_split # with this we can easily split our trainset and testset

# import the .csv file to work with
df = pd.read_csv(r'C:\Users\USER\Desktop\music.csv')
X = df.drop(columns=['genre'])  # input set: every column except the target
y = df['genre']                 # output set: the genre labels

# creating an instance of the DecisionTreeClassifier
my_model = DecisionTreeClassifier()
# train the model on the whole dataset
my_model.fit(X, y)

# Ask the model to make predictions for two new samples.
# The model was fitted on a DataFrame, so the samples are wrapped in a DataFrame
# with the same column names as X; predicting on a bare list makes scikit-learn
# warn that the input has no valid feature names.
new_samples = pd.DataFrame([[19, 1], [18, 0]], columns=X.columns)
predict_data = my_model.predict(new_samples)
predict_data



array(['HipHop', 'Dance'], dtype=object)

# Measuring or Calculating the Accuracy of the Model

In [12]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier # implement decision tree algorithm
from sklearn.model_selection import train_test_split # with this we can easily split our trainset and testset
from sklearn.metrics import accuracy_score

# Fixed seed so the train/test split, the fitted tree and therefore the reported
# accuracy are the same on every run of this cell.
SEED = 42

# import the .csv file to work with
df = pd.read_csv(r'C:\Users\USER\Desktop\music.csv')
X = df.drop(columns=['genre'])  # input set: every column except the target
y = df['genre']                 # output set: the genre labels

# Hold back 20% of the rows for testing; the remaining 80% are used for training
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# creating an instance of the DecisionTreeClassifier
my_model = DecisionTreeClassifier(random_state=SEED)

# train the model on the training rows only
my_model.fit(X_train, y_train)

# ask the model to predict the genre of the held-back test rows
predict_data = my_model.predict(X_test)

# fraction of test rows whose predicted genre matches the true genre
result = accuracy_score(y_test, predict_data)
result

1.0

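### An accuracy of 1.0 means every sample in the test set was predicted correctly. The test set is only 20% of a very small dataset, so this score depends heavily on which rows were held back and is not proof that the model is perfect.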

# Persisting Model

#### Here we save the trained model to a file so that it can be reused anywhere in our code without necessarily training it again.

In [14]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib as jb  # used here to write the fitted model to disk

# Load the dataset, then separate the genre labels from the input columns
df = pd.read_csv(r'C:\Users\USER\Desktop\music.csv')
y = df['genre']
X = df.drop(columns=['genre'])

# Create the classifier and train it on every row in one step
my_model = DecisionTreeClassifier().fit(X, y)

# Save the trained model to a file; the call's return value is shown as the cell output
jb.dump(my_model, 'musicHub.joblib')

['musicHub.joblib']

In [15]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib # library used for saving and loading a model

# No training happens in this cell: the already-fitted model is restored from
# the 'musicHub.joblib' file instead of being rebuilt from the .csv data.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
model = joblib.load('musicHub.joblib')

# The saved model was fitted on a DataFrame with the columns 'age' and 'gender',
# so the sample is given the same column names; predicting on a bare list makes
# scikit-learn warn that the input has no valid feature names.
sample = pd.DataFrame([[33, 0]], columns=['age', 'gender'])
predictions = model.predict(sample)
predictions



array(['Classical'], dtype=object)

# Visualizing Decision Trees

##### To visualize the decision tree, we have to download and install the Graphviz extension in Visual Studio Code and use it to open the exported `musicHub.dot` file.

In [16]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Load the dataset, then separate the genre labels from the input columns
df = pd.read_csv(r'C:\Users\USER\Desktop\music.csv')
y = df['genre']
X = df.drop(columns=['genre'])

# Create the classifier and train it on every row in one step
model = DecisionTreeClassifier().fit(X, y)

# Distinct genre labels in ascending order, used to name the classes in the export
genre_labels = sorted(y.unique())

# Write the fitted tree to a Graphviz .dot file
tree.export_graphviz(
    model,
    out_file='musicHub.dot',
    feature_names=['age', 'gender'],
    class_names=genre_labels,
    filled=True,
    rounded=True,
    label='all',
)
                     