In [3]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier  # The model that we're going to use
from sklearn.model_selection import train_test_split  # helps us split data on train data and test data
from sklearn.metrics import accuracy_score  # helps us to measure the accuracy of our model
import joblib  # helps us to save trained models
from sklearn import tree  # helps us to visualize decision tree

In [4]:
# Read the music dataset and separate it into model inputs and the target column.
music_data = pd.read_csv("music.csv")
# Every column except "genre" is used as an input feature.
X = music_data.drop(columns="genre")
# "genre" is the label the classifier will learn to predict.
y = music_data["genre"]

In [5]:
# Splitting the data into a training set (used to fit the model) and a test set
# (used to measure accuracy on rows the model has not seen).
# test_size=0.2 holds out 20% of the rows; random_state pins the shuffle so the
# split, and therefore the reported accuracy, is reproducible across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [6]:
# Selecting the model for training.
# random_state is fixed because the tree permutes features randomly when
# searching for splits, so equally good splits could otherwise be resolved
# differently on each run.
model = DecisionTreeClassifier(random_state=42)

In [7]:
# Fit the decision tree on the training split only; the test split stays unseen.
# fit() returns the fitted estimator, which the notebook shows as the cell output.
model.fit(X=X_train, y=y_train)

DecisionTreeClassifier()

In [8]:
# Save the trained model to disk.
# joblib.dump returns the list of file names it wrote, not the estimator, so its
# result must not be assigned back to `model` (that would replace the trained
# classifier with a list of paths).
joblib.dump(model, "music-recommender.joblib")

In [9]:
# Restore the classifier from the "music-recommender.joblib" file on disk.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
model = joblib.load(filename="music-recommender.joblib")

In [10]:
# Making a prediction for a single person.
# The input is built as a DataFrame carrying the same column names as the
# training features, so scikit-learn does not warn about missing feature names
# when the model was fitted on a DataFrame.
# Gender 0 is presumably "female" — confirm against the encoding in music.csv.
sample = pd.DataFrame([[21, 0]], columns=X.columns)
predictions = model.predict(sample)  # What genre of music do women at the age of 21 like?
predictions



array(['Dance'], dtype=object)

In [11]:
# Measure accuracy on the held-out test split: the fraction of test rows
# whose predicted genre matches the true genre.
predictions = model.predict(X=X_test)
score = accuracy_score(y_true=y_test, y_pred=predictions)
score

1.0

In [12]:
# Export the fitted tree in Graphviz DOT format for visualisation.
# Feature names come from the feature frame and class names from the fitted
# model, so the labels always line up with what the tree was trained on:
# model.classes_ holds only the classes seen during fit, in the order the tree
# uses, whereas the full label column may contain classes absent from the
# training split.
tree.export_graphviz(
    model,
    out_file="music-recommender.dot",
    feature_names=list(X.columns),
    class_names=[str(name) for name in model.classes_],
    label="all",
    rounded=True,
    filled=True,
)