## Load Dataset

In [1]:
import pandas as pd

# Loading the data set
mus = pd.read_csv("Downloads/music.csv")
mus

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


## Inspect Dataset

In [2]:
# Data overview
mus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   age     18 non-null     int64 
 1   gender  18 non-null     int64 
 2   genre   18 non-null     object
dtypes: int64(2), object(1)
memory usage: 564.0+ bytes


In [3]:
# View first 5 rows
mus.head()

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz


In [4]:
# Summary statistics
mus.describe().round(2)

Unnamed: 0,age,gender
count,18.0,18.0
mean,27.94,0.5
std,5.13,0.51
min,20.0,0.0
25%,25.0,0.0
50%,28.0,0.5
75%,31.0,1.0
max,37.0,1.0


In [5]:
# Check for duplicates
mus.duplicated().sum()

0

In [6]:
# Check for missing value
mus.isnull().sum()

age       0
gender    0
genre     0
dtype: int64

## Encode the dependent variable (genre)

In [7]:
from sklearn.preprocessing import LabelEncoder

# Encode the dependent/target variable
label_encoder = LabelEncoder()
mus["genre_encoded"] = label_encoder.fit_transform(mus["genre"])

# Map gender values to catergorical names
mus["gender_spelt"] = mus["gender"].map({0: "Female", 1: "Male"})

# Display the mapping
print("Genre mapping:", dict(zip(label_encoder.classes_, range(len(label_encoder.classes_)))))
print("Gender mapping:", dict(zip(sorted(mus["gender"]), sorted(mus["gender_spelt"]))))

Genre mapping: {'Acoustic': 0, 'Classical': 1, 'Dance': 2, 'HipHop': 3, 'Jazz': 4}
Gender mapping: {0: 'Female', 1: 'Male'}


## Split Features and Lables

In [8]:
# Features (X) and labels (y)
X = mus[["age", "gender",]]
y = mus["genre_encoded"]

from sklearn.model_selection import train_test_split

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Display sample splits
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

Training data shape: (14, 2)
Testing data shape: (4, 2)


In [9]:
# View test features
X_test

Unnamed: 0,age,gender
0,20,1
1,23,1
8,37,1
5,30,1


## Train the model

In [10]:
from sklearn.tree import DecisionTreeClassifier

# Initialize the model
model = DecisionTreeClassifier(random_state=42)

# Train the model
model.fit(X_train, y_train)

## Make Predictions

In [11]:
# Predict on the test data
y_pred = model.predict(X_test)

# Display predictions
print(f"Predictions: {label_encoder.inverse_transform(y_pred)}\n")

# Better display
prediction = pd.DataFrame(label_encoder.inverse_transform(y_pred), columns=["Predictions"])
prediction.index = [X_test.index]
prediction

Predictions: ['HipHop' 'HipHop' 'Classical' 'Jazz']



Unnamed: 0,Predictions
0,HipHop
1,HipHop
8,Classical
5,Jazz


## Evaluate the Models

In [12]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         1
           3       1.00      1.00      1.00         2
           4       1.00      1.00      1.00         1

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

Confusion Matrix:
[[1 0 0]
 [0 2 0]
 [0 0 1]]


## Make predictions for new data

In [13]:
# Prediction for new data
new_mus = pd.DataFrame({
    "age": [20],
    "gender": ["male"],
    
})

# Define a function that takes any gender capitalization
def standardize_gender(dataframe):
    
    # Normalize the gender column to capitalize
    dataframe["gender"] = dataframe["gender"].str.capitalize()
    
    # Mapping for gender
    gender_map = {"Male": 1, "Female": 0, "1": 1, "0": 0}  # Handles both str and num input for gender
    
    # Convert new gender input to numeric
    dataframe["gender"] = dataframe["gender"].map(gender_map)
    
    return dataframe

new_mus = standardize_gender(new_mus)

# Using the trained model to predict
predicted_genre_encoded = model.predict(new_mus)

# Decode the predicted genre
predicted_genre = label_encoder.inverse_transform(predicted_genre_encoded)

print(f"Suggested Genre: {predicted_genre[0]}")

Suggested Genre: HipHop
