# Naive Bayes Classifier for Song Explicitness Detection

## Importing Libraries

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler


## Loading the Data

In [None]:

# Location of the songs dataset; it is parsed as CSV below
file_path = '/mnt/data/file-LjTfiBVd3Yw7OInh8bU8sMdL'

# Read the file into a DataFrame used by the later cells
songs_df = pd.read_csv(filepath_or_buffer=file_path)


## Preprocessing and Training the Model

In [None]:

# Fixed seed so the train/test split below is reproducible across runs
random_seed = 42

# Label column and the feature columns used as model inputs
target = 'explicit'
features = [
    'popularity', 'danceability', 'energy', 'loudness', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
]

# Feature matrix and label vector (labels are cast to int)
X = songs_df[features]
y = songs_df[target].astype(int)

# Hold out half of the rows as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=random_seed,
)

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so no test-set statistics leak into training
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build and train the Gaussian Naive Bayes classifier (fit returns the estimator)
gnb = GaussianNB().fit(X_train_scaled, y_train)

# Predict labels for the held-out rows
y_pred = gnb.predict(X_test_scaled)

# Compute accuracy and the classification report for display in a later cell
accuracy = accuracy_score(y_test, y_pred)
classification_report_output = classification_report(y_test, y_pred)


## Results

In [None]:
# Show the overall accuracy, then the classification report on the next line
print(f'Accuracy: {accuracy}', classification_report_output, sep='\n')