In [1]:
import librosa
import numpy as np
import os
import pandas as pd

def extract_features(file_path):
    """Summarise one audio file as a flat dictionary of scalar features.

    At most the first 30 seconds of ``file_path`` are loaded with librosa.
    Each frame-level feature is reduced to its overall mean, and each of the
    20 MFCC coefficients is reduced to its own mean.

    Parameters
    ----------
    file_path : str
        Path to an audio file that ``librosa.load`` can read.

    Returns
    -------
    dict or None
        Keys are ``chroma_stft``, ``rmse``, ``spectral_centroid``,
        ``spectral_bandwidth``, ``rolloff``, ``zero_crossing_rate``,
        ``harmony``, ``perceptr``, ``tempo`` and ``mfcc1`` .. ``mfcc20``.
        ``tempo`` is always a plain Python ``float``; the other values are the
        NumPy scalars returned by ``np.mean``. ``None`` is returned when any
        step raises; the error is printed rather than propagated so that one
        bad file does not abort a whole batch.
    """
    # Number of MFCC coefficients; shared by the librosa call and the key loop
    # so the two can never disagree.
    n_mfcc = 20

    try:
        # Load the audio file (first 30 seconds only)
        y, sr = librosa.load(file_path, duration=30)

        # Frame-level descriptors
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        rmse = librosa.feature.rms(y=y)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zero_crossing_rate = librosa.feature.zero_crossing_rate(y)

        # Harmonic / percussive components of the waveform
        harmony = librosa.effects.harmonic(y)
        perceptr = librosa.effects.percussive(y)

        # Only the tempo estimate is used; the beat frames are discarded.
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

        features = {
            'chroma_stft': np.mean(chroma_stft),
            'rmse': np.mean(rmse),
            'spectral_centroid': np.mean(spectral_centroid),
            'spectral_bandwidth': np.mean(spectral_bandwidth),
            'rolloff': np.mean(rolloff),
            'zero_crossing_rate': np.mean(zero_crossing_rate),
            'harmony': np.mean(harmony),
            'perceptr': np.mean(perceptr),
            # beat_track may hand back tempo as a one-element array instead of
            # a scalar. Stored as-is, the array is written to CSV as a string
            # such as "[120.]", so collapse it to a real float here.
            'tempo': float(np.mean(tempo)),
        }

        # One mean per MFCC coefficient row, keyed mfcc1..mfcc20
        for i, coefficient_frames in enumerate(mfccs[:n_mfcc], start=1):
            features[f'mfcc{i}'] = np.mean(coefficient_frames)

        return features
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller skip
        # this file.
        print(f"Error processing {file_path}: {e}")
        return None

In [2]:
def process_directory(directory, label, features_list, labels_list):
    """Extract features for every audio file in ``directory``.

    Files are visited in sorted name order so the resulting rows are
    reproducible across runs and machines (``os.listdir`` makes no ordering
    promise). The extension check is case-insensitive, so ``song.MP3`` is
    picked up just like ``song.mp3``.

    Parameters
    ----------
    directory : str
        Folder to scan (not recursive).
    label : object
        Class label appended once for every successfully processed file.
    features_list : list
        Extended in place with one feature dict per processed file.
    labels_list : list
        Extended in place with ``label``, kept parallel to ``features_list``.

    Returns
    -------
    None
        Results are communicated through the two list arguments.
    """
    audio_extensions = ('.mp3', '.wav')

    for file_name in sorted(os.listdir(directory)):
        if not file_name.lower().endswith(audio_extensions):
            continue

        file_path = os.path.join(directory, file_name)
        print(f"Processing {file_path}")
        features = extract_features(file_path)

        # extract_features returns None on failure; skip such files so the
        # two output lists stay aligned.
        if features is not None:
            features_list.append(features)
            labels_list.append(label)


In [3]:
def build_dataset(features_list, labels_list):
    """Combine per-file feature dicts and their labels into one table.

    Parameters
    ----------
    features_list : list of dict
        One mapping of feature name to value per sample.
    labels_list : list
        Labels in the same order as ``features_list``.

    Returns
    -------
    pandas.DataFrame
        One row per sample, one column per feature key, plus a ``label``
        column holding ``labels_list``.
    """
    return pd.DataFrame(features_list).assign(label=labels_list)

In [4]:
# Root folder of the audio collection; each class lives in its own sub-folder.
dataset_path = r"C:\Users\AkshithaKochika\Downloads\Hit Prediction\Audio_Dataset"
hit_dir = os.path.join(dataset_path, 'hit songs')
flop_dir = os.path.join(dataset_path, 'flop songs')

# Accumulators filled in place by process_directory (kept index-aligned).
features_list, labels_list = [], []

# Hits are labelled 1 and flops 0; hits are processed first.
for class_dir, class_label in ((hit_dir, 1), (flop_dir, 0)):
    process_directory(class_dir, class_label, features_list, labels_list)

# Assemble the feature table and persist it for the training cells.
features_df = build_dataset(features_list, labels_list)
features_df.to_csv('audio_features.csv', index=False)

Processing C:\Users\AkshithaKochika\Downloads\Hit Prediction\Audio_Dataset\hit songs\01 - Niluvadhamu Ninu Epudaina - SenSongsMp3.co.mp3
Processing C:\Users\AkshithaKochika\Downloads\Hit Prediction\Audio_Dataset\hit songs\04 - Chandrullo Unde Kundelu  - SenSongsMp3.co.mp3
Processing C:\Users\AkshithaKochika\Downloads\Hit Prediction\Audio_Dataset\hit songs\Aakasam Baddalaina-SenSongsMp3.Co.mp3
Processing C:\Users\AkshithaKochika\Downloads\Hit Prediction\Audio_Dataset\hit songs\Aggipulla Lanti-SenSongsMp3.Co.mp3
Processing C:\Users\AkshithaKochika\Downloads\Hit Prediction\Audio_Dataset\hit songs\Anuvanuvuu.mp3
Processing C:\Users\AkshithaKochika\Downloads\Hit Prediction\Audio_Dataset\hit songs\Chali Chaliga-SenSongsMp3.Co.mp3
Processing C:\Users\AkshithaKochika\Downloads\Hit Prediction\Audio_Dataset\hit songs\Chilipiga.mp3
Processing C:\Users\AkshithaKochika\Downloads\Hit Prediction\Audio_Dataset\hit songs\Chiru Chiru Chiru.mp3
Processing C:\Users\AkshithaKochika\Downloads\Hit Prediction

In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

def convert_string_lists_to_numeric(df):
    """Replace text columns holding numeric literals with their mean value.

    Every object- or string-typed column is converted cell by cell: a string
    such as ``"[120.0]"`` or ``"[90.0, 110.0]"`` is parsed as a Python literal
    and reduced with ``np.mean``. Cells that are not strings are passed to
    ``np.mean`` directly. If any cell in a column cannot be converted, the
    column is left exactly as it was and a message is printed.

    Parsing uses ``ast.literal_eval`` rather than ``eval``, so text read from
    a file can only ever produce literals and can never execute code.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to convert. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.
    """
    import ast  # local import: only this function needs it

    def _cell_mean(value):
        # Strings are parsed as literals first; anything else is averaged as-is.
        if isinstance(value, str):
            value = ast.literal_eval(value)
        return np.mean(value)

    for column in df.columns:
        series = df[column]
        # Cover both classic object columns and pandas' dedicated string dtypes.
        is_textual = (
            pd.api.types.is_object_dtype(series)
            or pd.api.types.is_string_dtype(series)
        )
        if not is_textual:
            continue

        try:
            df[column] = series.apply(_cell_mean)
        except Exception as e:
            print(f"Could not convert column {column} to numeric values. Error: {e}")
    return df

def preprocess_data(features_df, return_scaler=False):
    """Turn the raw feature table into a standardized matrix and label vector.

    Steps:
      1. ``convert_string_lists_to_numeric`` converts textual columns.
      2. The ``label`` column is split off as the target.
      3. Missing feature values are filled with that column's mean. The label
         column is deliberately not imputed, since a mean of class labels is
         not a valid label.
      4. Features are standardized with a freshly fitted ``StandardScaler``.

    Parameters
    ----------
    features_df : pandas.DataFrame
        Feature table containing a ``label`` column.
    return_scaler : bool, default False
        When True the fitted scaler is returned as a third element, so the
        same transformation can be applied to new samples later. With the
        default, the scaler is discarded once this function returns.

    Returns
    -------
    tuple
        ``(X_scaled, y)``, or ``(X_scaled, y, scaler)`` when ``return_scaler``
        is True. ``X_scaled`` is the result of ``scaler.fit_transform`` on the
        feature columns; ``y`` is the ``label`` column.

    Raises
    ------
    KeyError
        If ``features_df`` has no ``label`` column.
    """
    # Convert string lists to numeric values
    features_df = convert_string_lists_to_numeric(features_df)

    y = features_df['label']
    feature_frame = features_df.drop(columns=['label'])

    # Impute on a new frame (the caller's frame is not filled in place) and
    # average numeric columns only, so a column that could not be converted
    # does not break the mean computation.
    feature_frame = feature_frame.fillna(feature_frame.mean(numeric_only=True))

    # Scale the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(feature_frame)

    if return_scaler:
        return X_scaled, y, scaler
    return X_scaled, y

# Reload the feature table written by the extraction cell, then derive the
# scaled feature matrix X and the label vector y used by the training cells.
features_df = pd.read_csv('audio_features.csv')
X, y = preprocess_data(features_df)


In [7]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Base estimator; the fixed seed keeps the forests reproducible.
rf = RandomForestClassifier(random_state=42)

# Candidate hyperparameters: 3 values for each of 4 settings.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Exhaustive search with 5-fold cross-validation, using every available core.
grid_search = GridSearchCV(rf, param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X, y)

# Report the winning configuration and its mean cross-validated score.
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)


Fitting 5 folds for each of 81 candidates, totalling 405 fits
Best parameters found:  {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 100}
Best cross-validation score:  0.8036363636363637


In [8]:
# Take the model with the best parameters.
# GridSearchCV was created with its default refit=True, so best_estimator_ has
# already been refitted on the complete X, y. Fitting it again here would only
# repeat that work (the estimator's random_state is fixed), so no extra fit is
# needed.
best_rf = grid_search.best_estimator_

# Save the trained model
import joblib
joblib.dump(best_rf, 'finetuned_audio_hit_flop_model.pkl')


['finetuned_audio_hit_flop_model.pkl']

In [9]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix

# Five-fold cross-validated accuracy of the tuned model.
cv_scores = cross_val_score(best_rf, X, y, cv=5)
print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# In-sample check: these predictions are made on the same X the model was
# fitted on, so the report below is optimistic compared with the
# cross-validated scores above.
y_pred = best_rf.predict(X)
training_report = classification_report(y, y_pred)
training_confusion = confusion_matrix(y, y_pred)
print("Classification Report:\n", training_report)
print("Confusion Matrix:\n", training_confusion)


Cross-validation scores: [0.81818182 0.8        0.8        0.7        0.9       ]
Mean cross-validation score: 0.8036363636363637
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.93      0.97        15
           1       0.97      1.00      0.99        36

    accuracy                           0.98        51
   macro avg       0.99      0.97      0.98        51
weighted avg       0.98      0.98      0.98        51

Confusion Matrix:
 [[14  1]
 [ 0 36]]


In [11]:
import joblib
import pandas as pd

def predict_song(file_path, model_path='finetuned_audio_hit_flop_model.pkl', scaler_path=None):
    """Classify one audio file as ``'Hit'`` or ``'Flop'``.

    Parameters
    ----------
    file_path : str
        Audio file to classify; passed to ``extract_features``.
    model_path : str
        joblib file containing the fitted classifier.
    scaler_path : str or None, default None
        Optional joblib file containing the fitted feature scaler. In this
        notebook the classifier is trained on the standardized output of
        ``preprocess_data``, so raw features are on a different scale from
        the training data. Supply the scaler that was fitted at training time
        to apply the same transformation here. With ``None``, features are
        passed through unscaled.

    Returns
    -------
    str
        ``'Hit'`` when the model predicts class 1, ``'Flop'`` otherwise, or
        ``"Error processing the song."`` when feature extraction fails.

    Notes
    -----
    ``joblib.load`` unpickles arbitrary Python objects; only load model and
    scaler files from a trusted source.
    """
    model = joblib.load(model_path)
    features = extract_features(file_path)
    if features is None:
        return "Error processing the song."

    # Build a single-row float matrix. The model was fitted on a plain array,
    # so an array is passed here as well, and np.mean collapses any feature
    # that arrives as a one-element array (e.g. tempo) to a scalar.
    # Assumes the dict order matches the training column order; both come
    # from extract_features.
    sample = np.array([[float(np.mean(value)) for value in features.values()]])

    if scaler_path is not None:
        sample = joblib.load(scaler_path).transform(sample)

    prediction = model.predict(sample)
    return 'Hit' if prediction[0] == 1 else 'Flop'

# Example usage: classify a single local file with the default saved model and
# print the verdict. The path below is machine-specific; point it at any audio
# file that extract_features can read.
new_song_path = r"C:\Users\AkshithaKochika\Downloads\Pilichina-SenSongsMp3.Co.mp3"
prediction = predict_song(new_song_path)
print(f'The song is predicted to be a {prediction}')


The song is predicted to be a Hit


