In [1]:
import pandas as pd

# Load your dataset
df = pd.read_csv('/Users/Shanti/Desktop/Job Search/Personal Projects/features.csv')


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [3]:
# Separate features and labels
X = df.drop('class', axis=1)
y = df['class']

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Save feature names
feature_names = X_train.columns

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
from imblearn.over_sampling import SMOTE

# Apply SMOTE to the training data
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)


In [5]:
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier

# Create the parameter grid
param_grid = {
    'learning_rate': [0.01, 0.1],
    'max_depth': [4, 6],
    'gamma': [0, 0.1,],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0],
    'n_estimators': [100, 200]
}

# Create the model
xgb_model = XGBClassifier(objective='multi:softmax', num_class=len(label_encoder.classes_))

# Perform grid search
grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, scoring='accuracy', cv=3, verbose=1, n_jobs=-1)

# Fit grid search on resampled training data
grid_search.fit(X_train_resampled, y_train_resampled)



Fitting 3 folds for each of 64 candidates, totalling 192 fits




In [6]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report

# Get the best model
best_model = grid_search.best_estimator_

# Make predictions with the best model
y_pred_best = best_model.predict(X_test)

# Evaluate the best model
accuracy_best = accuracy_score(y_test, y_pred_best)
print(f'Best Model Accuracy: {accuracy_best}')
print('Best Model Classification Report:')
print(classification_report(y_test, y_pred_best, target_names=label_encoder.classes_))

Best Model Accuracy: 0.7516778523489933
Best Model Classification Report:
              precision    recall  f1-score   support

         cel       0.69      0.74      0.72        73
         cla       0.73      0.69      0.71       105
         flu       0.69      0.66      0.67        87
         gac       0.74      0.90      0.81       108
         gel       0.67      0.81      0.73       140
         org       0.75      0.80      0.77       139
         pia       0.82      0.74      0.78       162
         sax       0.70      0.59      0.64       127
         tru       0.83      0.79      0.81       116
         vio       0.75      0.63      0.68       126
         voi       0.85      0.87      0.86       158

    accuracy                           0.75      1341
   macro avg       0.75      0.75      0.74      1341
weighted avg       0.75      0.75      0.75      1341



In [7]:
# saving the model
# After training your model, save it using joblib.dump.
import joblib

# Save the model
joblib.dump(best_model, 'best_xgboost_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')
joblib.dump(feature_names, 'feature_names.pkl')


['feature_names.pkl']

In [None]:
# Load the Model
# When you need to use the model again, load it using joblib.load.
import joblib

# Load the model
best_model = joblib.load('best_xgboost_model.pkl')
scaler = joblib.load('scaler.pkl')
label_encoder = joblib.load('label_encoder.pkl')


Pipeline

In [11]:
import os
import glob
from pydub import AudioSegment

def convert_all_mp3_to_wav(root_directory):
    for subdir, _, files in os.walk(root_directory):
        for file in files:
            if file.endswith('.mp3'):
                mp3_path = os.path.join(subdir, file)
                wav_path = os.path.join(subdir, file.replace('.mp3', '.wav'))
                audio = AudioSegment.from_mp3(mp3_path)
                audio.export(wav_path, format='wav')
                print(f"Converted {mp3_path} to {wav_path}")


In [12]:
# Define the root directory where your MP3 files are stored
root_directory = '/Users/Shanti/Desktop/Job Search/Personal Projects/IRMAS-TrainingData/jazz'

# Run the conversion
convert_all_mp3_to_wav(root_directory)


Converted /Users/Shanti/Desktop/Job Search/Personal Projects/IRMAS-TrainingData/jazz/03TakeFive.mp3 to /Users/Shanti/Desktop/Job Search/Personal Projects/IRMAS-TrainingData/jazz/03TakeFive.wav


In [8]:
import sys, os
import essentia.standard
from essentia.streaming import *
import numpy as np
import pandas as pd
import glob


In [9]:
FILE_EXT = "*.wav"

class FeatureExtractor(essentia.streaming.CompositeBase):

    def __init__(self, frameSize=2048, hopSize=1024, sampleRate=44100.):
        super(FeatureExtractor, self).__init__()

        halfSampleRate = sampleRate / 2
        minFrequency = sampleRate / frameSize

        fc = FrameCutter(frameSize=frameSize, hopSize=hopSize)
        zcr = ZeroCrossingRate()
        fc.frame >> zcr.signal
        w = Windowing(type='blackmanharris62')
        fc.frame >> w.frame
        spec = Spectrum()
        w.frame >> spec.frame
        energy = Energy()
        spec.spectrum >> energy.array
        rms = RMS()
        spec.spectrum >> rms.array
        square1 = UnaryOperator(type='square')
        centroid = Centroid(range=halfSampleRate)
        spec.spectrum >> square1.array >> centroid.array
        cm = CentralMoments(range=halfSampleRate)
        ds = DistributionShape()
        spec.spectrum >> cm.array
        cm.centralMoments >> ds.centralMoments
        mfcc = MFCC(numberBands=40, numberCoefficients=13, sampleRate=sampleRate)
        spec.spectrum >> mfcc.spectrum
        mfcc.bands >> None
        lpc = LPC(order=10, sampleRate=sampleRate)
        spec.spectrum >> lpc.frame
        lpc.reflection >> None
        square2 = UnaryOperator(type='square')
        decrease = Decrease(range=halfSampleRate)
        spec.spectrum >> square2.array >> decrease.array
        ebr_low = EnergyBand(startCutoffFrequency=20, stopCutoffFrequency=150, sampleRate=sampleRate)
        ebr_mid_low = EnergyBand(startCutoffFrequency=150, stopCutoffFrequency=800, sampleRate=sampleRate)
        ebr_mid_hi = EnergyBand(startCutoffFrequency=800, stopCutoffFrequency=4000, sampleRate=sampleRate)
        ebr_hi = EnergyBand(startCutoffFrequency=4000, stopCutoffFrequency=20000, sampleRate=sampleRate)
        spec.spectrum >> ebr_low.spectrum
        spec.spectrum >> ebr_mid_low.spectrum
        spec.spectrum >> ebr_mid_hi.spectrum
        spec.spectrum >> ebr_hi.spectrum
        hfc = HFC(sampleRate=sampleRate)
        spec.spectrum >> hfc.spectrum
        flux = Flux()
        spec.spectrum >> flux.spectrum
        ro = RollOff(sampleRate=sampleRate)
        spec.spectrum >> ro.spectrum
        sp = StrongPeak()
        spec.spectrum >> sp.spectrum
        barkBands = BarkBands(numberBands=27, sampleRate=sampleRate)
        spec.spectrum >> barkBands.spectrum
        crest = Crest()
        barkBands.bands >> crest.array
        flatness = FlatnessDB()
        barkBands.bands >> flatness.array
        cmbb = CentralMoments(range=26)
        dsbb = DistributionShape()
        barkBands.bands >> cmbb.array
        cmbb.centralMoments >> dsbb.centralMoments
        scx = SpectralComplexity(magnitudeThreshold=0.005, sampleRate=sampleRate)
        spec.spectrum >> scx.spectrum
        pitch = PitchYinFFT(frameSize=frameSize, sampleRate=sampleRate)
        spec.spectrum >> pitch.spectrum
        pitch.pitch >> None
        ps = PitchSalience(sampleRate=sampleRate)
        spec.spectrum >> ps.spectrum
        sc = SpectralContrast(frameSize=frameSize, sampleRate=sampleRate, numberBands=6, lowFrequencyBound=20, highFrequencyBound=11000, neighbourRatio=0.4, staticDistribution=0.15)
        spec.spectrum >> sc.spectrum
        peaks = SpectralPeaks(orderBy='frequency', minFrequency=minFrequency, sampleRate=sampleRate)
        spec.spectrum >> peaks.spectrum
        diss = Dissonance()
        peaks.frequencies >> diss.frequencies
        peaks.magnitudes >> diss.magnitudes
        harmPeaks = HarmonicPeaks()
        peaks.frequencies >> harmPeaks.frequencies
        peaks.magnitudes >> harmPeaks.magnitudes
        pitch.pitch >> harmPeaks.pitch
        tristimulus = Tristimulus()
        harmPeaks.harmonicFrequencies >> tristimulus.frequencies
        harmPeaks.harmonicMagnitudes >> tristimulus.magnitudes
        odd2even = OddToEvenHarmonicEnergyRatio()
        harmPeaks.harmonicFrequencies >> odd2even.frequencies
        harmPeaks.harmonicMagnitudes >> odd2even.magnitudes
        inharmonicity = Inharmonicity()
        harmPeaks.harmonicFrequencies >> inharmonicity.frequencies
        harmPeaks.harmonicMagnitudes >> inharmonicity.magnitudes

        self.inputs['signal'] = fc.signal
        self.outputs['zcr'] = zcr.zeroCrossingRate
        self.outputs['spectral_energy'] = energy.energy
        self.outputs['spectral_rms'] = rms.rms
        self.outputs['mfcc'] = mfcc.mfcc
        self.outputs['lpc'] = lpc.lpc
        self.outputs['spectral_centroid'] = centroid.centroid
        self.outputs['spectral_kurtosis'] = ds.kurtosis
        self.outputs['spectral_spread'] = ds.spread
        self.outputs['spectral_skewness'] = ds.skewness
        self.outputs['spectral_dissonance'] = diss.dissonance
        self.outputs['sccoeffs'] = sc.spectralContrast
        self.outputs['scvalleys'] = sc.spectralValley
        self.outputs['spectral_decrease'] = decrease.decrease
        self.outputs['spectral_energyband_low'] = ebr_low.energyBand
        self.outputs['spectral_energyband_middle_low'] = ebr_mid_low.energyBand
        self.outputs['spectral_energyband_middle_high'] = ebr_mid_hi.energyBand
        self.outputs['spectral_energyband_high'] = ebr_hi.energyBand
        self.outputs['hfc'] = hfc.hfc
        self.outputs['spectral_flux'] = flux.flux
        self.outputs['spectral_rolloff'] = ro.rollOff
        self.outputs['spectral_strongpeak'] = sp.strongPeak
        self.outputs['barkbands'] = barkBands.bands
        self.outputs['spectral_crest'] = crest.crest
        self.outputs['spectral_flatness_db'] = flatness.flatnessDB
        self.outputs['barkbands_kurtosis'] = dsbb.kurtosis
        self.outputs['barkbands_spread'] = dsbb.spread
        self.outputs['barkbands_skewness'] = dsbb.skewness
        self.outputs['spectral_complexity'] = scx.spectralComplexity
        self.outputs['pitch_instantaneous_confidence'] = pitch.pitchConfidence
        self.outputs['pitch_salience'] = ps.pitchSalience
        self.outputs['inharmonicity'] = inharmonicity.inharmonicity
        self.outputs['oddtoevenharmonicenergyratio'] = odd2even.oddToEvenHarmonicEnergyRatio
        self.outputs['tristimulus'] = tristimulus.tristimulus


In [10]:
import essentia.standard
from essentia.streaming import *
import numpy as np


# Function to preprocess a new song
def preprocess_song(file_path):
    loader = essentia.streaming.EqloudLoader(filename=file_path)
    fEx = FeatureExtractor(frameSize=2048, hopSize=1024, sampleRate=loader.paramValue('sampleRate'))
    p = essentia.Pool()

    loader.audio >> fEx.signal

    for desc, output in fEx.outputs.items():
        output >> (p, desc)

    essentia.run(loader)

    stats = ['mean', 'var', 'dmean', 'dvar']
    statsPool = essentia.standard.PoolAggregator(defaultStats=stats)(p)
    
    pool_dict = dict()
    for desc in statsPool.descriptorNames():
        if type(statsPool[desc]) is float:
            pool_dict[desc] = statsPool[desc]
        elif type(statsPool[desc]) is np.ndarray:
            for i, value in enumerate(statsPool[desc]):
                feature_name = "{desc_name}{desc_number}.{desc_stat}".format(
                    desc_name=desc.split('.')[0],
                    desc_number=i,
                    desc_stat=desc.split('.')[1])
                pool_dict[feature_name] = value
    
    features = pd.DataFrame(pool_dict, index=[os.path.basename(file_path)])
    features = scaler.transform(features)
    return features



In [16]:
import joblib

# Load the model, scaler, and label encoder
best_model = joblib.load('best_xgboost_model.pkl')
scaler = joblib.load('scaler.pkl')
label_encoder = joblib.load('label_encoder.pkl')
feature_names = joblib.load('feature_names.pkl')

# Function to predict instruments in a song
def predict_instruments(model, file_path, scaler, label_encoder,feature_names):
    # Preprocess the song to extract features
    features = preprocess_song(file_path)

    # Ensure features are a DataFrame
    if not isinstance(features, pd.DataFrame):
        features = pd.DataFrame(features, columns=feature_names)
        
    # Ensure features have the same columns as those used during training
    features = features.reindex(columns=feature_names, fill_value=0)
    
    # Standardize the features
    features = scaler.transform(features)
    
    # Predict the probabilities of each class
    probabilities = model.predict_proba(features)[0]

    # Get the class labels
    classes = label_encoder.classes_
    
    # Create a sorted list of (class, probability) tuples
    sorted_probabilities = sorted(zip(classes, probabilities), key=lambda x: x[1], reverse=True)
    
    return sorted_probabilities


In [17]:
# Example usage
file_path = '/Users/Shanti/Desktop/Job Search/Personal Projects/IRMAS-TrainingData/jazz/03TakeFive.wav'  # Update with the actual path to your song file
predicted_instruments = predict_instruments(best_model, file_path, scaler, label_encoder,feature_names)

print(f'Predicted instruments (from highest to lowest probability):')
for instrument, probability in predicted_instruments:
    print(f'{instrument}: {probability:.4f}')

# This approach will allow you to see all the instruments present in the song, 
# ranked by their probability, providing a more detailed and informative prediction.

Predicted instruments (from highest to lowest probability):
pia: 0.1986
org: 0.1773
sax: 0.1458
cel: 0.0927
vio: 0.0836
tru: 0.0760
gel: 0.0731
gac: 0.0468
cla: 0.0461
voi: 0.0317
flu: 0.0283




In [None]:
#T his one gets only the piano
# import joblib

# # Load the model, scaler, and label encoder
# best_model = joblib.load('best_xgboost_model.pkl')
# scaler = joblib.load('scaler.pkl')
# label_encoder = joblib.load('label_encoder.pkl')

# # Function to predict instruments in a song
# def predict_instruments(model, file_path, scaler, label_encoder):
#     # Preprocess the song to extract features
#     features = preprocess_song(file_path)
    
#     # Standardize the features
#     features = scaler.transform(features)
    
#     # Predict the class label
#     prediction = model.predict(features)
    
#     # Decode the class label
#     instrument = label_encoder.inverse_transform(prediction)
#     return instrument

# # Example usage
# file_path = '/Users/Shanti/Desktop/Job Search/Personal Projects/IRMAS-TrainingData/jazz/03TakeFive.wav'  # Update with the actual path to your song file
# predicted_instruments = predict_instruments(best_model, file_path, scaler, label_encoder)

# print(f'Predicted instruments: {predicted_instruments}')

!pip install streamlit

In [18]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.35.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Using cached altair-5.3.0-py3-none-any.whl.metadata (9.2 kB)
Collecting blinker<2,>=1.0.0 (from streamlit)
  Using cached blinker-1.8.2-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<6,>=4.0 (from streamlit)
  Using cached cachetools-5.3.3-py3-none-any.whl.metadata (5.3 kB)
Collecting click<9,>=7.0 (from streamlit)
  Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)
Collecting protobuf<5,>=3.20 (from streamlit)
  Using cached protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl.metadata (541 bytes)
Collecting pyarrow>=7.0 (from streamlit)
  Downloading pyarrow-16.1.0-cp38-cp38-macosx_10_15_x86_64.whl.metadata (3.0 kB)
Collecting rich<14,>=10.14.0 (from streamlit)
  Using cached rich-13.7.1-py3-none-any.whl.metadata (18 kB)
Collecting tenacity<9,>=8.1.0 (from streamlit)
  Downloading tenacity-8.3.0-py3-none-any.whl.metadata (1.2 kB)
