!pip install librosa --upgrade scipy 

In [24]:
import os
import pandas as pd
import librosa
import numpy as np
import scipy.stats

# Function to extract features from a single audio file
def extract_features(file_path):
    try:
        y, sr = librosa.load(file_path, sr=None)
        
        # Extract MFCCs
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfccs_mean = np.mean(mfccs, axis=1)
        
        # Extract Chroma Features
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_mean = np.mean(chroma, axis=1)
        
        # Extract Spectral Features
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        spectral_centroid_mean = np.mean(spectral_centroid)
        spectral_bandwidth_mean = np.mean(spectral_bandwidth)
        spectral_contrast_mean = np.mean(spectral_contrast)
        
        # Extract Tempo Features
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        
        # Extract Statistical Features
        mean = np.mean(y)
        std_dev = np.std(y)
        skewness = scipy.stats.skew(y)
        kurtosis = scipy.stats.kurtosis(y)
        
        # Create feature dictionary
        features = {
            **{f'mfcc_{i}': mfcc for i, mfcc in enumerate(mfccs_mean)},
            **{f'chroma_{i}': chroma for i, chroma in enumerate(chroma_mean)},
            'spectral_centroid': spectral_centroid_mean,
            'spectral_bandwidth': spectral_bandwidth_mean,
            'spectral_contrast': spectral_contrast_mean,
            'tempo': tempo[0],
            'mean': mean,
            'std_dev': std_dev,
            'skewness': skewness,
            'kurtosis': kurtosis
        }
        
        return features
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

# Function to traverse directories and extract features
def process_directory(root_dir):
    data = []
    
    for label in os.listdir(root_dir):
        label_dir = os.path.join(root_dir, label)
        if os.path.isdir(label_dir):
            for file_name in os.listdir(label_dir):
                file_path = os.path.join(label_dir, file_name)
                if file_path.endswith('.wav'):  # Adjust if you use different formats
                    features = extract_features(file_path)
                    if features is not None:
                        features['file_name'] = file_name
                        features['label'] = label
                        data.append(features)
    
    return data

# Directory containing the subdirectories of audio files
root_dir = "genres_original"

# Process the directory and get the feature data
data = process_directory(root_dir)

# Convert the list of features into a DataFrame
df = pd.DataFrame(data)


desired_columns = ['file_name'] + [col for col in df.columns if col != 'file_name']
df = df[desired_columns]

# Expand MFCC and Chroma feature lists into separate columns if needed
mfcc_columns = [f'mfcc_{i}' for i in range(13)]
chroma_columns = [f'chroma_{i}' for i in range(12)]

# Ensure all expected feature columns are present
for column in mfcc_columns:
    if column not in df.columns:
        df[column] = None

for column in chroma_columns:
    if column not in df.columns:
        df[column] = None

# Save the DataFrame to a CSV file
df.to_csv('audio_features.csv', index=False)

print("CSV file has been created successfully.")

  y, sr = librosa.load(file_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error processing file genres_original\jazz\jazz.00054.wav: 
CSV file has been created successfully.


In [29]:
#make a dataframe for the csv
music_data = pd.read_csv ('audio_features.csv')

In [30]:
from sklearn import preprocessing
label_encoder = preprocessing.LabelEncoder() 
music_data['label'] = label_encoder.fit_transform(music_data['label'])

In [31]:
music_data

Unnamed: 0,file_name,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,...,chroma_11,spectral_centroid,spectral_bandwidth,spectral_contrast,tempo,mean,std_dev,skewness,kurtosis,label
0,blues.00000.wav,-113.598820,121.570670,-19.162262,42.363937,-6.362266,18.621931,-13.699734,15.339802,-12.274304,...,0.385101,1784.122641,2002.412407,20.523289,123.046875,-0.000060,0.140688,-0.203222,1.580552,0
1,blues.00001.wav,-207.523830,123.985140,8.947019,35.867150,2.909595,21.519472,-8.556513,23.370676,-10.103608,...,0.334094,1530.261767,2038.987608,20.672486,67.999589,-0.000038,0.107619,0.064394,2.337151,0
2,blues.00002.wav,-90.757164,140.440870,-29.084547,31.686693,-13.976547,25.753752,-13.664990,11.634442,-11.778322,...,0.588508,1552.832481,1747.754087,22.191292,161.499023,-0.000022,0.183227,-0.153939,0.734157,0
3,blues.00003.wav,-199.575130,150.086100,5.663404,26.855278,1.770072,14.232647,-4.827845,9.286853,-0.756120,...,0.383199,1070.153418,1596.422564,21.423621,63.024009,-0.000014,0.162029,0.012998,1.330553,0
4,blues.00004.wav,-160.354170,126.209480,-35.581394,22.139256,-32.473550,10.850702,-23.350070,0.493249,-11.796538,...,0.272931,1835.128513,1748.410758,21.459618,135.999178,-0.000028,0.103356,0.135624,3.995475,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
994,rock.00095.wav,-153.630040,109.904785,-23.091644,59.014496,-7.520482,19.728552,-16.488035,18.078283,-22.212603,...,0.193236,2008.174521,2106.314092,22.977342,103.359375,-0.000018,0.081634,-0.027726,0.593214,9
995,rock.00096.wav,-142.416210,116.205460,-32.160263,49.111510,-8.389906,22.885155,-18.317211,20.110556,-22.102661,...,0.322926,2006.827265,2068.839259,22.665250,117.453835,-0.000008,0.080221,-0.041033,1.204248,9
996,rock.00097.wav,-124.989655,115.182340,-47.985886,52.820343,-13.493603,21.751259,-12.417660,20.127153,-18.373243,...,0.468874,2077.565822,1927.228013,22.309244,129.199219,-0.000011,0.083607,-0.067293,0.753047,9
997,rock.00098.wav,-225.033360,123.657265,-9.745124,56.613846,10.421821,20.643210,-10.258778,15.571303,-8.240351,...,0.341094,1398.649504,1818.165124,22.491635,73.828125,-0.000014,0.090761,0.039108,0.712659,9


In [45]:
music_data_num = music_data.drop('file_name',axis=1)
_corr = music_data_num.corr()

In [47]:
_corr

Unnamed: 0,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,...,chroma_11,spectral_centroid,spectral_bandwidth,spectral_contrast,tempo,mean,std_dev,skewness,kurtosis,label
mfcc_0,1.0,-0.639594,-0.100924,0.108949,-0.114528,0.235808,0.007349,0.331839,0.053402,0.340442,...,0.449099,0.721443,0.648668,-0.462326,-0.003834,-0.02382,0.783407,-0.005526,-0.56572,0.340047
mfcc_1,-0.639594,1.0,-0.269118,0.050485,-0.152811,-0.091921,-0.264384,-0.191486,-0.323335,-0.215829,...,-0.44149,-0.940176,-0.896706,0.548351,0.013822,-0.017298,-0.489161,-0.021316,0.237952,-0.355013
mfcc_2,-0.100924,-0.269118,1.0,-0.409733,0.601435,-0.25593,0.519249,-0.317271,0.438724,-0.347412,...,-0.00088,0.188385,0.494298,-0.308834,-0.093446,0.103877,0.165568,0.037543,0.059877,0.062727
mfcc_3,0.108949,0.050485,-0.409733,1.0,-0.449174,0.673473,-0.432791,0.638219,-0.45096,0.549084,...,0.201514,-0.13772,-0.280155,0.328066,0.040935,-0.152894,-0.020598,-0.013885,-0.17103,-0.013898
mfcc_4,-0.114528,-0.152811,0.601435,-0.449174,1.0,-0.434816,0.76643,-0.421946,0.647517,-0.446433,...,0.037516,0.069578,0.303413,-0.381725,-0.081475,0.11058,0.135135,0.040496,0.085922,0.067833
mfcc_5,0.235808,-0.091921,-0.25593,0.673473,-0.434816,1.0,-0.482227,0.837284,-0.446738,0.724244,...,0.315093,0.016873,-0.066496,0.294348,0.047139,-0.192784,0.097834,-0.059037,-0.184573,0.080652
mfcc_6,0.007349,-0.264384,0.519249,-0.432791,0.76643,-0.482227,1.0,-0.487313,0.787125,-0.423076,...,0.116371,0.216089,0.391392,-0.551785,-0.080651,0.102423,0.193282,0.035566,0.024645,0.182892
mfcc_7,0.331839,-0.191486,-0.317271,0.638219,-0.421946,0.837284,-0.487313,1.0,-0.432239,0.811881,...,0.361265,0.13375,0.027401,0.224428,0.041339,-0.145894,0.147499,-0.020273,-0.225885,0.108788
mfcc_8,0.053402,-0.323335,0.438724,-0.45096,0.647517,-0.446738,0.787125,-0.432239,1.0,-0.378723,...,0.15732,0.289581,0.410083,-0.582174,-0.071243,0.09639,0.238964,0.005535,0.051679,0.197974
mfcc_9,0.340442,-0.215829,-0.347412,0.549084,-0.446433,0.724244,-0.423076,0.811881,-0.378723,1.0,...,0.342028,0.192394,0.063072,0.175632,0.03646,-0.12856,0.161055,-0.036683,-0.156411,0.116203


In [48]:
_corr[["label"]]

Unnamed: 0,label
mfcc_0,0.340047
mfcc_1,-0.355013
mfcc_2,0.062727
mfcc_3,-0.013898
mfcc_4,0.067833
mfcc_5,0.080652
mfcc_6,0.182892
mfcc_7,0.108788
mfcc_8,0.197974
mfcc_9,0.116203


In [33]:
# data set is ready for training
X = music_data.drop(['label','file_name'],axis=1) 
y = music_data['label']

In [34]:
#perform mixmax scaling for 
Scaler = preprocessing.MinMaxScaler()
music_data_scaled = Scaler.fit_transform(X)
music_data_scaled = pd.DataFrame(music_data_scaled,columns=X.columns)

In [35]:
from sklearn.model_selection import train_test_split 
  
X_train, X_test, y_train, y_test = train_test_split(X, y,  test_size=0.3,random_state=121) 

In [38]:
# all possible classifiers for this type of problem
"""
from sklearn.metrics import accuracy_score 
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.linear_model import LogisticRegression 
import catboost as cb 
from xgboost import XGBClassifier"""

'\nfrom sklearn.metrics import accuracy_score \nfrom sklearn.neighbors import KNeighborsClassifier \nfrom sklearn.tree import DecisionTreeClassifier \nfrom sklearn.ensemble import RandomForestClassifier \nfrom sklearn.linear_model import LogisticRegression \nimport catboost as cb \nfrom xgboost import XGBClassifier'

In [56]:
!pip install xgboost catboost

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting catboost
  Downloading catboost-1.2.5-cp312-cp312-win_amd64.whl.metadata (1.2 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.20.3-py3-none-any.whl.metadata (12 kB)
Collecting plotly (from catboost)
  Downloading plotly-5.23.0-py3-none-any.whl.metadata (7.3 kB)
Collecting tenacity>=6.2.0 (from plotly->catboost)
  Downloading tenacity-9.0.0-py3-none-any.whl.metadata (1.2 kB)
Downloading catboost-1.2.5-cp312-cp312-win_amd64.whl (101.1 MB)
   ---------------------------------------- 0.0/101.1 MB ? eta -:--:--
   ---------------------------------------- 0.1/101.1 MB 2.3 MB/s eta 0:00:44
   ---------------------------------------- 0.3/101.1 MB 3.9 MB/s eta 0:00:26
   ---------------------------------------- 0.7/101.1 MB 5.4 MB/s eta 0:00:19
   ---------------------------------------- 1.1/101.1 MB 7.2 MB/s eta 0:00:14
    --------------------------------------- 1.5/101.1 MB 7.5 MB/s eta 0:00:


[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [57]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import catboost as cb 

In [40]:
rf = RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6433333333333333


In [53]:
xgb = XGBClassifier(n_estimators=1000, learning_rate=0.05) 
xgb.fit(X_train, y_train) 
y_pred = xgb.predict(X_test) 
accuracy = accuracy_score(y_test, y_pred) 
print("Accuracy:", accuracy)

Accuracy: 0.65


In [58]:
cbc = cb.CatBoostClassifier(verbose=0, eval_metric='Accuracy', loss_function='MultiClass') 
cbc.fit(X_train, y_train) 
y_pred = cbc.predict(X_test) 
accuracy = accuracy_score(y_test, y_pred) 
print("Accuracy:", accuracy)

Accuracy: 0.7233333333333334


In [72]:
#make a cnn model
import tensorflow as tf
from tensorflow.keras import layers, models


In [83]:
import tensorflow as tf
from tensorflow.keras import layers, models

def create_simple_cnn_model(input_shape, num_classes):
    model = models.Sequential()
    
    # First convolutional layer
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    
    # Second convolutional layer
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    
    # Third convolutional layer
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    
    # Flatten and fully connected layers
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    
    # Compile the model
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

# Example usage
# Assuming X_train is a NumPy array
input_shape = (X_train.shape[0], X_train.shape[1], 1)

model = create_simple_cnn_model(input_shape, num_classes)


# Print the model summary
model.summary()
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [84]:
X_train_2 = x_train
#X = X.reshape((X.shape[0], X.shape[1], 1))  
training_history = model.fit(X_train_2, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/30


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("data:0", shape=(None, 33), dtype=float32). Expected shape (None, 699, 33, 1), but input has incompatible shape (None, 33)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 33), dtype=float32)
  • training=True
  • mask=None

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers, models

# Assuming df is your DataFrame
# Load your DataFrame here, for example:
# df = pd.read_csv('your_data.csv')

# 1. Separate Features and Labels
X = df.iloc[:, :-1].values  # Features (first 33 columns)
y = df.iloc[:, -1].values   # Labels (last column)

# 2. Reshape the Features for CNN
X = X.reshape((X.shape[0], X.shape[1], 1))  # Reshape to (999, 33, 1)

# 3. Encode the Labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)  # Convert labels to integers
y = to_categorical(y)  # Convert to one-hot encoded labels

# 4. Define the CNN Model
model = models.Sequential()
model.add(layers.Conv1D(32, 3, activation='relu', input_shape=(33, 1)))
model.add(layers.MaxPooling1D(2))
model.add(layers.Conv1D(64, 3, activation='relu'))
model.add(layers.MaxPooling1D(2))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(y.shape[1], activation='softmax'))  # y.shape[1] gives the number of classes

# 5. Compile the Model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 6. Train the Model
history = model.fit(X, y, epochs=20, validation_split=0.2)

# 7. Evaluate the Model (optional)
test_loss, test_acc = model.evaluate(X, y)
print(f'Test accuracy: {test_acc}')
