In [1]:
# Import libraries
import os, fnmatch
import IPython.display as ipd
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display

In [2]:
# Importing datasets from files to array
labels = []
files = []

# Load guitar data and append a new guitar label & file path 
# for each '.wav' file found
guitarPath='Datasets/guitar/'
for file in os.listdir(guitarPath):
    if fnmatch.fnmatch(file, '*.wav'):
        labels.append('guitar')
        files.append(guitarPath+file)
        #print(file)

# Load violin data and append a new violin label & file path 
# for each '.wav' file found 
violinPath='Datasets/violin/'
for file in os.listdir(violinPath):
    if fnmatch.fnmatch(file, '*.wav'):
        labels.append('violin')
        files.append(violinPath+file)
        #print(file)
        
#files
#labels

In [4]:
# Encode target values found in 'labels' array
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit(labels)
Y = le.transform(labels)

In [5]:
# Load files from 'files' array & extract MFCC data
X = []
for file in (files):
    data, sr = librosa.load(file)
    #data/=data.max() # Normalize data 
    if len(data) is None:
        print("Error loading")
        break
    mfccs = librosa.feature.mfcc(data, n_mfcc=13, sr=sr)
    features = np.mean(mfccs,1)
    X.append(features)











In [6]:
#plt.figure(figsize=(25,10))
#librosa.display.specshow(X[1],
#                         x_axis="time",
#                         y_axis="mel",
#                         sr=13)
#plt.colorbar(format="%+2.f dB")
#plt.show()

In [7]:
# Split the data for training and testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.25,random_state=0)  # split the X and y data

In [8]:
# Scaling the data
from sklearn.preprocessing import StandardScaler
sc = StandardScaler() 
X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.fit_transform(X_test)

In [9]:
# Apply the Logistical regression learning model and train it
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(max_iter=3000)  # set the training model with increased max_iter
model.fit(X_train_scaled,y_train)  # Train the model using our training data

# Get predictions
y_train_pred = model.predict(X_train_scaled)
y_test_pred = model.predict(X_test_scaled)

# Evaluate the model
scaledTrainScore="Scaled train score = {:0.3f}".format(model.score(X_train_scaled, y_train))
scaledTestScore="Scaled test score = {:0.3f}".format(model.score(X_test_scaled, y_test))

In [10]:
# Model accuracy after scaling
print(scaledTrainScore)
print(scaledTestScore)

Scaled train score = 0.993
Scaled test score = 1.000


In [11]:
# Evaluate the model 
from sklearn.metrics import classification_report
target_names = ['guitar','violin']
print(classification_report(y_test,y_test_pred, target_names=target_names))

              precision    recall  f1-score   support

      guitar       1.00      1.00      1.00        24
      violin       1.00      1.00      1.00        25

    accuracy                           1.00        49
   macro avg       1.00      1.00      1.00        49
weighted avg       1.00      1.00      1.00        49

