### Import

In [None]:
import pandas as pd
import librosa
import numpy as np
from collections import Counter
!pip install imblearn
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import LabelEncoder
!pip install tensorflow
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten

### Reading development csv file

In [6]:
FileName = pd.read_csv(f'Project/development.csv')

In [7]:
FileName.path

0       dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/0a312...
1       dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/0ee42...
2       dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/1d9f3...
3       dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/269fc...
4       dsl_data/audio/speakers/2BqVo8kVB2Skwgyb/5bbda...
                              ...                        
9849    dsl_data/audio/speakers/vnljypgejkINbBAY/4fb3d...
9850    dsl_data/audio/speakers/vnljypgejkINbBAY/59e6a...
9851    dsl_data/audio/speakers/vnljypgejkINbBAY/5c81c...
9852    dsl_data/audio/speakers/vnljypgejkINbBAY/5ef42...
9853    dsl_data/audio/speakers/vnljypgejkINbBAY/61b7a...
Name: path, Length: 9854, dtype: object

 
 # Data Pre-Processing Section

# We Have used Trimming and Padding to extract more accurate data 


In [None]:
melspectograms = []

for i in range(0, len(FileName.count(axis=1))):
    path_address = FileName.path[i]
    y, sr = librosa.load(path_address)
    yy, _ = librosa.effects.trim(y,top_db = 20)  # trim silence from the beginning and end of the audio file
    p=235000-len(yy)
    padding=np.pad(yy,(0,p))
    mel_spect = librosa.feature.mfcc(padding, sr=sr)
    melspectograms.append(mel_spect)


## Converting List of MFCC to Array 


In [11]:
arrayofmel = np.array(melspectograms)
FinalMFCC = arrayofmel.reshape(arrayofmel.shape[0], -1)

In [12]:
FinalMFCC

array([[-526.3765 , -447.7765 , -367.30096, ...,    0.     ,    0.     ,
           0.     ],
       [-508.97012, -414.96906, -393.45593, ...,    0.     ,    0.     ,
           0.     ],
       [-693.09564, -616.1187 , -582.1136 , ...,    0.     ,    0.     ,
           0.     ],
       ...,
       [-313.54346, -255.6263 , -241.1561 , ...,    0.     ,    0.     ,
           0.     ],
       [-334.59213, -211.85994, -149.16759, ...,    0.     ,    0.     ,
           0.     ],
       [-341.19757, -336.55057, -336.1369 , ...,    0.     ,    0.     ,
           0.     ]], dtype=float32)

## Merging Action and Object as our label and Tranfsorm them to label Encoder function 

In [13]:
# This function is for mixing two columns of labels 
labels = [FileName.action[i] + FileName.object[i] for i in range(len(FileName))]

In [14]:
encoder = LabelEncoder()
labelencoded = encoder.fit_transform(labels)

### Oversampling Technique for Balancing Labels 

In [16]:
# Count the number of elements for each label
labelcounts = Counter(labelencoded)

max_count = max(labelcounts.values())
melspectograms_resampled = []
integer_encoded_labels_resampled = []

for label, count in labelcounts.items():
    label_indices = [i for i, x in enumerate(labelencoded) if x == label]

    random_indices = np.random.choice(label_indices, size=(max_count-count), replace=True)
    
    for index in label_indices + list(random_indices):
        melspectograms_resampled.append(FinalMFCC[index])
        integer_encoded_labels_resampled.append(labelencoded[index])
        
melspectograms_resampled = np.array(melspectograms_resampled)
melspectograms_resampled = melspectograms_resampled.reshape(melspectograms_resampled.shape[0], -1)

In [17]:
### Train Test Split
from sklearn.model_selection import train_test_split
train_data2,test_data2,y_train,y_test=train_test_split(melspectograms_resampled,integer_encoded_labels_resampled,test_size=0.25,random_state=0)

## Using Standard Scalar for Normalization 

In [18]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
train_data2 = sc.fit_transform(train_data2)
test_data2 = sc.transform(test_data2)

In [19]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(train_data2, y_train)

# Predicting the Test set results
y_pred = classifier.predict(test_data2)


from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))


0.6686338797814207


## Now This Section is for Evaluating New DataSet to predict there labels 

In [20]:
# fetchin new data to be predicted for labeling 
evaluation_raw_data = pd.read_csv(f'Project/evaluation.csv')

In [None]:
melspectograms_test = []

for i in range(0, len(evaluation_raw_data.count(axis=1))):
    path_address = evaluation_raw_data.path[i]
    y, sr = librosa.load(path_address)
    yy, _ = librosa.effects.trim(y,top_db = 20)  # trim silence from the beginning and end of the audio file
    p=235000-len(yy)
    padding=np.pad(yy,(0,p))
    mel_spect = librosa.feature.mfcc(padding, sr=sr)
    melspectograms_test.append(mel_spect)


In [23]:
melspectograms_test = np.array(melspectograms_test)
arrayed_melspectograms = melspectograms_test.reshape(melspectograms_test.shape[0], -1)

In [24]:
arrayed_melspectograms_test=sc.transform(arrayed_melspectograms)

In [25]:
predictions = classifier.predict(arrayed_melspectograms_test)
predictions

array([6, 0, 6, ..., 5, 3, 0], dtype=int64)

In [26]:
predictions.shape

(1455,)

In [28]:
predicted_classes = encoder.inverse_transform(predictions)

In [29]:
predicted_classes

array(['increasevolume', 'activatemusic', 'increasevolume', ...,
       'increaseheat', 'decreaseheat', 'activatemusic'], dtype='<U19')

In [30]:
num_samples = len(predicted_classes)
result = pd.DataFrame({'Id': range(num_samples), 'Predicted': predicted_classes})

In [31]:
result.to_csv(r'E:/SVM_Result.csv', index=False, sep=',')