<a href="https://colab.research.google.com/github/Soumya-Choudhury/speech_project/blob/main/speech.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
import os

path = '/content/drive/MyDrive/speech_project_techno_india/collegeproject'

os.chdir(path)

# Check the current working directory
print("Current Working Directory: ", os.getcwd())


Current Working Directory:  /content/drive/MyDrive/speech_project_techno_india/collegeproject


In [10]:
os.listdir("/content/drive/My Drive")

['Encapsulation Assignment.gdoc',
 'Constructor Assignment.gdoc',
 'Untitled document (6).gdoc',
 'Static Keyword Assignment.gdoc',
 'OOPS Assignment.gdoc',
 'OOPS Fundamentals.gdoc',
 'Arrays in Java Assignment.gdoc',
 'PROJECT ASSIGNMENT.gdoc',
 'Resume (1).gdoc',
 'Resume.gdoc',
 'Soumya Choudhury resume (9).pdf',
 'Soumya Choudhury resume (8).pdf',
 'Soumya Choudhury resume (7).pdf',
 'Soumya Choudhury resume (6).pdf',
 'Assignment1.gdoc',
 'Recursion Assignment.gdoc',
 'Untitled document (5).gdoc',
 'Soumya Choudhury resume (1) (1).pdf',
 'Untitled document (4).gdoc',
 'Assignment-Linked List.gdoc',
 'Untitled document (3).gdoc',
 'Untitled document (2).gdoc',
 'Untitled document (1).gdoc',
 'Soumya Choudhury resume (5).pdf',
 'project_firstPage (1).gdoc',
 'project_firstPage.gdoc',
 '13000220028_Soumya Choudhury (2).pdf',
 'OECIT701A_CA1_ITA_13000220028_Soumya Choudhury.pdf',
 'Untitled spreadsheet.gsheet',
 'moocs_topsheet.xlsx',
 'movies',
 'Soumya Choudhury resume (4).pdf',
 '

In [11]:
import librosa
import numpy as np

def extract_mfcc(y, sr, n_mfcc=13):
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return mfccs

def extract_lfcc(y, sr, n_lfcc=13):
    # Compute the DFT (Spectrum)
    D = librosa.stft(y)
    # Convert amplitude spectrum to dB-scale
    magnitude, phase = librosa.magphase(D)
    S = librosa.amplitude_to_db(magnitude, ref=np.max)
    # Compute the LFCC
    lfccs = librosa.feature.mfcc(S=S, sr=sr, n_mfcc=n_lfcc)
    return lfccs

In [16]:

personalities = os.listdir()
# ==========================================
# SECTION 1: FUNCTION DEFINITIONS
# ==========================================

audio_files = []
labels  =[]

for element in personalities:
    if element not in ([".git", ".DS_Store", "README.md"]):
        substack = os.listdir(f"{os.getcwd()}/{element}")
        for file in substack:
            # persona = element.replace(" ","_")
            # os.rename(f"{os.getcwd()}/collegeproject/{element}/{file}",f"{os.getcwd()}/collegeproject/{element}/{persona}-{file}")
            audio_files.append(f"{os.getcwd()}/{element}/{file}")
            labels.append(file.split("-")[0])

import librosa
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

def add_noise_at_db(y, desired_snr_db):
    # Create a random noise
    noise = np.random.randn(len(y))

    # Calculate the power of the signal
    signal_power = np.sum(y**2)

    # Calculate the desired power of the noise
    desired_noise_power = signal_power / (10**(desired_snr_db / 10))

    # Scale the noise to achieve the desired power
    scaling_factor = np.sqrt(desired_noise_power / np.sum(noise**2))
    noise = scaling_factor * noise

    # Add noise to the original signal
    y_noisy = y + noise

    return y_noisy

# ==========================================
# SECTION 2: MAIN CODE - WITHOUT NOISE
# ==========================================

features = []
for file in audio_files:
    try:
        y, sr = librosa.load(file)
        mfcc_features = extract_mfcc(y, sr).mean(axis=1)
        lfcc_features = extract_lfcc(y, sr).mean(axis=1)
        combined_features = np.hstack([mfcc_features, lfcc_features])
        features.append(combined_features)
    except Exception as e:
        del_index = audio_files.index(file)
        print(f"Error at index {del_index}: {e}")
        del labels[del_index]

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
clf = svm.SVC(kernel='linear')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Performance WITHOUT Noise:")
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# ==========================================
# SECTION 3: MAIN CODE - WITH NOISE 5 db
# ==========================================

features = []
for file in audio_files:
    try:
        y, sr = librosa.load(file)
        y_noisy = add_noise_at_db(y, -5)  # To add 5dB noise
        # For 10dB, use: y_noisy = add_noise_at_db(y, -10)
        mfcc_features = extract_mfcc(y_noisy, sr).mean(axis=1)
        lfcc_features = extract_lfcc(y_noisy, sr).mean(axis=1)
        combined_features = np.hstack([mfcc_features, lfcc_features])
        features.append(combined_features)
    except Exception as e:
        del_index = audio_files.index(file)
        print(f"Error at index {del_index}: {e}")
        del labels[del_index]

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
clf = svm.SVC(kernel='linear')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("\nPerformance WITH Noise:")
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Performance WITHOUT Noise:
0.9444444444444444
                  precision    recall  f1-score   support

      Aamir_Khan       1.00      0.88      0.93         8
      Ajay_Devgn       0.89      1.00      0.94         8
Pareneeti_Chopra       0.89      1.00      0.94         8
    Preeti_Zinta       1.00      0.83      0.91         6
   Shahid_Kapoor       1.00      1.00      1.00         6

        accuracy                           0.94        36
       macro avg       0.96      0.94      0.94        36
    weighted avg       0.95      0.94      0.94        36


Performance WITH Noise:
0.8888888888888888
                  precision    recall  f1-score   support

      Aamir_Khan       0.88      0.88      0.88         8
      Ajay_Devgn       0.78      0.88      0.82         8
Pareneeti_Chopra       0.89      1.00      0.94         8
    Preeti_Zinta       1.00      0.83      0.91         6
   Shahid_Kapoor       1.00      0.83      0.91         6

        accuracy                   

In [15]:
# ==========================================
# SECTION 3: MAIN CODE - WITH NOISE 10 db
# ==========================================

features = []
for file in audio_files:
    try:
        y, sr = librosa.load(file)
        y_noisy = add_noise_at_db(y, -10)  # To add 5dB noise
        # For 10dB, use: y_noisy = add_noise_at_db(y, -10)
        mfcc_features = extract_mfcc(y_noisy, sr).mean(axis=1)
        lfcc_features = extract_lfcc(y_noisy, sr).mean(axis=1)
        combined_features = np.hstack([mfcc_features, lfcc_features])
        features.append(combined_features)
    except Exception as e:
        del_index = audio_files.index(file)
        print(f"Error at index {del_index}: {e}")
        del labels[del_index]

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
clf = svm.SVC(kernel='linear')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("\nPerformance WITH Noise:")
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))



Performance WITH Noise:
0.8055555555555556
                  precision    recall  f1-score   support

      Aamir_Khan       0.70      0.88      0.78         8
      Ajay_Devgn       0.71      0.62      0.67         8
Pareneeti_Chopra       0.89      1.00      0.94         8
    Preeti_Zinta       1.00      0.83      0.91         6
   Shahid_Kapoor       0.80      0.67      0.73         6

        accuracy                           0.81        36
       macro avg       0.82      0.80      0.80        36
    weighted avg       0.81      0.81      0.80        36



In [17]:
# ==========================================
# SECTION 3: MAIN CODE - WITH NOISE 20 db
# ==========================================

features = []
for file in audio_files:
    try:
        y, sr = librosa.load(file)
        y_noisy = add_noise_at_db(y, -20)  # To add 5dB noise
        # For 10dB, use: y_noisy = add_noise_at_db(y, -10)
        mfcc_features = extract_mfcc(y_noisy, sr).mean(axis=1)
        lfcc_features = extract_lfcc(y_noisy, sr).mean(axis=1)
        combined_features = np.hstack([mfcc_features, lfcc_features])
        features.append(combined_features)
    except Exception as e:
        del_index = audio_files.index(file)
        print(f"Error at index {del_index}: {e}")
        del labels[del_index]

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
clf = svm.SVC(kernel='linear')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("\nPerformance WITH Noise:")
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Performance WITH Noise:
0.5833333333333334
                  precision    recall  f1-score   support

      Aamir_Khan       0.50      0.75      0.60         8
      Ajay_Devgn       0.38      0.38      0.38         8
Pareneeti_Chopra       0.83      0.62      0.71         8
    Preeti_Zinta       0.83      0.83      0.83         6
   Shahid_Kapoor       0.50      0.33      0.40         6

        accuracy                           0.58        36
       macro avg       0.61      0.58      0.58        36
    weighted avg       0.60      0.58      0.58        36

