In [91]:
import os
import pandas as pd




In [160]:
# Define the root folder that contains subfolders with .wav files
root_folder = "processed_recorded_audio"

# Initialize an empty list to store file paths and categories
data = []

# Iterate through each subfolder
for subfolder in os.listdir(root_folder):
    subfolder_path = os.path.join(root_folder, subfolder)

    # Check if the path is a directory (subfolder)
    if os.path.isdir(subfolder_path):
        # Iterate through all .wav files in the subfolder
        for wav_file in os.listdir(subfolder_path):
            if wav_file.endswith(".wav"):
                # Get the full path of the .wav file
                file_path = os.path.join(subfolder_path, wav_file)
                
                # Append the file path and category (subfolder name) to the data list
                data.append([file_path, subfolder])

# Create a DataFrame from the collected data
finaldf = pd.DataFrame(data, columns=["file_path", "category"])


In [161]:
finaldf.head()

Unnamed: 0,file_path,category
0,processed_recorded_audio\Backward\Backward_01.wav,Backward
1,processed_recorded_audio\Backward\Backward_02.wav,Backward
2,processed_recorded_audio\Backward\Backward_03.wav,Backward
3,processed_recorded_audio\Backward\Backward_04.wav,Backward
4,processed_recorded_audio\Backward\Backward_05.wav,Backward


In [159]:
# data augmentation

SyntaxError: invalid syntax (810640648.py, line 1)

In [172]:
import os
import numpy as np
import pandas as pd
import librosa
import random
import soundfile as sf  # New import for saving audio

# Load an audio file
def load_audio(file_path):
    audio, sr = librosa.load(file_path, sr=None)
    return audio, sr

# Add white noise
def add_noise(audio, noise_factor=0.005):
    noise = np.random.randn(len(audio))
    return audio + noise_factor * noise

# Change speed
def change_speed(audio, speed_factor=1.5):
    return librosa.effects.time_stretch(audio,rate=speed_factor)

# Change pitch
def change_pitch(audio, sr, n_steps=2):
    return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)

# Shift time
def time_shift(audio, shift_max=0.2):
    shift = int(len(audio) * random.uniform(-shift_max, shift_max))
    return np.roll(audio, shift)

# Adjust volume
def adjust_volume(audio, gain_factor=1.5):
    return audio * gain_factor

# Reverse audio
def reverse_audio(audio):
    return np.flip(audio)

# Function to save augmented audio and add new rows to the dataframe
def augment_audio_and_update_df(finaldf, output_folder):
    new_rows = []
    
    for index, row in finaldf.iterrows():
        file_path = row['file_path']
        category = row['category']
        audio, sr = load_audio(file_path)
        
        # Create category subfolder if it doesn't exist
        category_folder = os.path.join(output_folder, category)
        os.makedirs(category_folder, exist_ok=True)

        # Augmentation techniques
        augmentations = [
            ('original', audio),
            ('noise', add_noise(audio)),
            ('speed_up', change_speed(audio, speed_factor=1.2)),
            ('speed_down', change_speed(audio, speed_factor=0.8)),
            ('pitch_up', change_pitch(audio, sr, n_steps=2)),
            ('pitch_down', change_pitch(audio, sr, n_steps=-2)),
            ('shift_forward', time_shift(audio, shift_max=0.2)),
            ('shift_backward', time_shift(audio, shift_max=-0.2)),
            ('volume_up', adjust_volume(audio, gain_factor=1.5)),
            ('volume_down', adjust_volume(audio, gain_factor=0.5)),
            ('reversed', reverse_audio(audio))
        ]
        
        # Save augmentations and add new rows
        for aug_type, aug_audio in augmentations:
            output_file = os.path.join(category_folder, f"{aug_type}_{index}.wav")
            sf.write(output_file, aug_audio, sr)  # Replaced librosa.output.write_wav() with sf.write
            
            # Append new row data (file_path and category)
            new_rows.append({
                'file_path': output_file,
                'category': category
            })
    
    # Add the new augmented rows to the original dataframe
    augmented_df = pd.DataFrame(new_rows)
    updated_df = pd.concat([finaldf, augmented_df], ignore_index=True)
    
    return updated_df

# Example usage
output_folder = 'augmented_audio_dataset'  # Output folder to save augmented files
updated_df = augment_audio_and_update_df(finaldf, output_folder)


In [173]:
updated_df = updated_df.sample(frac=1).reset_index(drop=True)

updated_df.head()

Unnamed: 0,file_path,category
0,augmented_audio_dataset\Seven\pitch_up_1029.wav,Seven
1,augmented_audio_dataset\two\pitch_up_1288.wav,two
2,augmented_audio_dataset\Forward\volume_up_311.wav,Forward
3,processed_recorded_audio\visual\visual_21.wav,visual
4,augmented_audio_dataset\Eight\shift_backward_1...,Eight


In [174]:
updated_df.shape

(18456, 2)

In [175]:
updated_df.to_csv('recorded_data_with_data_aug.csv', index=False)

preprocessing

In [176]:
import pandas as pd
import numpy as np
import librosa

# Function to preprocess the audio files
def preprocess_audio(audio, sr, target_sr=16000, max_length=5):
    # Resample audio if needed
    if sr != target_sr:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
        sr = target_sr
    
    # Normalize the audio
    audio = librosa.util.normalize(audio)
    
    # Trim leading and trailing silence
    audio, _ = librosa.effects.trim(audio)
    
    # Calculate the required padding
    target_length = target_sr * max_length
    if len(audio) < target_length:
        padding = target_length - len(audio)
        # Pad the audio file
        audio = np.pad(audio, (0, padding), 'constant')
    else:
        # If audio is longer than target length, truncate it
        audio = audio[:target_length]
    
    return audio, sr

# Function to preprocess a chunk of the DataFrame
def preprocess_chunk(df_chunk, target_sr=16000, max_length=5):
    processed_data = []
    
    for idx, row in df_chunk.iterrows():
        file_path = row['file_path']
        category = row['category']
        
        try:
            # Load the audio file
            audio, sr = librosa.load(file_path, sr=None)
            
            # Preprocess the audio
            preprocessed_audio, preprocessed_sr = preprocess_audio(audio, sr, target_sr, max_length)
            
            # Append preprocessed data
            processed_data.append({
                'file_path': file_path,
                'category': category,
                'audio': preprocessed_audio,
                'sr': preprocessed_sr
            })
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")
    
    return pd.DataFrame(processed_data)

# Function to process the DataFrame in chunks
def process_dataframe_in_chunks(df, chunk_size=100, target_sr=16000, max_length=5):
    processed_chunks = []
    
    # Split DataFrame into chunks and process each chunk
    for chunk_idx, df_chunk in enumerate(np.array_split(updated_df, len(updated_df) // chunk_size + 1)):
        print(f"Processing chunk {chunk_idx + 1}")
        processed_chunk = preprocess_chunk(df_chunk, target_sr, max_length)
        processed_chunks.append(processed_chunk)
        del df_chunk  # Free memory
    
    # Concatenate all processed chunks into a single DataFrame
    processed_df = pd.concat(processed_chunks, ignore_index=True)
    
    return processed_df

# Example of how to use the function
# Assume df has the columns: file_path, category
preprocessed_df = process_dataframe_in_chunks(updated_df)


  return bound(*args, **kwds)


Processing chunk 1
Processing chunk 2
Processing chunk 3
Processing chunk 4
Processing chunk 5
Processing chunk 6
Processing chunk 7
Processing chunk 8
Processing chunk 9
Processing chunk 10
Processing chunk 11
Processing chunk 12
Processing chunk 13
Processing chunk 14
Processing chunk 15
Processing chunk 16
Processing chunk 17
Processing chunk 18
Processing chunk 19
Processing chunk 20
Processing chunk 21
Processing chunk 22
Processing chunk 23
Processing chunk 24
Processing chunk 25
Processing chunk 26
Processing chunk 27
Processing chunk 28
Processing chunk 29
Processing chunk 30
Processing chunk 31
Processing chunk 32
Processing chunk 33
Processing chunk 34
Processing chunk 35
Processing chunk 36
Processing chunk 37
Processing chunk 38
Processing chunk 39
Processing chunk 40
Processing chunk 41
Processing chunk 42
Processing chunk 43
Processing chunk 44
Processing chunk 45
Processing chunk 46
Processing chunk 47
Processing chunk 48
Processing chunk 49
Processing chunk 50
Processin

mfcc features

In [179]:
updated_df.head()

Unnamed: 0,file_path,category
0,augmented_audio_dataset\Seven\pitch_up_1029.wav,Seven
1,augmented_audio_dataset\two\pitch_up_1288.wav,two
2,augmented_audio_dataset\Forward\volume_up_311.wav,Forward
3,processed_recorded_audio\visual\visual_21.wav,visual
4,augmented_audio_dataset\Eight\shift_backward_1...,Eight


In [180]:
import pandas as pd
import numpy as np
import librosa


In [181]:
def extract_mfcc_features(audio, sr, n_mfcc=13):
    # Extract MFCC features
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    # Take the mean of the MFCC coefficients over time
    mfcc_mean = mfcc.mean(axis=1)
    return mfcc_mean


In [182]:
def process_audio(file_path, target_sr=16000, n_mfcc=13):
    # Load the audio file
    audio, sr = librosa.load(file_path, sr=None)
    
    # Resample and preprocess audio
    if sr != target_sr:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
        sr = target_sr
    
    audio = librosa.util.normalize(audio)
    audio, _ = librosa.effects.trim(audio)
    
    target_length = target_sr * 5  # Max length in seconds (e.g., 5 seconds)
    if len(audio) < target_length:
        padding = target_length - len(audio)
        audio = np.pad(audio, (0, padding), 'constant')
    else:
        audio = audio[:target_length]
    
    # Extract MFCC features
    mfcc_features = extract_mfcc_features(audio, sr, n_mfcc)
    
    return mfcc_features


In [183]:
def process_dataframe_in_chunks(newdf, chunk_size=100, target_sr=16000, n_mfcc=14):
    # Initialize lists to store processed data
    mfcc_features_list = []
    file_paths = []
    
    for chunk_idx, df_chunk in enumerate(np.array_split(updated_df, len(updated_df) // chunk_size + 1)):
        print(f"Processing chunk {chunk_idx + 1}")
        
        for idx, row in df_chunk.iterrows():
            file_path = row['file_path']
            
            try:
                # Process audio file and extract MFCC features
                mfcc_features = process_audio(file_path, target_sr, n_mfcc)
                mfcc_features_list.append(mfcc_features)
                file_paths.append(file_path)
            except Exception as e:
                print(f"Error processing file {file_path}: {e}")
        
        # Free memory
        del df_chunk

    # Convert lists to DataFrame
    mfcc_df = pd.DataFrame(mfcc_features_list, columns=[f'mfcc_{i}' for i in range(n_mfcc)])
    result_df = pd.DataFrame({'file_path': file_paths}).join(mfcc_df)

    return result_df


In [184]:
# Example usage
# Assume df has the columns: file_path, category
mfcc_df = process_dataframe_in_chunks(updated_df)

# Merge MFCC features with the original DataFrame
updated_df = updated_df.merge(mfcc_df, on='file_path', how='left')


  return bound(*args, **kwds)


Processing chunk 1
Processing chunk 2
Processing chunk 3
Processing chunk 4
Processing chunk 5
Processing chunk 6
Processing chunk 7
Processing chunk 8
Processing chunk 9
Processing chunk 10
Processing chunk 11
Processing chunk 12
Processing chunk 13
Processing chunk 14
Processing chunk 15
Processing chunk 16
Processing chunk 17
Processing chunk 18
Processing chunk 19
Processing chunk 20
Processing chunk 21
Processing chunk 22
Processing chunk 23
Processing chunk 24
Processing chunk 25
Processing chunk 26
Processing chunk 27
Processing chunk 28
Processing chunk 29
Processing chunk 30
Processing chunk 31
Processing chunk 32
Processing chunk 33
Processing chunk 34
Processing chunk 35
Processing chunk 36
Processing chunk 37
Processing chunk 38
Processing chunk 39
Processing chunk 40
Processing chunk 41
Processing chunk 42
Processing chunk 43
Processing chunk 44
Processing chunk 45
Processing chunk 46
Processing chunk 47
Processing chunk 48
Processing chunk 49
Processing chunk 50
Processin

In [185]:
updated_df.to_csv('recorderd_data_with_data_aug_and_features.csv', index=False)

In [186]:
updated_df.head()

Unnamed: 0,file_path,category,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12,mfcc_13
0,augmented_audio_dataset\Seven\pitch_up_1029.wav,Seven,-499.928345,9.105547,2.492326,4.310031,1.54268,-0.107071,-1.836942,1.100151,0.928557,0.948992,0.234807,0.320373,1.025592,0.396697
1,augmented_audio_dataset\two\pitch_up_1288.wav,two,-447.733368,11.961545,5.061387,-0.527899,-1.532452,0.944962,0.123386,-3.872755,-0.613846,-4.191105,-1.026834,-2.010867,0.851715,-0.05487
2,augmented_audio_dataset\Forward\volume_up_311.wav,Forward,-488.926239,18.606514,-0.69039,0.179542,2.166796,-0.838498,-0.456715,-2.516395,-0.679739,-2.244136,0.659838,2.30694,-2.561234,1.074301
3,processed_recorded_audio\visual\visual_21.wav,visual,-500.696594,13.200192,-1.008743,4.651892,1.181455,-1.134971,-1.300725,0.395899,-2.419369,-0.320029,-3.210164,0.794875,-0.942491,0.316947
4,augmented_audio_dataset\Eight\shift_backward_1...,Eight,-479.69458,9.368511,-0.375977,5.12713,-1.506138,-2.261334,-0.785565,-1.900638,-1.586725,-2.909011,-0.681705,-0.533147,-0.656668,-0.388845


In [187]:
updated_df.shape

(18456, 16)

In [None]:
# freezing few layers of the trained model and then training last few layers and adding new layers as well to fine tunr

In [188]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Dropout, Flatten,BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [189]:
# Extract MFCC features and labels from the dataframe
X = updated_df.iloc[:, 2:].values  # Extract MFCC columns mfcc_0 to mfcc_13
y = updated_df['category'].values  # Extract labels (categories)


In [190]:
# Convert the category labels to numeric form
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# One-hot encode the labels (since we have 36 categories)
y_one_hot = to_categorical(y_encoded, num_classes=36)


In [191]:
# Split the data into training and validation sets (80% training, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(X, y_one_hot, test_size=0.2, random_state=42)


In [192]:
# Reshape the MFCC features to include a 'channel' dimension if required by the model
X_train = np.expand_dims(X_train, axis=-1)
X_val = np.expand_dims(X_val, axis=-1)


In [193]:
# Path to the directory containing the saved model (.pb file)
saved_model_dir = 'model'

# Load the model from the SavedModel format
model = tf.keras.models.load_model(saved_model_dir)

# Display the model summary to understand its structure
model.summary()


Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_35 (Conv1D)          (None, 14, 128)           768       
                                                                 
 batch_normalization (BatchN  (None, 14, 128)          512       
 ormalization)                                                   
                                                                 
 dropout_30 (Dropout)        (None, 14, 128)           0         
                                                                 
 conv1d_36 (Conv1D)          (None, 14, 256)           98560     
                                                                 
 batch_normalization_1 (Batc  (None, 14, 256)          1024      
 hNormalization)                                                 
                                                                 
 dropout_31 (Dropout)        (None, 14, 256)          

In [194]:
# this is the last layer before flatten
x = model.layers[8].output.shape 
x

TensorShape([None, 14, 256])

In [195]:
from tensorflow.keras.layers import Conv1D, BatchNormalization, ReLU, Dense, Dropout, Flatten, GlobalAveragePooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load the pre-trained model
# model = load_model('path_to_your_trained_model.pb')  # Adjust if needed

# Freeze all layers except the last few
for layer in model.layers[:-4]:  # Adjust the number of layers to freeze
    layer.trainable = False

# Check if layers are set to non-trainable
for layer in model.layers:
    print(f"{layer.name}: {'Trainable' if layer.trainable else 'Non-trainable'}")

# Create a new model with the same input as the original
inputs = model.input

# Take output from a deeper layer (before Flatten)
x = model.layers[8].output  # Adjust to the last Conv1D layer you want to keep

# Add new Conv1D layers for more complex patterns
x = Conv1D(filters=512, kernel_size=5, padding='same', strides=1)(x)
x = BatchNormalization()(x)
x = ReLU()(x)
x = Dropout(0.2)(x)

x = Conv1D(filters=512, kernel_size=3, padding='same', strides=1)(x)
x = BatchNormalization()(x)
x = ReLU()(x)
x = Dropout(0.2)(x)

x = Conv1D(filters=512, kernel_size=3, padding='same', strides=1)(x)
x = BatchNormalization()(x)
x = ReLU()(x)
x = Dropout(0.2)(x)

# Add Global Average Pooling
x = GlobalAveragePooling1D()(x)

# Fully connected Dense layers for fine-tuning
x = Dense(1024, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(1024, activation='relu')(x)
x = Dense(256, activation='relu')(x)

# Output layer for classification (36 categories)
outputs = Dense(36, activation='softmax')(x)

# Create a new model
fine_tuned_model = Model(inputs=inputs, outputs=outputs)

# Compile the new model
optimizer = Adam(learning_rate=1e-5)  # Lower learning rate for fine-tuning
fine_tuned_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Display the model summary to ensure everything looks good
fine_tuned_model.summary()


conv1d_35: Non-trainable
batch_normalization: Non-trainable
dropout_30: Non-trainable
conv1d_36: Non-trainable
batch_normalization_1: Non-trainable
dropout_31: Non-trainable
conv1d_37: Non-trainable
batch_normalization_2: Non-trainable
dropout_32: Non-trainable
flatten_7: Non-trainable
dense_20: Non-trainable
dropout_33: Trainable
dense_21: Trainable
dropout_34: Trainable
dense_22: Trainable
Model: "model_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_35_input (InputLayer  [(None, 14, 1)]          0         
 )                                                               
                                                                 
 conv1d_35 (Conv1D)          (None, 14, 128)           768       
                                                                 
 batch_normalization (BatchN  (None, 14, 128)          512       
 ormalization)                                             

In [196]:
# Fine-tune the model on the new dataset
history = fine_tuned_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=50, batch_size=32)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [197]:
# Save the fine-tuned model
fine_tuned_model.save('fine_tuned_model', save_format='tf')  # Saves in the SavedModel format (.pb)




INFO:tensorflow:Assets written to: fine_tuned_model\assets


INFO:tensorflow:Assets written to: fine_tuned_model\assets
