In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from scikeras.wrappers import KerasClassifier





In [None]:
# Read the merged MFCC feature table from the working directory.
# NOTE(review): read_csv infers a header from the first row by default; if the
# file has no header row, that first record is consumed as column names and
# then overwritten below -- confirm against the actual file.
df = pd.read_csv('merged_dataset.csv')

# Column schema: mfcc_1 ... mfcc_300 followed by the target column.
columns = [f'mfcc_{i}' for i in range(1, 301)]
columns.append('label')

# Replace the inferred header with the schema above
# (pandas raises if the column count does not match).
df.columns = columns

# Preview the first rows under the new headers.
print(df.head())

In [None]:
# Column dtypes and non-null counts (info() prints directly to the cell output).
df.info()
# Summary statistics; as the last expression of the cell this is rendered as its output.
df.describe()

In [None]:
# Configure TensorFlow to use GPU
# allow_growth requests on-demand GPU memory allocation instead of reserving
# everything up front.
# NOTE(review): this uses the TF1 compatibility session API; confirm it is
# still honoured by the installed TensorFlow/Keras version.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(sess)

# Print GPU devices
# gpu_device_name() only *returns* the device name; in the middle of a cell the
# value would be discarded silently, so print it explicitly.
print("Default GPU device: ", tf.test.gpu_device_name())
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
for gpu in gpus:
    print(gpu)

# Print session data
print(sess)

In [None]:

# Feature matrix: every column except the target, cast to float32.
X = df.drop(columns=['label']).values.astype(np.float32)
#y = df['label'].values  # Labels
# Target vector, also stored as float32.
y = df['label'].values.astype(np.float32)


# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Stand-alone scaler instance (the pipelines below each create their own).
scaler = StandardScaler()

# SVM: standardise the features, then fit a default-parameter SVC.
svm_model = SVC()
svm_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svm', svm_model),
])

# Random forest: same scaling step in front of a default-parameter forest.
rf_model = RandomForestClassifier()
rf_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('rf', rf_model),
])

# RNN Model
def create_rnn_model(n_timesteps=None, n_classes=4, device=None):
    """Build and compile the two-layer LSTM classifier.

    Parameters
    ----------
    n_timesteps : int, optional
        Length of the input sequence. Defaults to ``X_train.shape[1]`` (the
        notebook-level training matrix), which is what the builder always
        used before this parameter existed.
    n_classes : int, default 4
        Number of units in the softmax output layer.
    device : str, optional
        Device string handed to ``tf.distribute.OneDeviceStrategy``. When
        omitted, ``'GPU:0'`` is used if TensorFlow reports a GPU and
        ``'CPU:0'`` otherwise, so the builder is also usable without a GPU.

    Returns
    -------
    A compiled Keras ``Sequential`` model (categorical cross-entropy, Adam,
    accuracy metric).

    NOTE(review): the input layer declares shape ``(n_timesteps, 1)`` while the
    notebook passes 2-D feature arrays to ``fit``; this relies on Keras adding
    the trailing axis itself -- confirm on the installed version.
    """
    if n_timesteps is None:
        # Fall back to the notebook global so existing zero-argument calls keep working.
        n_timesteps = X_train.shape[1]
    if device is None:
        device = 'GPU:0' if tf.config.list_physical_devices('GPU') else 'CPU:0'

    strategy = tf.distribute.OneDeviceStrategy(device)
    with strategy.scope():
        model = Sequential([
            LSTM(64, input_shape=(n_timesteps, 1), return_sequences=True),
            LSTM(32, return_sequences=False),
            Dense(n_classes, activation='softmax'),
        ])
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# CNN Model
def create_cnn_model(n_timesteps=None, n_classes=4, device=None):
    """Build and compile the 1-D convolutional classifier.

    Parameters
    ----------
    n_timesteps : int, optional
        Length of the input sequence. Defaults to ``X_train.shape[1]`` (the
        notebook-level training matrix), which is what the builder always
        used before this parameter existed.
    n_classes : int, default 4
        Number of units in the softmax output layer.
    device : str, optional
        Device string handed to ``tf.distribute.OneDeviceStrategy``. When
        omitted, ``'GPU:0'`` is used if TensorFlow reports a GPU and
        ``'CPU:0'`` otherwise, so the builder is also usable without a GPU.

    Returns
    -------
    A compiled Keras ``Sequential`` model (categorical cross-entropy, Adam,
    accuracy metric).

    NOTE(review): the input layer declares shape ``(n_timesteps, 1)`` while the
    notebook passes 2-D feature arrays to ``fit``; this relies on Keras adding
    the trailing axis itself -- confirm on the installed version.
    """
    if n_timesteps is None:
        # Fall back to the notebook global so existing zero-argument calls keep working.
        n_timesteps = X_train.shape[1]
    if device is None:
        device = 'GPU:0' if tf.config.list_physical_devices('GPU') else 'CPU:0'

    strategy = tf.distribute.OneDeviceStrategy(device)
    with strategy.scope():
        model = Sequential([
            Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, 1)),
            MaxPooling1D(pool_size=2),
            Flatten(),
            Dense(50, activation='relu'),
            Dense(n_classes, activation='softmax'),
        ])
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Wrap Keras models for use in scikit-learn.
# `model=` is the current SciKeras argument name; `build_fn=` is its deprecated alias.
rnn_model = KerasClassifier(model=create_rnn_model, epochs=10, batch_size=10, verbose=1)
cnn_model = KerasClassifier(model=create_cnn_model, epochs=10, batch_size=10, verbose=1)

# Fit and evaluate models
models = {#'SVM': svm_pipeline,
          #'Random Forest': rf_pipeline,
          'RNN': rnn_model,
          'CNN': cnn_model}

accuracy_values = []
reports = []

for model_name, model in models.items():
    is_keras = model_name in ['RNN', 'CNN']
    if is_keras:
        # The Keras models are compiled with categorical cross-entropy, so
        # they are trained on one-hot encoded targets.
        model.fit(X_train, to_categorical(y_train))
    else:
        model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    if is_keras and np.ndim(y_pred) > 1:
        # One-hot style predictions -> class indices. Guarded so that a wrapper
        # returning 1-D labels does not crash on argmax(axis=1).
        # NOTE(review): assumes the labels are the integers 0..n_classes-1.
        y_pred = np.argmax(y_pred, axis=1)

    # Accuracy is derived from the predictions already in hand. Calling
    # model.score(X_test, y_test) here would compare integer labels against an
    # estimator fitted on one-hot targets and would repeat inference as well.
    accuracy = float(np.mean(y_pred == y_test))
    accuracy_values.append(accuracy)

    report = classification_report(y_test, y_pred)
    reports.append(report)

    print(f"{model_name} Accuracy: {accuracy}")
    print(report)
    print()

# Bar chart comparing the test accuracy of every fitted model.
model_names = list(models.keys())
fig, ax = plt.subplots()
ax.bar(model_names, accuracy_values)
ax.set_xlabel('Models')
ax.set_ylabel('Accuracy')
ax.set_title('Model Accuracy Comparison')
plt.show()

for model_name, report in zip(model_names, reports):
    print(f"Classification Report for {model_name}:")
    print(report)
    print()

# Pick the winner from the accuracies computed above instead of re-scoring
# (and re-running inference on) every model.
accuracy_by_model = dict(zip(model_names, accuracy_values))
best_model_name = max(accuracy_by_model, key=accuracy_by_model.get)
best_model = models[best_model_name]
print(f"Best Model: {best_model_name}")

In [None]:


# Configure TensorFlow to use GPU
# allow_growth requests on-demand GPU memory allocation instead of reserving
# everything up front.
# NOTE(review): this uses the TF1 compatibility session API; confirm it is
# still honoured by the installed TensorFlow/Keras version.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(sess)

# Load dataset
# Uses the same file as the notebook's first loading cell; the previous value
# '/path/to/dataset.csv' was an unfilled placeholder path.
# NOTE(review): confirm this is the intended file for the grid search.
df = pd.read_csv('merged_dataset.csv')

# Define the column names: mfcc_1 ... mfcc_300 followed by the target column
columns = ['mfcc_' + str(i) for i in range(1, 301)] + ['label']

# Assign the column names to the DataFrame
# (pandas raises if the column count does not match)
df.columns = columns

# Print the DataFrame with headers
print(df.head())

X = df.drop('label', axis=1).values  # Features
y = df['label'].values  # Labels

# Split the data into training and testing sets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a standard scaler
# (stand-alone instance; the pipelines below each create their own)
scaler = StandardScaler()

# SVM Pipeline: standardise the features, then fit an SVC
svm_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC())
])

# Random Forest Pipeline: same scaling step in front of a random forest
rf_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('rf', RandomForestClassifier())
])

# RNN Model
def create_rnn_model(n_timesteps=None, n_classes=4, device=None):
    """Build and compile the two-layer LSTM classifier.

    Parameters
    ----------
    n_timesteps : int, optional
        Length of the input sequence. Defaults to ``X_train.shape[1]`` (the
        notebook-level training matrix), which is what the builder always
        used before this parameter existed.
    n_classes : int, default 4
        Number of units in the softmax output layer.
    device : str, optional
        Device string handed to ``tf.distribute.OneDeviceStrategy``. When
        omitted, ``'GPU:0'`` is used if TensorFlow reports a GPU and
        ``'CPU:0'`` otherwise, so the builder is also usable without a GPU.

    Returns
    -------
    A compiled Keras ``Sequential`` model (categorical cross-entropy, Adam,
    accuracy metric).

    NOTE(review): the input layer declares shape ``(n_timesteps, 1)`` while the
    notebook passes 2-D feature arrays to ``fit``; this relies on Keras adding
    the trailing axis itself -- confirm on the installed version.
    """
    if n_timesteps is None:
        # Fall back to the notebook global so existing zero-argument calls keep working.
        n_timesteps = X_train.shape[1]
    if device is None:
        device = 'GPU:0' if tf.config.list_physical_devices('GPU') else 'CPU:0'

    strategy = tf.distribute.OneDeviceStrategy(device)
    with strategy.scope():
        model = Sequential([
            LSTM(64, input_shape=(n_timesteps, 1), return_sequences=True),
            LSTM(32, return_sequences=False),
            Dense(n_classes, activation='softmax'),
        ])
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# CNN Model
def create_cnn_model(n_timesteps=None, n_classes=4, device=None):
    """Build and compile the 1-D convolutional classifier.

    Parameters
    ----------
    n_timesteps : int, optional
        Length of the input sequence. Defaults to ``X_train.shape[1]`` (the
        notebook-level training matrix), which is what the builder always
        used before this parameter existed.
    n_classes : int, default 4
        Number of units in the softmax output layer.
    device : str, optional
        Device string handed to ``tf.distribute.OneDeviceStrategy``. When
        omitted, ``'GPU:0'`` is used if TensorFlow reports a GPU and
        ``'CPU:0'`` otherwise, so the builder is also usable without a GPU.

    Returns
    -------
    A compiled Keras ``Sequential`` model (categorical cross-entropy, Adam,
    accuracy metric).

    NOTE(review): the input layer declares shape ``(n_timesteps, 1)`` while the
    notebook passes 2-D feature arrays to ``fit``; this relies on Keras adding
    the trailing axis itself -- confirm on the installed version.
    """
    if n_timesteps is None:
        # Fall back to the notebook global so existing zero-argument calls keep working.
        n_timesteps = X_train.shape[1]
    if device is None:
        device = 'GPU:0' if tf.config.list_physical_devices('GPU') else 'CPU:0'

    strategy = tf.distribute.OneDeviceStrategy(device)
    with strategy.scope():
        model = Sequential([
            Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, 1)),
            MaxPooling1D(pool_size=2),
            Flatten(),
            Dense(50, activation='relu'),
            Dense(n_classes, activation='softmax'),
        ])
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Wrap Keras models for use in scikit-learn.
# `model=` is the current SciKeras argument name; `build_fn=` is its deprecated alias.
rnn_model = KerasClassifier(model=create_rnn_model, epochs=10, batch_size=10, verbose=1)
cnn_model = KerasClassifier(model=create_cnn_model, epochs=10, batch_size=10, verbose=1)

# Define parameter grid for GridSearchCV
svm_param_grid = {'svm__C': [1, 10], 'svm__gamma': [0.001, 0.01]}
rf_param_grid = {'rf__n_estimators': [50, 100], 'rf__max_depth': [10, 20]}
rnn_param_grid = {'batch_size': [10, 20], 'epochs': [10, 20]}
cnn_param_grid = {'batch_size': [10, 20], 'epochs': [10, 20]}

# Define cross-validation folds (shuffled, fixed seed for repeatability)
cv = KFold(n_splits=5, random_state=42, shuffle=True)

# Perform GridSearchCV
grids = {}
for model_name, pipeline, param_grid in [('SVM', svm_pipeline, svm_param_grid),
                                         ('RandomForest', rf_pipeline, rf_param_grid),
                                         ('RNN', rnn_model, rnn_param_grid),
                                         ('CNN', cnn_model, cnn_param_grid)]:
    is_keras = model_name in ['RNN', 'CNN']
    # The Keras builders pin their model to a single device, so parallel
    # workers would all compete for that one device; run those searches
    # serially and keep full parallelism for the scikit-learn pipelines.
    grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid,
                               n_jobs=1 if is_keras else -1, cv=cv, verbose=2)
    # Keras models use categorical cross-entropy and therefore need one-hot targets.
    grid_search.fit(X_train, to_categorical(y_train) if is_keras else y_train)
    grids[model_name] = grid_search

# Evaluate models
for model_name, grid in grids.items():
    y_pred = grid.predict(X_test)
    if model_name in ['RNN', 'CNN'] and np.ndim(y_pred) > 1:
        # One-hot style predictions -> class indices. Guarded so that a wrapper
        # returning 1-D labels does not crash on argmax(axis=1).
        # NOTE(review): assumes the labels are the integers 0..n_classes-1.
        y_pred = np.argmax(y_pred, axis=1)
    test_accuracy = float(np.mean(y_pred == y_test))
    print(f"{model_name} Best Params: {grid.best_params_}")
    # best_score_ is the mean cross-validated score of the best parameter set,
    # not the hold-out accuracy, so the two are reported under separate labels.
    print(f"{model_name} CV Accuracy: {grid.best_score_}")
    print(f"{model_name} Test Accuracy: {test_accuracy}")
    print(f"{model_name} Classification Report:")
    print(classification_report(y_test, y_pred))

# Select the best model (by mean cross-validated score)
best_model_name = max(grids, key=lambda name: grids[name].best_score_)
best_model = grids[best_model_name].best_estimator_
print(f"Best Model: {best_model_name}")

In [None]:
# X = df.drop('label', axis=1).values  # Features
# y = df['label'].values  # Labels
# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Define a standard scaler
# scaler = StandardScaler()

# # SVM Pipeline
# svm_pipeline = Pipeline([
#     ('scaler', StandardScaler()),
#     ('svm', SVC())
# ])

# # Random Forest Pipeline
# rf_pipeline = Pipeline([
#     ('scaler', StandardScaler()),
#     ('rf', RandomForestClassifier())
# ])

# # RNN Model
# def create_rnn_model():
#     model = Sequential()
#     model.add(LSTM(64, input_shape=(X_train.shape[1], 1), return_sequences=True))
#     model.add(LSTM(32, return_sequences=False))
#     model.add(Dense(4, activation='softmax'))
#     model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
#     return model

# # CNN Model
# def create_cnn_model():
#     model = Sequential()
#     model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
#     model.add(MaxPooling1D(pool_size=2))
#     model.add(Flatten())
#     model.add(Dense(50, activation='relu'))
#     model.add(Dense(4, activation='softmax'))
#     model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
#     return model

# # Wrap Keras models for use in scikit-learn
# rnn_model = KerasClassifier(build_fn=create_rnn_model, epochs=10, batch_size=10, verbose=0)
# cnn_model = KerasClassifier(build_fn=create_cnn_model, epochs=10, batch_size=10, verbose=0)

# # Define parameter grid for GridSearchCV
# svm_param_grid = {'svm__C': [1, 10], 'svm__gamma': [0.001, 0.01]}
# rf_param_grid = {'rf__n_estimators': [50, 100], 'rf__max_depth': [10, 20]}
# rnn_param_grid = {'batch_size': [10, 20], 'epochs': [10, 20]}
# cnn_param_grid = {'batch_size': [10, 20], 'epochs': [10, 20]}

# # Define cross-validation folds
# cv = KFold(n_splits=5, random_state=42, shuffle=True)

# # Perform GridSearchCV
# grids = {}
# for model_name, pipeline, param_grid in [('SVM', svm_pipeline, svm_param_grid),
#                                          ('RandomForest', rf_pipeline, rf_param_grid),
#                                          ('RNN', rnn_model, rnn_param_grid),
#                                          ('CNN', cnn_model, cnn_param_grid)]:
#     grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, n_jobs=-1, cv=cv)
#     grid_search.fit(X_train, to_categorical(y_train) if model_name in ['RNN', 'CNN'] else y_train)
#     grids[model_name] = grid_search

# # Evaluate models
# for model_name, grid in grids.items():
#     if model_name in ['RNN', 'CNN']:
#         y_pred = grid.predict(X_test)
#         y_pred = np.argmax(y_pred, axis=1)
#     else:
#         y_pred = grid.predict(X_test)
#     print(f"{model_name} Best Params: {grid.best_params_}")
#     print(f"{model_name} Accuracy: {grid.best_score_}")
#     print(classification_report(y_test, y_pred))

# # Select the best model
# best_model_name = max(grids, key=lambda name: grids[name].best_score_)
# best_model = grids[best_model_name].best_estimator_
# print(f"Best Model: {best_model_name}")

# # Save the model if needed
# # best_model.model.save('best_model.h5')  # Uncomment this line to save the Keras model
