In [None]:
# Mount Google Drive into the Colab runtime; the dataset path used later in this
# notebook lives under /content/drive.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import joblib
import seaborn as sns
import matplotlib.pyplot as plt

from google.colab import files

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, Activation, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [None]:
# CSV with the pre-extracted features (presumably MFCCs, going by the file name)
# stored on the mounted Google Drive.
file_path = "/content/drive/MyDrive/Miniproject_ML/Sound datasets (csv)/mfcc_features_fixed.csv"

# Read the whole table into memory.
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Count the rows per class so class imbalance is visible before training.
column_name = "label"
label_column = df[column_name]
label_column.value_counts()

In [None]:
# Replace the class names in df["label"] with integer ids.
# NOTE: the column is overwritten in place, so re-running this cell re-fits the
# encoder on the integer ids and the original class names are lost.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(df["label"])
df["label"] = encoded_labels

# One-hot encode the integer ids (one column per class known to the encoder).
class_names = label_encoder.classes_
y_onehot = to_categorical(df["label"], num_classes=len(class_names))

print("✅ One-Hot Shape:", y_onehot.shape)
print("✅ Mapping:", {name: class_id for class_id, name in enumerate(class_names)})

In [None]:
# Pair every row's feature vector with its class id.
# Built from two whole-column slices instead of one `df.iloc[i, ...]` lookup per
# row, which avoids constructing a pandas Series for every single sample.
# Assumes the last column of df is the encoded "label" and every other column is
# a feature — TODO confirm against the CSV layout.
feature_rows = df.iloc[:, :-1].to_numpy().tolist()

# The label column is sliced on its own (so it is not coerced to the features'
# dtype) and each id stays wrapped in a one-element list, matching the
# [features, [class]] record layout the following cells expect.
class_rows = df.iloc[:, -1:].to_numpy().tolist()

extracted_features = [
    [features, label] for features, label in zip(feature_rows, class_rows)
]

In [None]:
# Wrap the [features, class] records in a two-column frame and preview the top rows.
extracted_features_df = pd.DataFrame(
    data=extracted_features,
    columns=["feature", "class"],
)
extracted_features_df.head()

In [None]:
# Shuffle every row with a fixed seed, then renumber the index from 0.
shuffled_extracted_features_df = (
    extracted_features_df
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
shuffled_extracted_features_df.head()

In [None]:
# Turn the per-row Python lists into NumPy arrays: X holds the feature vectors,
# y holds the one-element class lists.
feature_lists = shuffled_extracted_features_df["feature"].to_list()
class_lists = shuffled_extracted_features_df["class"].to_list()

X = np.array(feature_lists)
y = np.array(class_lists)

In [None]:
# Two-stage stratified split, same seed for both stages:
#   1) hold out 10% of all samples as the test set;
#   2) hold out 25% of the remainder as the validation set.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
    stratify=y,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.25,
    random_state=42,
    stratify=y_train_val,
)

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to all three splits.
scaler = StandardScaler()
scaler.fit(X_train)

X_train, X_val, X_test = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

# Persist the fitted scaler to the Colab filesystem.
joblib.dump(scaler, "/content/scaler.pkl")

In [None]:
# Report the shape of every split (features and labels) after scaling.
split_reports = (
    ("Shape X_train: {}", X_train),
    ("Shape y_train: {}", y_train),
    ("Shape X_val: {}", X_val),
    ("Shape y_val: {}", y_val),
    ("Shape X_test: {}", X_test),
    ("Shape y_test: {}", y_test),
)
for template, split in split_reports:
    print(template.format(split.shape))

In [None]:
def _add_time_axis(features):
    """Return `features` with a length-1 axis inserted at position 1.

    Arrays that are already 3-D are returned unchanged, so re-running this cell
    does not keep stacking extra axes onto X_train / X_val / X_test.
    """
    if features.ndim == 3:
        return features
    return np.expand_dims(features, axis=1)


# (n_samples, n_features) -> (n_samples, 1, n_features): the model cell reads
# shape[1] and shape[2] of X_train to size its input.
X_train = _add_time_axis(X_train)
X_val = _add_time_axis(X_val)
X_test = _add_time_axis(X_test)

print("Shape X_train: {}".format(X_train.shape))
print("Shape X_val: {}".format(X_val.shape))
print("Shape X_test: {}".format(X_test.shape))

In [None]:
# List the distinct class ids present in the training labels.
train_label_values = np.unique(y_train)
print("ค่าที่มีใน y_train:", train_label_values)

In [None]:
# Number of target classes; must equal the width of the model's softmax output.
num_classes = 7


def _one_hot(labels):
    """One-hot encode integer class ids into `num_classes` columns.

    Labels that already have `num_classes` columns are returned unchanged, so
    re-running this cell does not encode the one-hot matrix a second time
    (which would produce a 3-D array and break training).
    """
    labels = np.asarray(labels)
    if labels.ndim == 2 and labels.shape[1] == num_classes:
        return labels
    return to_categorical(labels, num_classes=num_classes)


# Convert the labels of every split to one-hot encoding.
y_train = _one_hot(y_train)
y_val = _one_hot(y_val)
y_test = _one_hot(y_test)

# Check the label shapes after conversion.
print("✅ y_train shape:", y_train.shape)
print("✅ y_val shape:", y_val.shape)
print("✅ y_test shape:", y_test.shape)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# and batch shuffling are repeatable between runs (as far as the backend allows).
tf.keras.utils.set_random_seed(42)

# Two stacked LSTM layers followed by a softmax classifier.
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` argument on the first LSTM, which newer Keras versions warn about.
model = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),

    # First recurrent layer: returns the full sequence so the next LSTM can consume it.
    LSTM(2048, return_sequences=True),
    Dropout(0.2),

    # Last recurrent layer before the classifier: returns only the final state.
    LSTM(1024, return_sequences=False),
    Dropout(0.2),

    # One output unit per class; tied to `num_classes` so it cannot drift from
    # the one-hot label width.
    Dense(num_classes, activation='softmax'),
])

model.compile(optimizer='RMSprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has not improved for 5 consecutive epochs and ask Keras to
# restore the weights from the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

fit_options = {
    "batch_size": 32,                     # samples per gradient update
    "epochs": 50,                         # upper bound on training epochs
    "validation_data": (X_val, y_val),    # monitored by early stopping
    "verbose": 1,                         # 1 = full progress output, 0 = silent
    "callbacks": [early_stopping],
}

# Train on the training split with the options above.
history = model.fit(X_train, y_train, **fit_options)

In [None]:
import matplotlib.pyplot as plt

# Side-by-side learning curves: accuracy on the left, loss on the right.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(10, 5))

# Training vs. validation accuracy per epoch.
accuracy_ax.plot(history.history['accuracy'], label='Train Accuracy')
accuracy_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
accuracy_ax.set_title('Model Accuracy')
accuracy_ax.set_xlabel('Epoch')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend(loc='lower right')

# Training vs. validation loss per epoch.
loss_ax.plot(history.history['loss'], label='Train Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Model Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc='upper right')

fig.tight_layout()
plt.show()

In [None]:
# Metrics recorded at the last epoch that actually ran.
final_train_accuracy = history.history['accuracy'][-1]
final_val_accuracy = history.history['val_accuracy'][-1]

print(f"Final Training Accuracy: {final_train_accuracy:.4f}")
print(f"Final Validation Accuracy: {final_val_accuracy:.4f}")

# The EarlyStopping callback was created with restore_best_weights=True, so the
# weights left in `model` may come from the epoch with the lowest val_loss
# rather than from the last epoch. Report that epoch's metrics as well, so the
# printed numbers can be matched to the model that gets evaluated afterwards.
best_epoch = int(np.argmin(history.history['val_loss']))
best_train_accuracy = history.history['accuracy'][best_epoch]
best_val_accuracy = history.history['val_accuracy'][best_epoch]

print(f"Best Epoch (lowest val_loss): {best_epoch + 1}")
print(f"Training Accuracy at Best Epoch: {best_train_accuracy:.4f}")
print(f"Validation Accuracy at Best Epoch: {best_val_accuracy:.4f}")

In [None]:
# Evaluate the trained model on the held-out test split.
# With the compile settings used above, evaluate() yields the loss followed by accuracy.
test_metrics = model.evaluate(X_test, y_test, verbose=1)
test_loss, test_accuracy = test_metrics

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

In [None]:
# Keep the raw softmax probabilities, one row per test sample.
# Do NOT threshold at 0.5 here: in a multi-class softmax the winning class often
# has probability <= 0.5, which would zero out the whole row and make the later
# np.argmax(y_pred, axis=1) fall back to class 0 for that sample.
y_pred = model.predict(X_test)

In [None]:
# Collapse the one-hot / per-class rows back to integer class ids.
y_test_labels = np.argmax(y_test, axis=1)
y_pred_labels = np.argmax(y_pred, axis=1)

# Per-class precision / recall / F1 report.
report_text = classification_report(y_test_labels, y_pred_labels)
print(report_text)

# Confusion matrix drawn as an annotated heatmap (rows = actual, columns = predicted).
conf_matrix = confusion_matrix(y_test_labels, y_pred_labels)
fig, ax = plt.subplots()
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
# How many leading test samples the manual accuracy check below inspects.
# NOTE(review): 627 looks like it was chosen to match the test-set size — confirm;
# slicing past the end of the arrays simply uses every available sample.
a_number_of_example = 627

In [None]:
# Restrict the comparison to the first `a_number_of_example` test samples.
preds_head = y_pred_labels[:a_number_of_example]
truth_head = y_test_labels[:a_number_of_example]

# Accuracy = share of positions where the prediction equals the true label.
is_correct = [p == t for p, t in zip(preds_head, truth_head)]
accuracy = sum(is_correct) / len(truth_head)
print(f"Accuracy: {accuracy:.2f}")

# List every misclassified sample with its position in the test slice.
print("\nMismatched Predictions:")
mismatches = [
    (position, p, t)
    for position, (p, t) in enumerate(zip(preds_head, truth_head))
    if p != t
]
for i, pred, true in mismatches:
    print(f"Index: {i}, Predicted: {pred}, Actual: {true}")