In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import scipy.stats

In [None]:

folder_path = '/content/drive/MyDrive/PSD/dataset_audio/TESS Toronto emotional speech set data'


subfolders = ['OAF_angry',"OAF_disgust",'OAF_Fear','OAF_happy','OAF_neutral','OAF_Pleasant_surprise','OAF_Sad','YAF_angry','YAF_disgust','YAF_fear','YAF_happy','YAF_neutral','YAF_pleasant_surprised','YAF_sad']

In [None]:
data = {
    'File Name': [],
    'Std' : [],
    'Mean': [],
    'Median': [],
    'Min': [],
    'Max': [],
    'Skewness': [],
    'Kurtosis': [],
    'Mode': [],
    'Q1': [],
    'Q3': [],
    'IQR': [],
    'ZCR Mean': [],
    'ZCR Median': [],
    'ZCR Std': [],
    'ZCR Kurtosis': [],
    'ZCR Skewness': [],
    'RMSE Mean': [],
    'RMSE Median': [],
    'RMSE Std': [],
    'RMSE Kurtosis': [],
    'RMSE Skewness': [],
    'Label': []
}

In [None]:
# Loop melalui setiap subfolder yang ingin digunakan
for subfolder in subfolders:
    subfolder_path = os.path.join(folder_path, subfolder)

    # List semua file audio .wav dalam subfolder
    audio_files = [f for f in os.listdir(subfolder_path) if f.endswith('.wav')]

    # Loop melalui setiap file audio .wav
    for audio_file in audio_files:
        audio_file_path = os.path.join(subfolder_path, audio_file)

        x, sr = librosa.load(audio_file_path, sr=None)
        zcr = librosa.feature.zero_crossing_rate(x)

        # Menghitung statistik dari file audio
        mean = np.mean(x)
        std = np.std(x)
        median = np.median(x)
        min_value = np.min(x)
        max_value = np.max(x)
        skewness = scipy.stats.skew(x)
        kurtosis = scipy.stats.kurtosis(x)
        mode = scipy.stats.mode(x)[0]
        q1 = np.quantile(x, 0.25)
        q3 = np.quantile(x, 0.75)
        iqr = scipy.stats.iqr(x)

        zcr_mean = zcr.mean()
        zcr_median = np.median(zcr)
        zcr_std = np.std(zcr)
        zcr_kurtosis = scipy.stats.kurtosis(zcr.ravel())
        zcr_skewness = scipy.stats.skew(zcr.ravel())

        # x_normalized = (x - np.mean(x)) / np.std(x)
        # rmse = np.sqrt(np.mean(np.square(x_normalized)))
        rmse = np.sqrt(np.mean(np.square(x)))


        rms_mean = rmse.mean()
        rms_median = np.median(rmse)
        rms_std = np.std(rmse)
        rms_kurtosis = scipy.stats.kurtosis(rmse, nan_policy='omit')
        rms_skewness = scipy.stats.skew(rmse, nan_policy='omit')



        # Menentukan label berdasarkan subfolder
        label = subfolder.split('_')[-1]

        # Menambahkan hasil statistik dan label ke dalam list
        data['File Name'].append(audio_file)
        data['Mean'].append(mean)
        data['Std'].append(std)
        data['Median'].append(median)
        data['Min'].append(min_value)
        data['Max'].append(max_value)
        data['Skewness'].append(skewness)
        data['Kurtosis'].append(kurtosis)
        data['Mode'].append(mode)
        data['Q1'].append(q1)
        data['Q3'].append(q3)
        data['IQR'].append(iqr)
        data['ZCR Mean'].append(zcr_mean)
        data['ZCR Median'].append(zcr_median)
        data['ZCR Std'].append(zcr_std)
        data['ZCR Kurtosis'].append(zcr_kurtosis)
        data['ZCR Skewness'].append(zcr_skewness)
        data['RMSE Mean'].append(rms_mean)
        data['RMSE Median'].append(rms_median)
        data['RMSE Std'].append(rms_std)
        data['RMSE Kurtosis'].append(rms_kurtosis)
        data['RMSE Skewness'].append(rms_skewness)
        data['Label'].append(label)


In [None]:
# Membuat DataFrame dari data
df = pd.DataFrame(data)
df

Unnamed: 0,File Name,Std,Mean,Median,Min,Max,Skewness,Kurtosis,Mode,Q1,...,ZCR Median,ZCR Std,ZCR Kurtosis,ZCR Skewness,RMSE Mean,RMSE Median,RMSE Std,RMSE Kurtosis,RMSE Skewness,Label
0,OAF_home_angry.wav,0.060812,-0.000009,0.000031,-0.300964,0.259369,-0.099696,2.161713,-0.003723,-0.019836,...,0.051758,0.094175,7.665817,2.880071,0.060812,0.060812,0.0,,,angry
1,OAF_hush_angry.wav,0.029584,0.000039,0.000488,-0.186340,0.177765,-0.103248,5.972783,0.001801,-0.006622,...,0.058105,0.084501,6.003556,2.495010,0.029584,0.029584,0.0,,,angry
2,OAF_cool_angry.wav,0.038097,-0.000050,-0.000641,-0.215668,0.203094,0.016051,3.444994,-0.002045,-0.012726,...,0.059570,0.084973,6.767254,2.679671,0.038097,0.038097,0.0,,,angry
3,OAF_calm_angry.wav,0.039653,-0.000068,0.000397,-0.207123,0.199158,-0.226809,2.833062,-0.001495,-0.013763,...,0.062256,0.072364,8.668655,2.876723,0.039654,0.039654,0.0,,,angry
4,OAF_bite_angry.wav,0.054161,-0.000185,-0.000183,-0.296997,0.266754,-0.098874,3.936222,-0.000183,-0.013428,...,0.053711,0.081337,7.479539,2.809658,0.054162,0.054162,0.0,,,angry
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,YAF_vine_sad.wav,0.056173,-0.000028,0.003387,-0.228271,0.127930,-1.156216,1.965374,0.000702,-0.016418,...,0.025391,0.201026,2.814359,2.113713,0.056173,0.056173,0.0,,,sad
2796,YAF_shall_sad.wav,0.040145,-0.000031,0.002502,-0.172272,0.109985,-0.823658,1.394181,0.000305,-0.015594,...,0.026367,0.225334,1.351650,1.656876,0.040145,0.040145,0.0,,,sad
2797,YAF_met_sad.wav,0.045512,-0.000026,0.002045,-0.204285,0.123810,-1.077722,2.682899,-0.000214,-0.010345,...,0.037109,0.188665,3.391794,2.138059,0.045512,0.045512,0.0,,,sad
2798,YAF_team_sad.wav,0.043499,-0.000042,0.000824,-0.201691,0.138916,-0.804279,1.975053,-0.000153,-0.014557,...,0.037109,0.206059,2.709638,2.019677,0.043499,0.043499,0.0,,,sad


In [None]:
csv_file_path = '/content/drive/MyDrive/PSD/Tugas3/result.csv'
df.to_csv(csv_file_path, index=False, na_rep='NaN')

print("Data telah diekspor ke:", csv_file_path)

Data telah diekspor ke: /content/drive/MyDrive/PSD/Tugas3/result.csv


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.datasets  import make_blobs
import pandas as pd
from pickle import dump


csv_file_path = '/content/drive/MyDrive/PSD/Tugas3/result.csv'


df = pd.read_csv(csv_file_path)

X, y = make_blobs(n_samples=100, centers=2, n_features=19, random_state=1)

X_train, _, y_train, _ = train_test_split(X, y, test_size=0.33, random_state=1)

# Mengambil fitur-fitur dari DataFrame (kecuali 'File Name' dan 'Label')
features = df.drop(columns=['File Name', 'Label','RMSE Kurtosis',	'RMSE Skewness'	 ])
# Menggunakan StandardScaler untuk mentransformasi fitur-fitur
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)



# save the scaler

dump(scaler, open('/content/drive/MyDrive/PSD/Tugas3/scaler.pkl', 'wb'))
# scaler_audio = StandardScaler()
# scaled_features = scaler_audio.fit_transform(features)

# Split data menjadi data train dan data test
# Split data menjadi X dan y
X = features
y = df['Label']
# X_train dan y_train adalah data pelatihan dan labelnya
# X_test adalah data uji yang ingin di normalisasi
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)


In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from pickle import load



# Menggunakan StandardScaler untuk mentransformasi fitur-fitur
# scaler_audio = StandardScaler()
# scaled_features = scaler_audio.fit_transform(features)


# Normalisasi data pelatihan
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

dummy_data = np.array([[0.3,0.52,	0.9,0.12,0.131,	2.3,	0.12	,0.03,
                       0.03,2.019,3.05,0.82 ,0.05,	0.08, 3.8,	8.8	,0.04,	0.042	,0.3],[
                          0.056173,-0.000028,0.003387,-0.228271,0.127930,-1.156216,1.965374,0.000702,-0.016418,
                          0.025391,0.201026,2.814359,2.113713,0.056173,0.056173,9.302	,3.2,4.5,3.9
                       ]])

scaler = load(open('/content/drive/MyDrive/PSD/Tugas3/scaler.pkl', 'rb'))
# Normalisasi data dummy dengan metode Min-Max
dummy_data_scaled = scaler.transform(dummy_data)

# Menggunakan StandardScaler yang sama untuk normalisasi dummy_data
dummy_data_scaled = scaler.transform(dummy_data)

# Menggunakan MinMaxScaler untuk dataset make_blobs
scaler_blobs = MinMaxScaler()

# Fitscaler train dataset make_blobs
scaler_blobs.fit(X_train)

# Transformasi dataset make_blobs
X_train_blobs_scaled = scaler_blobs.transform(X_train)
X_test_blobs_scaled = scaler_blobs.transform(X_test)

# Mengecek rentang nilai setiap fitur setelah penskalaan
for i in range(X_test_blobs_scaled.shape[1]):
    print('>%d, train: min=%.3f, max=%.3f, test: min=%.3f, max=%.3f' %
          (i, X_train_blobs_scaled[:, i].min(), X_train_blobs_scaled[:, i].max(),
           X_test_blobs_scaled[:, i].min(), X_test_blobs_scaled[:, i].max()))
print("Dimensi X_train:", X_train.shape)

>0, train: min=0.000, max=1.000, test: min=0.984, max=1.001
>1, train: min=0.000, max=1.000, test: min=-0.443, max=-0.443
>2, train: min=0.000, max=1.000, test: min=0.487, max=0.487
>3, train: min=0.000, max=1.000, test: min=1.299, max=1.545
>4, train: min=0.000, max=1.000, test: min=0.575, max=0.640
>5, train: min=0.000, max=1.000, test: min=0.250, max=0.567
>6, train: min=0.000, max=1.000, test: min=0.456, max=5.390
>7, train: min=0.000, max=1.000, test: min=1.132, max=1.133
>8, train: min=0.000, max=1.000, test: min=1.056, max=1.060
>9, train: min=0.000, max=1.000, test: min=0.653, max=0.657
>10, train: min=0.000, max=1.000, test: min=0.314, max=0.321
>11, train: min=0.000, max=1.000, test: min=0.614, max=0.630
>12, train: min=0.000, max=1.000, test: min=0.975, max=0.998
>13, train: min=0.000, max=1.000, test: min=-0.678, max=-0.647
>14, train: min=0.000, max=1.000, test: min=0.730, max=1.406
>15, train: min=0.000, max=1.000, test: min=-0.119, max=0.379
>16, train: min=0.000, max=1.



In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
import joblib
from sklearn.datasets import make_classification
import numpy as np


X_synthetic, y_synthetic = make_blobs(n_samples=100, n_features=18, centers=3, random_state=42)

# Split fitur (X) dan labels (y) menjadi data latih dan data uji
X_train, X_test, y_train, y_test = train_test_split(X_synthetic, y_synthetic, test_size=0.2, random_state=1)
# 1. Split fitur (X) dan labels (y) menjadi 80-20
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Menghasilkan data sintetis dengan 100 sampel


# print("Ukuran Data Latih (X_train, y_train):", X_train.shape, y_train.shape)
# print("Ukuran Data Uji (X_test, y_test):", X_test.shape, y_test.shape)
# # print("Ukuran Data Latih (X_train, y_train):", X_synthetic.shape, y_synthetic.shape)


# # 2. Normalisasi dengan metode Min-Max
# minmax_scaler = MinMaxScaler()
# X_train_minmax_scaled = minmax_scaler.fit_transform(X_train)
# X_test_minmax_scaled = minmax_scaler.transform(X_test)

# # 3. Penanganan NaN
# imputer = SimpleImputer(strategy='mean')
# X_train_minmax_scaled = imputer.fit_transform(X_train_minmax_scaled)
# X_test_minmax_scaled = imputer.transform(X_test_minmax_scaled)

# # 4. Membuat dan melatih model K-Nearest Neighbors (KNN)
# knn_model = KNeighborsClassifier(n_neighbors=35)
# # 0.7178571428571429
# knn_model.fit(X_train_minmax_scaled, y_train)

# # 5. Membuat dan melatih model Neural Network
# nn_model = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=1000, random_state=1)
# nn_model.fit(X_train_minmax_scaled, y_train)

# y_pred_knn = knn_model.predict(X_test_minmax_scaled)
# accuracy_knn = accuracy_score(y_test, y_pred_knn)
# print("Akurasi Model KNN:", accuracy_knn)

# y_pred_nn = nn_model.predict(X_test_minmax_scaled)
# accuracy_nn = accuracy_score(y_test, y_pred_nn)
# print("Akurasi Model Neural Network:", accuracy_nn)

# # 6. Menyimpan kedua model
# joblib.dump(knn_model, '/content/drive/MyDrive/PSD/Tugas3/knn_model.pkl')
# joblib.dump(nn_model, '/content/drive/MyDrive/PSD/Tugas3/nn_model.pkl')

# # 7. Melakukan prediksi dengan kedua model dengan data sintetis
# X_synthetic_scaled = minmax_scaler.transform(X_synthetic)  # Normalisasi data sintetis

# # Prediksi dengan model KNN
# synthetic_pred_knn = knn_model.predict(X_synthetic_scaled)

# # Prediksi dengan model Neural Network
# synthetic_pred_nn = nn_model.predict(X_synthetic_scaled)

# print("Prediksi Model KNN untuk Data Sintetis:", synthetic_pred_knn)
# print("Prediksi Model Neural Network untuk Data Sintetis:", synthetic_pred_nn)


NameError: ignored

In [None]:
# rom sklearn.model_selection import train_test_split
# from sklearn.preprocessing import MinMaxScaler
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.neural_network import MLPClassifier
# from sklearn.metrics import accuracy_score
# from sklearn.impute import SimpleImputer
# import joblib
# from sklearn.datasets import make_blobs
# import numpy as np

# # 1. Split fitur (X) dan labels (y) menjadi 80-20
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# X_synthetic, y_synthetic = make_blobs(n_samples=100, n_features=2, centers=3, random_state=42)



# print("Ukuran Data Latih (X_train, y_train):", X_train.shape, y_train.shape)
# print("Ukuran Data Uji (X_test, y_test):", X_test.shape, y_test.shape)

# # 2. Normalisasi dengan metode Min-Max
# minmax_scaler = MinMaxScaler()
# X_train_minmax_scaled = minmax_scaler.fit_transform(X_train)
# X_test_minmax_scaled = minmax_scaler.transform(X_test)

# # 3. Penanganan NaN
# imputer = SimpleImputer(strategy='mean')
# X_train_minmax_scaled = imputer.fit_transform(X_train_minmax_scaled)
# X_test_minmax_scaled = imputer.transform(X_test_minmax_scaled)

# # 4. Membuat dan melatih model K-Nearest Neighbors (KNN)
# knn_model = KNeighborsClassifier(n_neighbors=35)
# # 0.7178571428571429
# knn_model.fit(X_train_minmax_scaled, y_train)

# # 5. Membuat dan melatih model Neural Network
# nn_model = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=1000, random_state=1)
# nn_model.fit(X_train_minmax_scaled, y_train)

# y_pred_knn = knn_model.predict(X_test_minmax_scaled)
# accuracy_knn = accuracy_score(y_test, y_pred_knn)
# print("Akurasi Model KNN:", accuracy_knn)

# y_pred_nn = nn_model.predict(X_test_minmax_scaled)
# accuracy_nn = accuracy_score(y_test, y_pred_nn)
# print("Akurasi Model Neural Network:", accuracy_nn)


# # 6. Menyimpan kedua model
# joblib.dump(knn_model, '/content/drive/MyDrive/PSD/Tugas3/knn_model.pkl')
# joblib.dump(nn_model, '/content/drive/MyDrive/PSD/Tugas3/nn_model.pkl')

# # 7. Melakukan prediksi dengan kedua model dengan data dummy

# # Mengatasi NaN di dummy_data_scaled dengan SimpleImputer
# imputer_dummy = SimpleImputer(strategy='mean')
# dummy_data_scaled_imputed = imputer_dummy.fit_transform(dummy_data_scaled)


# # Prediksi dengan model KNN
# dummy_pred_knn = knn_model.predict(dummy_data_scaled_imputed)

# # Prediksi dengan model Neural Network
# dummy_pred_nn = nn_model.predict(dummy_data_scaled_imputed)

# print("Prediksi Model KNN untuk Data Dummy:", dummy_pred_knn)
# print("Prediksi Model Neural Network untuk Data Dummy:", dummy_pred_nn)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
import joblib
from sklearn.datasets import make_blobs
import numpy as np
from pickle import dump

# 1. Split fitur (X) dan labels (y) menjadi 80-20
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# X_synthetic, y_synthetic = make_blobs(n_samples=100, n_features=2, centers=3, random_state=42)



print("Ukuran Data Latih (X_train, y_train):", X_train.shape, y_train.shape)
print("Ukuran Data Uji (X_test, y_test):", X_test.shape, y_test.shape)

# 2. Normalisasi dengan metode Min-Max
minmax_scaler = MinMaxScaler()
X_train_minmax_scaled = minmax_scaler.fit_transform(X_train)
X_test_minmax_scaled = minmax_scaler.transform(X_test)
# dump(minmax_scaler, open('/content/drive/MyDrive/PSD/Tugas3/scaler.pkl', 'wb'))

# 3. Penanganan NaN
imputer = SimpleImputer(strategy='mean')
X_train_minmax_scaled = imputer.fit_transform(X_train_minmax_scaled)
X_test_minmax_scaled = imputer.transform(X_test_minmax_scaled)

# 4. Membuat dan melatih model K-Nearest Neighbors (KNN)
knn_model = KNeighborsClassifier(n_neighbors=35)
# 0.7178571428571429
knn_model.fit(X_train_minmax_scaled, y_train)

# 5. Membuat dan melatih model Neural Network
nn_model = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=1000, random_state=1)
nn_model.fit(X_train_minmax_scaled, y_train)

y_pred_knn = knn_model.predict(X_test_minmax_scaled)
accuracy_knn = accuracy_score(y_test, y_pred_knn)
print("Akurasi Model KNN:", accuracy_knn)

y_pred_nn = nn_model.predict(X_test_minmax_scaled)
accuracy_nn = accuracy_score(y_test, y_pred_nn)
print("Akurasi Model Neural Network:", accuracy_nn)


# 6. Menyimpan kedua model
joblib.dump(knn_model, '/content/drive/MyDrive/PSD/Tugas3/knn_model.pkl')
joblib.dump(nn_model, '/content/drive/MyDrive/PSD/Tugas3/nn_model.pkl')

# 7. Melakukan prediksi dengan kedua model dengan data dummy

# Mengatasi NaN di dummy_data_scaled dengan SimpleImputer
imputer_dummy = SimpleImputer(strategy='mean')
dummy_data_scaled_imputed = imputer_dummy.fit_transform(dummy_data_scaled)


# Prediksi dengan model KNN
dummy_pred_knn = knn_model.predict(dummy_data_scaled_imputed)

# Prediksi dengan model Neural Network
dummy_pred_nn = nn_model.predict(dummy_data_scaled_imputed)

print("Prediksi Model KNN untuk Data Dummy:", dummy_pred_knn)
print("Prediksi Model Neural Network untuk Data Dummy:", dummy_pred_nn)

Ukuran Data Latih (X_train, y_train): (2240, 19) (2240,)
Ukuran Data Uji (X_test, y_test): (560, 19) (560,)
Akurasi Model KNN: 0.7178571428571429
Akurasi Model Neural Network: 0.8303571428571429
Prediksi Model KNN untuk Data Dummy: ['angry' 'angry']
Prediksi Model Neural Network untuk Data Dummy: ['angry' 'fear']


In [None]:
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA

# Membuat objek imputer pengisian nilai NaN menggunakan rata-rata
imputer = SimpleImputer(strategy='mean')

# Mengisi nilai NaN dalam data dengan rata-rata dari masing-masing kolom
X_imputed = imputer.fit_transform(X)

# PCA
pca = PCA(n_components=19)
pca.fit(X_imputed)
X_pca = pca.transform(X_imputed)
X_pca


array([[ 7.54854197e-01,  3.42665987e+00,  3.14632758e+00, ...,
        -3.16152812e-16, -3.13503145e-16,  0.00000000e+00],
       [-2.33082773e+00,  1.26565809e+00,  1.27291988e+00, ...,
        -3.99085396e-16,  4.24452651e-17,  0.00000000e+00],
       [-1.25591768e+00,  1.55573890e+00,  3.24468685e+00, ...,
        -2.66052235e-16, -2.01864378e-16,  0.00000000e+00],
       ...,
       [ 2.19520270e-01,  2.08096204e+00, -1.97540059e+00, ...,
         4.33046439e-17,  3.14286511e-16,  0.00000000e+00],
       [ 7.90464747e-01,  1.34165730e+00, -1.28028382e+00, ...,
         9.19646227e-17,  9.01893705e-17,  0.00000000e+00],
       [ 2.12789728e+00,  1.53074623e+00, -2.66391463e+00, ...,
         5.51599234e-17,  2.47532256e-16,  0.00000000e+00]])

In [None]:
# from sklearn.decomposition import PCA
# pca = PCA(n_components=18)
# pca.fit(X)
# pca = pca.transform(X)
# pca