# **ZERO CROSSING RATE**

In [None]:
# import library
from google.colab import drive
import os
import librosa
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis, mode

In [None]:
# Mount Google Drive at /content/drive (uses the Colab `drive` helper imported from google.colab)
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd /content/drive/MyDrive/psdd/alldataaudio/

/content/drive/MyDrive/psdd/alldataaudio


In [None]:
# Sub-directories that are scanned for .wav files; each directory name is
# also stored as the class label of the files it contains.
folders = [
    'YAF_sad',
    'YAF_pleasant_surprised',
    'YAF_neutral',
    'YAF_happy',
    'YAF_fear',
    'YAF_disgust',
    'YAF_angry',
    'OAF_Sad',
    'OAF_Pleasant_surprise',
    'OAF_neutral',
    'OAF_happy',
    'OAF_Fear',
    'OAF_disgust',
    'OAF_angry',
]

In [None]:
def calculate_statistics(audio_path):
    """Extract zero-crossing-rate and signal-energy statistics from one audio file.

    Parameters
    ----------
    audio_path : str or path-like
        Path passed to ``librosa.load`` (called with its default arguments).

    Returns
    -------
    list
        Ten values, in this order: ZCR mean, ZCR median, ZCR standard
        deviation, ZCR kurtosis, ZCR skewness, mean of the squared samples,
        then median, standard deviation, kurtosis and skewness of the squared
        samples.
    """
    y, _ = librosa.load(audio_path)

    # The frame-wise zero-crossing rate is computed once and reused for all
    # five ZCR statistics instead of being recomputed for each of them.
    zcr = librosa.feature.zero_crossing_rate(y=y)
    zcr_frames = zcr[0]  # kurtosis/skew are taken over the first row

    zcr_mean = np.mean(zcr)
    zcr_median = np.median(zcr)
    zcr_std_dev = np.std(zcr)
    zcr_kurtosis = kurtosis(zcr_frames)
    zcr_skew = skew(zcr_frames)

    # Squared samples, shared by all energy statistics.
    squared = y**2

    # NOTE: the value reported as "rmse" is the mean of the squared samples;
    # no square root is taken. It is kept unchanged so that previously
    # extracted feature tables remain comparable.
    rmse = np.sum(squared) / len(y)
    rmse_median = np.median(squared)
    rmse_std_dev = np.std(squared)
    rmse_kurtosis = kurtosis(squared)
    rmse_skew = skew(squared)

    return [zcr_mean, zcr_median, zcr_std_dev, zcr_kurtosis, zcr_skew,
            rmse, rmse_median, rmse_std_dev, rmse_kurtosis, rmse_skew]

In [None]:
# Accumulates one row per audio file; the extraction loop appends to it
features =[]

In [None]:
# Visit every class folder, compute the statistics of each .wav file in it
# and store them as one row prefixed with the folder name (the label).
for folder in folders:
    folder_path = f'{folder}'
    for filename in os.listdir(folder_path):
        # Anything that is not a .wav file is ignored.
        if not filename.endswith('.wav'):
            continue
        audio_path = os.path.join(folder_path, filename)
        statistics = calculate_statistics(audio_path)
        features.append([folder, *statistics])

In [None]:
# Build the feature table: the label column first, then the ten statistics
# in the order returned by calculate_statistics.
columns = [
    'Label',
    'ZCR Mean', 'ZCR Median', 'ZCR Std Dev', 'ZCR Kurtosis', 'ZCR Skew',
    'RMSE', 'RMSE Median', 'RMSE Std Dev', 'RMSE Kurtosis', 'RMSE Skew',
]
df = pd.DataFrame(data=features, columns=columns)

In [None]:
# Display the feature DataFrame
df

Unnamed: 0,Label,ZCR Mean,ZCR Median,ZCR Std Dev,ZCR Kurtosis,ZCR Skew,RMSE,RMSE Median,RMSE Std Dev,RMSE Kurtosis,RMSE Skew
0,YAF_sad,0.201049,0.054199,0.246854,-0.394940,1.095446,0.001920,0.000288,0.004052,18.925496,3.885113
1,YAF_sad,0.182617,0.046875,0.245189,0.037876,1.320627,0.002526,0.000275,0.005935,28.021587,4.589746
2,YAF_sad,0.139663,0.043457,0.195694,3.512925,2.150804,0.001961,0.000211,0.004304,15.477900,3.595385
3,YAF_sad,0.130381,0.031982,0.223911,3.041670,2.149263,0.001512,0.000326,0.003228,25.953508,4.361503
4,YAF_sad,0.134169,0.035645,0.214915,3.056344,2.109584,0.002098,0.000401,0.004363,20.997073,4.031322
...,...,...,...,...,...,...,...,...,...,...,...
2805,OAF_angry,0.086841,0.049805,0.112346,5.806209,2.636295,0.002737,0.000157,0.007432,36.356886,5.146689
2806,OAF_angry,0.139003,0.085449,0.121679,1.753519,1.647977,0.002644,0.000280,0.006520,23.247692,4.383268
2807,OAF_angry,0.150664,0.085938,0.151976,1.753957,1.715083,0.000950,0.000081,0.002327,34.368208,4.887910
2808,OAF_angry,0.079896,0.051270,0.094507,6.424877,2.737054,0.002780,0.000238,0.005903,17.489979,3.611583


In [None]:
# Write the feature table to 'dataaudiobaru.csv' without the index column
df.to_csv('dataaudiobaru.csv',index=False)

# **NORMALISASI SETELAH SPLIT DATA DAN MENYIMPAN NORMALISASI DALAM BENTUK MODEL**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from pickle import dump

In [None]:
# Read the feature table back from the CSV file
dataknn = pd.read_csv('dataaudiobaru.csv')

# Separate the features (X) from the target column 'Label' (y)
X = dataknn.drop(['Label'], axis=1)
y = dataknn['Label']

# Split the data into train and test sets (20% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, test_size=0.2)

# Define the scaler and fit it on the training data only
scaler = StandardScaler()
scaler.fit(X_train)

# Save the fitted scaler; the context manager closes the file as soon as the
# dump finishes, so it is complete on disk before it is reopened later
with open('scalerbaru.pkl', 'wb') as scaler_file:
    dump(scaler, scaler_file)

# Transform the training dataset
X_train_scaled = scaler.transform(X_train)

dataknn

Unnamed: 0,Label,ZCR Mean,ZCR Median,ZCR Std Dev,ZCR Kurtosis,ZCR Skew,RMSE,RMSE Median,RMSE Std Dev,RMSE Kurtosis,RMSE Skew
0,YAF_sad,0.201049,0.054199,0.246854,-0.394940,1.095446,0.001920,0.000288,0.004052,18.925496,3.885113
1,YAF_sad,0.182617,0.046875,0.245189,0.037876,1.320627,0.002526,0.000275,0.005935,28.021587,4.589746
2,YAF_sad,0.139663,0.043457,0.195694,3.512925,2.150804,0.001961,0.000211,0.004304,15.477900,3.595385
3,YAF_sad,0.130381,0.031982,0.223911,3.041670,2.149263,0.001512,0.000326,0.003228,25.953508,4.361503
4,YAF_sad,0.134169,0.035645,0.214915,3.056344,2.109584,0.002098,0.000401,0.004363,20.997073,4.031322
...,...,...,...,...,...,...,...,...,...,...,...
2805,OAF_angry,0.086841,0.049805,0.112346,5.806209,2.636295,0.002737,0.000157,0.007432,36.356886,5.146689
2806,OAF_angry,0.139003,0.085449,0.121679,1.753519,1.647977,0.002644,0.000280,0.006520,23.247692,4.383268
2807,OAF_angry,0.150664,0.085938,0.151976,1.753957,1.715083,0.000950,0.000081,0.002327,34.368208,4.887910
2808,OAF_angry,0.079896,0.051270,0.094507,6.424877,2.737054,0.002780,0.000238,0.005903,17.489979,3.611583


In [None]:
import pickle
# Reload the scaler that was pickled to 'scalerbaru.pkl'
with open('scalerbaru.pkl', 'rb') as standarisasi:
    loadscal= pickle.load(standarisasi)

In [None]:
# Scale the test features with the scaler reloaded from the pickle file
# (the saved scaler was fitted on the training features)
X_test_scaled=loadscal.transform(X_test)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [None]:
# Try every odd neighbour count below k and record the test accuracy.
# acc[n-1] holds the accuracy for n neighbours; slots for even n stay 0.
# NOTE(review): k is selected on the test split, so the reported accuracy is
# presumably optimistic - consider a validation split; confirm with the author.
k = 100
acc = np.zeros(k - 1)

for n in range(1, k, 2):
    knn = KNeighborsClassifier(n_neighbors=n, metric="euclidean")
    knn.fit(X_train_scaled, y_train)
    y_pred = knn.predict(X_test_scaled)
    acc[n - 1] = accuracy_score(y_test, y_pred)

best_index = acc.argmax()
print('akurasi terbaik adalah', acc[best_index], 'dengan nilai k =', best_index + 1)

akurasi terbaik adalah 0.7170818505338078 dengan nilai k = 13


In [None]:
# Train the classifier before saving it: an unfitted estimator cannot predict
# after it is loaded back, so the pickle would not be usable as a model.
knn = KNeighborsClassifier(n_neighbors=13, metric="euclidean")
knn.fit(X_train_scaled, y_train)

# The context manager closes the file once the dump finishes.
with open('modelknn.pkl', 'wb') as model_file:
    dump(knn, model_file)

In [None]:
import pickle
# Reload the pickled classifier. The file handle gets its own name so that it
# does not overwrite the `knn` estimator variable with a closed file object.
with open('modelknn.pkl', 'rb') as model_file:
    loadknn = pickle.load(model_file)

# (Re)fit on the scaled training data so the loaded model is ready to predict.
loadknn.fit(X_train_scaled, y_train)

In [None]:
# Predict labels for the scaled test features and display them
y_pred = loadknn.predict(X_test_scaled)
y_pred

array(['YAF_sad', 'YAF_pleasant_surprised', 'YAF_fear', 'YAF_angry',
       'YAF_angry', 'OAF_disgust', 'OAF_Pleasant_surprise', 'YAF_sad',
       'OAF_disgust', 'OAF_neutral', 'YAF_angry', 'YAF_happy',
       'YAF_disgust', 'OAF_happy', 'YAF_angry', 'YAF_pleasant_surprised',
       'OAF_Fear', 'YAF_neutral', 'YAF_angry', 'YAF_disgust',
       'YAF_disgust', 'OAF_neutral', 'YAF_neutral',
       'OAF_Pleasant_surprise', 'YAF_sad', 'OAF_disgust', 'OAF_happy',
       'OAF_happy', 'YAF_pleasant_surprised', 'YAF_angry', 'YAF_happy',
       'OAF_happy', 'OAF_Sad', 'OAF_Fear', 'YAF_disgust', 'OAF_happy',
       'OAF_Pleasant_surprise', 'OAF_neutral', 'YAF_disgust',
       'YAF_neutral', 'OAF_disgust', 'OAF_Fear', 'YAF_neutral', 'OAF_Sad',
       'OAF_Fear', 'YAF_neutral', 'YAF_sad', 'OAF_Fear', 'YAF_disgust',
       'OAF_Sad', 'OAF_Sad', 'OAF_disgust', 'YAF_disgust',
       'YAF_pleasant_surprised', 'YAF_pleasant_surprised', 'OAF_disgust',
       'OAF_disgust', 'YAF_neutral', 'OAF_neutral', '

In [None]:
# Accuracy of the reloaded k-NN model on the test split
accuracy = accuracy_score(y_test,y_pred)
print("akurasi :",accuracy)

akurasi : 0.7170818505338078


# **PREPROCESSING**

Preprocessing terdiri dari beberapa langkah:

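1. normalisasi data (penyamaan skala nilai data, misalnya ke dalam rentang 0 - 1) yang bertujuan agar ciri data yang nilainya dalam rentang ribuan atau jutaan tidak mendominasi dan merusak komputasi. Pada notebook ini normalisasi dilakukan dengan StandardScaler, yaitu setiap fitur diubah sehingga memiliki rata-rata 0 dan simpangan baku 1.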

2. data cleaning, yaitu pembersihan data yang dilakukan jika ada missing value pada ciri atau fitur data; salah satu teknik pengisiannya adalah memasukkan nilai rata-rata dari kolom atau fitur tersebut.

# **REDUKSI DATA**

Reduksi data ada 2 jenis: seleksi data atau transformasi data (contohnya PCA / Principal Component Analysis). Pada seleksi data, kita memilih fitur / kolom yang paling berpengaruh. Sedangkan pada transformasi data, kita perlu membuat koordinat baru dari fitur yang ada, sebanyak jumlah fiturnya. Mengapa reduksi data (data reduction) diperlukan? Karena terlalu banyak kolom / fitur / ciri yang harus dikenali tidak baik untuk pemrosesan data dan memakan waktu komputasi yang lama; oleh karena itu reduksi data dilakukan untuk mendapatkan data yang terbaik. Langkah-langkah untuk mencari koordinat baru:

1. buat matriks covarian

2. buat persamaan

Catatan: konstanta yang paling besar menunjukkan bahwa koordinat tersebut memuat ciri yang paling banyak atau paling penting.

In [None]:
from sklearn.decomposition import PCA as sklearnPCA

In [None]:
# Fit a PCA with 8 components on the scaled training features and transform them
sklearn_pca = sklearnPCA(n_components=8)
X_train_pca = sklearn_pca.fit_transform(X_train_scaled)
# Show the type of the transformed data
type(X_train_pca)

numpy.ndarray

In [None]:
# Save the fitted PCA; the context manager closes the file once the dump finishes
with open('PCA8.pkl', 'wb') as pca_file:
    dump(sklearn_pca, pca_file)

In [None]:
import pickle
# Reload the PCA object that was pickled to 'PCA8.pkl'
with open('PCA8.pkl', 'rb') as pca:
    loadpca = pickle.load(pca)

In [None]:
# Apply the reloaded PCA to the scaled test features, then show the resulting shape
X_test_pca=loadpca.transform(X_test_scaled)
X_test_pca.shape

(562, 8)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# k-NN with 15 neighbours, trained on the PCA-transformed training features
# NOTE(review): the earlier search on the full feature set reported k = 13;
# presumably 15 was chosen separately for the PCA features - confirm.
classifier = KNeighborsClassifier(n_neighbors=15)
classifier.fit(X_train_pca, y_train)

In [None]:
# Predict labels for the PCA-transformed test features and display them
y_prediksi = classifier.predict(X_test_pca)
y_prediksi

array(['YAF_sad', 'YAF_pleasant_surprised', 'YAF_fear', 'YAF_angry',
       'YAF_fear', 'OAF_disgust', 'OAF_Pleasant_surprise', 'YAF_sad',
       'OAF_disgust', 'OAF_neutral', 'YAF_happy', 'YAF_happy',
       'YAF_disgust', 'OAF_happy', 'YAF_angry', 'YAF_pleasant_surprised',
       'OAF_Fear', 'YAF_neutral', 'YAF_angry', 'YAF_disgust',
       'YAF_neutral', 'OAF_neutral', 'YAF_neutral',
       'OAF_Pleasant_surprise', 'YAF_sad', 'OAF_happy', 'OAF_happy',
       'OAF_happy', 'YAF_pleasant_surprised', 'YAF_angry', 'YAF_happy',
       'OAF_happy', 'OAF_Sad', 'OAF_Fear', 'YAF_disgust', 'OAF_happy',
       'OAF_Pleasant_surprise', 'OAF_neutral', 'YAF_disgust',
       'YAF_neutral', 'OAF_disgust', 'OAF_Fear', 'YAF_neutral', 'OAF_Sad',
       'OAF_Fear', 'YAF_neutral', 'YAF_sad', 'OAF_Fear', 'YAF_disgust',
       'OAF_Sad', 'OAF_Sad', 'OAF_disgust', 'YAF_disgust',
       'YAF_pleasant_surprised', 'YAF_pleasant_surprised', 'OAF_disgust',
       'OAF_disgust', 'YAF_neutral', 'OAF_neutral', 'OAF

In [None]:
# Accuracy of the k-NN model trained on the PCA-transformed features
acc_pca = accuracy_score(y_test,y_prediksi)
print("akurasi :",acc_pca)
# accuracy after normalisation only (initial accuracy): 0.7170818505338078

akurasi : 0.6868327402135231


# **HASIL DARI AKURASI MENJADI SATU BARIS**

In [None]:
import librosa, os, numpy as np, pandas as pd, pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Sub-directories that are scanned for .wav files; each directory name is
# also stored as the class label of the files it contains.
folders = [
    'YAF_sad',
    'YAF_pleasant_surprised',
    'YAF_neutral',
    'YAF_happy',
    'YAF_fear',
    'YAF_disgust',
    'YAF_angry',
    'OAF_Sad',
    'OAF_Pleasant_surprise',
    'OAF_neutral',
    'OAF_happy',
    'OAF_Fear',
    'OAF_disgust',
    'OAF_angry',
]

In [None]:
def calculate_statistics(audio_path):
    """Extract zero-crossing-rate and signal-energy statistics from one audio file.

    Parameters
    ----------
    audio_path : str or path-like
        Path passed to ``librosa.load`` (called with its default arguments).

    Returns
    -------
    list
        Ten values, in this order: ZCR mean, ZCR median, ZCR standard
        deviation, ZCR kurtosis, ZCR skewness, mean of the squared samples,
        then median, standard deviation, kurtosis and skewness of the squared
        samples.
    """
    # Imported locally because the import cell of this section does not bring
    # in scipy.stats; this keeps the function usable when the section is run
    # on its own.
    from scipy.stats import kurtosis, skew

    y, _ = librosa.load(audio_path)

    # The frame-wise zero-crossing rate is computed once and reused for all
    # five ZCR statistics instead of being recomputed for each of them.
    zcr = librosa.feature.zero_crossing_rate(y=y)
    zcr_frames = zcr[0]  # kurtosis/skew are taken over the first row
    zcr_mean, zcr_median, zcr_std_dev, zcr_kurtosis, zcr_skew = (
        np.mean(zcr), np.median(zcr), np.std(zcr),
        kurtosis(zcr_frames), skew(zcr_frames)
    )

    # Squared samples, shared by all energy statistics.
    # NOTE: the value reported as "rmse" is the mean of the squared samples;
    # no square root is taken. It is kept unchanged so that previously
    # extracted feature tables remain comparable.
    squared = y**2
    rmse, rmse_median, rmse_std_dev, rmse_kurtosis, rmse_skew = (
        np.sum(squared) / len(y), np.median(squared), np.std(squared),
        kurtosis(squared), skew(squared)
    )
    return [zcr_mean, zcr_median, zcr_std_dev, zcr_kurtosis, zcr_skew,
            rmse, rmse_median, rmse_std_dev, rmse_kurtosis, rmse_skew]

In [None]:
# Accumulates one row per audio file; the extraction loop appends to it
features = []

In [None]:
# Visit every class folder, compute the statistics of each .wav file in it
# and store them as one row prefixed with the folder name (the label).
for folder in folders:
    folder_path = f'{folder}'
    for filename in os.listdir(folder_path):
        # Anything that is not a .wav file is ignored.
        if not filename.endswith('.wav'):
            continue
        audio_path = os.path.join(folder_path, filename)
        statistics = calculate_statistics(audio_path)
        features.append([folder, *statistics])

In [None]:
# Column names of the feature table: the label first, then the ten
# statistics in the order returned by calculate_statistics.
columns = [
    'Label',
    'ZCR Mean',
    'ZCR Median',
    'ZCR Std Dev',
    'ZCR Kurtosis',
    'ZCR Skew',
    'RMSE',
    'RMSE Median',
    'RMSE Std Dev',
    'RMSE Kurtosis',
    'RMSE Skew',
]

In [None]:
# Bind the freshly extracted feature table to `df`, the name the following
# cells read from; binding it to another name would leave them working on
# whatever `df` was defined earlier in the session.
df = pd.DataFrame(features, columns=columns)

In [None]:
# Maps each feature column name to its best accuracy and the k that achieved it
best_accuracies = {}

In [None]:
column_names = df.columns[1:]  # every column name except the first one ('Label')

In [None]:
# Evaluate each feature on its own: train a k-NN on that single column and
# keep the best test accuracy together with the k that produced it.
for column in column_names:
    dataknn = df[['Label', column]]
    X = dataknn.drop(['Label'], axis=1)
    y = dataknn['Label']
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, test_size=0.2)

    # Fit the scaler on the training split only and save it per column.
    scaler = StandardScaler()
    scaler.fit(X_train)
    with open(f'scaler_{column}.pkl', 'wb') as scaler_file:
        pickle.dump(scaler, scaler_file)

    with open(f'scaler_{column}.pkl', 'rb') as standarisasi:
        loadscal = pickle.load(standarisasi)

    # Train and test data must go through the same scaling: fitting on raw
    # values while predicting on standardised ones makes the distances
    # meaningless.
    X_train_scaled = loadscal.transform(X_train)
    X_test_scaled = loadscal.transform(X_test)

    # acc[n-1] holds the accuracy for n neighbours; only odd n are tried,
    # so the slots for even n stay 0.
    k = 30
    acc = np.zeros((k-1))

    for n in range(1, k, 2):
        knn = KNeighborsClassifier(n_neighbors=n, metric="euclidean").fit(X_train_scaled, y_train)
        y_pred = knn.predict(X_test_scaled)
        acc[n-1] = accuracy_score(y_test, y_pred)

    best_accuracies[column] = {'best_accuracy': acc.max(), 'best_k': acc.argmax()+1}



In [None]:
# Build a DataFrame from the best-accuracy results: transpose so that each
# feature column becomes a row, then move the feature names out of the index
accuracy_df = pd.DataFrame(best_accuracies).T.reset_index()
accuracy_df.columns = ['Kolom', 'Akurasi Terbaik', 'Nilai K Terbaik']

In [None]:
# Save the DataFrame to 'akurasi_terbaik.csv' without the index column
accuracy_df.to_csv('akurasi_terbaik.csv', index=False)