In [None]:
!pip install librosa



In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import scipy.stats

In [None]:
# Definisikan path folder utama
folder_path = '/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/AudioTest/TESS Toronto emotional speech set data'

#Subfolder yang digunakan
subfolders = ['OAF_angry',"OAF_disgust",'OAF_Fear','OAF_happy','OAF_neutral','OAF_Pleasant_surprise','OAF_Sad','YAF_angry','YAF_disgust','YAF_fear','YAF_happy','YAF_neutral','YAF_pleasant_surprised','YAF_sad']

In [None]:
# List untuk menyimpan hasil statistik dari setiap file audio
data = {
    'File Name': [],
    'Mean': [],
    'Std' : [],
    'Median': [],
    'Min': [],
    'Max': [],
    'Skewness': [],
    'Kurtosis': [],
    'Mode': [],
    'Q1': [],
    'Q3': [],
    'IQR': [],
    'ZCR Mean': [],
    'ZCR Median': [],
    'ZCR Std': [],
    'ZCR Kurtosis': [],
    'ZCR Skewness': [],
    'RMSE Mean': [],
    'RMSE Median': [],
    'RMSE Std': [],
    'RMSE Kurtosis': [],
    'RMSE Skewness': [],
    'Label': []
}

In [None]:
# Loop melalui setiap subfolder yang ingin digunakan
for subfolder in subfolders:
    subfolder_path = os.path.join(folder_path, subfolder)

    # List semua file audio .wav dalam subfolder
    audio_files = [f for f in os.listdir(subfolder_path) if f.endswith('.wav')]

    # Loop melalui setiap file audio .wav
    for audio_file in audio_files:
        audio_file_path = os.path.join(subfolder_path, audio_file)

        x, sr = librosa.load(audio_file_path, sr=None)
        zcr = librosa.feature.zero_crossing_rate(x)

        # Menghitung statistik dari file audio
        mean = np.mean(x)
        std_dev = np.std(x)
        median = np.median(x)
        min_value = np.min(x)
        max_value = np.max(x)
        skewness = scipy.stats.skew(x)
        kurtosis = scipy.stats.kurtosis(x)
        mode = scipy.stats.mode(x)[0]
        q1 = np.quantile(x, 0.25)
        q3 = np.quantile(x, 0.75)
        iqr = scipy.stats.iqr(x)

        zcr_mean = zcr.mean()
        zcr_median = np.median(zcr)
        zcr_std = np.std(zcr)
        zcr_kurtosis = scipy.stats.kurtosis(zcr.ravel(), nan_policy='omit')
        zcr_skewness = scipy.stats.skew(zcr.ravel(), nan_policy='omit')


        # x_normalized = (x - np.mean(x)) / np.std(x)
        # rmse = np.sqrt(np.mean(np.square(x_normalized)))
        rmse = np.sqrt(np.mean(np.square(x)))
        rmse


        rms_mean = rmse.mean()
        rms_median = np.median(rmse)
        rms_std = np.std(rmse)
        rms_kurtosis = scipy.stats.kurtosis(rmse, nan_policy='omit')
        rms_skewness = scipy.stats.skew(rmse, nan_policy='omit')



        # Menentukan label berdasarkan subfolder
        label = subfolder.split('_')[-1]

        # Menambahkan hasil statistik dan label ke dalam list
        data['File Name'].append(audio_file)
        data['Mean'].append(mean)
        data['Std'].append(std_dev)
        data['Median'].append(median)
        data['Min'].append(min_value)
        data['Max'].append(max_value)
        data['Skewness'].append(skewness)
        data['Kurtosis'].append(kurtosis)
        data['Mode'].append(mode)
        data['Q1'].append(q1)
        data['Q3'].append(q3)
        data['IQR'].append(iqr)
        data['ZCR Mean'].append(zcr_mean)
        data['ZCR Median'].append(zcr_median)
        data['ZCR Std'].append(zcr_std)
        data['ZCR Kurtosis'].append(zcr_kurtosis)
        data['ZCR Skewness'].append(zcr_skewness)
        data['RMSE Mean'].append(rms_mean)
        data['RMSE Median'].append(rms_median)
        data['RMSE Std'].append(rms_std)
        data['RMSE Kurtosis'].append(rms_kurtosis)
        data['RMSE Skewness'].append(rms_skewness)
        data['Label'].append(label)


In [None]:
# Membuat DataFrame dari data
df = pd.DataFrame(data)
df


Unnamed: 0,File Name,Mean,Std,Median,Min,Max,Skewness,Kurtosis,Mode,Q1,...,ZCR Median,ZCR Std,ZCR Kurtosis,ZCR Skewness,RMSE Mean,RMSE Median,RMSE Std,RMSE Kurtosis,RMSE Skewness,Label
0,OAF_fall_angry.wav,0.000021,0.044256,0.000305,-0.239166,0.208862,-0.134276,3.009182,0.000153,-0.012634,...,0.065918,0.126751,0.205293,1.319255,0.044256,0.044256,0.0,,,angry
1,OAF_bath_angry.wav,0.000126,0.058056,0.000671,-0.343719,0.314209,-0.200551,4.487713,0.000183,-0.013489,...,0.050781,0.123519,2.449860,1.920870,0.058056,0.058056,0.0,,,angry
2,OAF_haze_angry.wav,0.000048,0.031645,-0.000183,-0.215881,0.217316,-0.024214,5.425911,-0.001709,-0.007904,...,0.053711,0.074469,2.841894,1.875801,0.031645,0.031645,0.0,,,angry
3,OAF_limb_angry.wav,0.000019,0.042115,0.001068,-0.187256,0.196869,-0.204009,2.160890,0.003082,-0.014557,...,0.046875,0.080692,6.144893,2.627124,0.042115,0.042115,0.0,,,angry
4,OAF_gap_angry.wav,0.000026,0.040288,0.000397,-0.243225,0.209747,-0.052664,4.467267,0.000397,-0.010437,...,0.061035,0.078923,5.513442,2.336529,0.040288,0.040288,0.0,,,angry
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,YAF_take_sad.wav,-0.000040,0.053248,0.000610,-0.255707,0.152832,-1.154208,3.208696,0.000000,-0.009949,...,0.046875,0.203288,2.999281,2.067435,0.053248,0.053248,0.0,,,sad
2796,YAF_road_sad.wav,-0.000035,0.035832,0.002563,-0.201599,0.123779,-1.205743,2.810658,0.000549,-0.010162,...,0.024414,0.170376,6.357231,2.783468,0.035833,0.035833,0.0,,,sad
2797,YAF_ring_sad.wav,-0.000035,0.038969,0.001923,-0.189392,0.147430,-0.988923,2.178896,0.000244,-0.013428,...,0.021973,0.185620,5.642601,2.671948,0.038969,0.038969,0.0,,,sad
2798,YAF_pad_sad.wav,-0.000042,0.044375,0.002228,-0.194519,0.139740,-1.108679,2.393505,-0.000122,-0.012939,...,0.032959,0.197696,4.814393,2.496656,0.044375,0.044375,0.0,,,sad


In [None]:
#Ekspor
csv_file_path = '/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/hasil.csv'
df.to_csv(csv_file_path, index=False, na_rep='NaN')

print("Data telah diekspor ke:", csv_file_path)

Data telah diekspor ke: /content/drive/MyDrive/PSD/Ektraksi_ciri_audio/hasil.csv


In [None]:
#Menghapus file name, (RMSE Kurt,RMSE Skew karena data NaN)  dan label
df_feature = df[df.columns[1:-3]]
df_feature

Unnamed: 0,Mean,Std,Median,Min,Max,Skewness,Kurtosis,Mode,Q1,Q3,IQR,ZCR Mean,ZCR Median,ZCR Std,ZCR Kurtosis,ZCR Skewness,RMSE Mean,RMSE Median,RMSE Std
0,-0.000211,0.027313,0.000275,-0.160767,0.136749,-0.364719,4.196715,-0.001099,-0.007782,0.008606,0.016388,0.084954,0.056396,0.086531,6.683087,2.526823,0.027314,0.027314,0.0
1,-0.000007,0.027599,0.000580,-0.131287,0.128510,-0.069990,2.732281,0.000824,-0.008606,0.010559,0.019165,0.067213,0.051270,0.064594,8.441311,2.862851,0.027599,0.027599,0.0
2,0.000159,0.037066,0.000366,-0.240417,0.264740,0.044376,8.602463,0.001465,-0.007019,0.008728,0.015747,0.068036,0.046387,0.087572,8.162113,2.835041,0.037067,0.037067,0.0
3,-0.000074,0.037442,-0.000183,-0.226227,0.178284,-0.074979,2.293641,0.001740,-0.014618,0.015991,0.030609,0.080542,0.053711,0.094905,6.870438,2.754929,0.037442,0.037442,0.0
4,0.000034,0.046819,0.000031,-0.272797,0.253754,0.046887,3.314473,-0.002533,-0.017487,0.017700,0.035187,0.090071,0.057617,0.080056,2.525665,1.877468,0.046820,0.046820,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,-0.000036,0.039804,0.004272,-0.185669,0.131104,-0.957077,1.928149,0.001160,-0.015411,0.022522,0.037933,0.107194,0.034668,0.198115,4.215802,2.412601,0.039804,0.039804,0.0
2796,-0.000036,0.034367,0.000732,-0.189697,0.146790,-0.763839,2.889698,-0.000244,-0.010620,0.016113,0.026733,0.159956,0.046387,0.205332,1.548212,1.600956,0.034367,0.034367,0.0
2797,-0.000047,0.036416,0.002533,-0.186035,0.112183,-1.186393,2.845274,-0.000397,-0.011627,0.020844,0.032471,0.106627,0.025879,0.200279,4.456847,2.456896,0.036416,0.036416,0.0
2798,-0.000029,0.039592,0.002411,-0.216187,0.119415,-1.005316,2.117460,0.000458,-0.011536,0.021454,0.032990,0.104151,0.021973,0.189365,4.300406,2.378635,0.039592,0.039592,0.0


In [None]:
df_label = df[df.columns[-1]]
df_label

0       angry
1       angry
2       angry
3       angry
4       angry
        ...  
2795      sad
2796      sad
2797      sad
2798      sad
2799      sad
Name: Label, Length: 2800, dtype: object

Split data

In [None]:
import joblib
from sklearn.model_selection import train_test_split
X_train, X_test , y_train, y_test = train_test_split(df_feature,df_label,test_size=0.2,random_state=42)

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
zscaler = scaler.fit(X_train)
joblib.dump(zscaler,'/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/zscore_scaler.pkl')


['/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/zscore_scaler.pkl']

In [None]:
X_train_normalize_zero = pd.DataFrame(zscaler.transform(X_train),columns = X_train.columns)
X_test_normalize_zero = pd.DataFrame(zscaler.transform(X_test),columns=X_test.columns)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors = 3)
knn_zero = knn.fit(X_train_normalize_zero, y_train)
joblib.dump(knn_zero,'/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/zscore_knn.pkl')

['/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/zscore_knn.pkl']

In [None]:
from sklearn.metrics import accuracy_score

y_pred_knn_zero = knn_zero.predict(X_test_normalize_zero)

#akurasi
akurasi = accuracy_score(y_test,y_pred_knn_zero)
akurasi

0.7035714285714286

Akurasi MinMax

In [None]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler_mmax = scaler.fit(X_train)
joblib.dump(scaler_mmax,'/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/mmax_scaler.pkl')

X_train_normalize_mmax = pd.DataFrame(scaler_mmax.transform(X_train),columns=X_train.columns)
X_test_normalize_mmax = pd.DataFrame(scaler_mmax.transform(X_test),columns=X_test.columns)


In [None]:
knn = KNeighborsClassifier(n_neighbors=3)
mmax_knn = knn.fit(X_train_normalize_mmax,y_train)

In [None]:
y_pred_mmax_knn = mmax_knn.predict(X_test_normalize_mmax)

#akurasi
akurasi = accuracy_score(y_test,y_pred_mmax_knn)
akurasi

0.7142857142857143

Z Score

In [None]:
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

pipeline = Pipeline([
    ('Pca', PCA()),
    ('KNN', KNeighborsClassifier())
])

parameter_grid = {
    'Pca__n_components': [i for i in range(19,0,-1)],  # komponen pca
    'KNN__n_neighbors': [i for i in range(1, 20)]  # K
}

grid_search = GridSearchCV(estimator=pipeline, param_grid=parameter_grid, cv=5)  # mencari parameter dan menguji kombinasinya agar dapat hasil terbaik
# Dicoba ke data pelatihan
grid_search.fit(X_train_normalize_zero, y_train)
print("Best Parameters: ", grid_search.best_params_)


#Model terbaik
modelterbaik = grid_search.best_estimator_
y_pred = modelterbaik.predict(X_test_normalize_zero)
joblib.dump(modelterbaik,"/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/PCA_Knn_zero_grid.pkl")

#akurasi
akurasi = accuracy_score(y_test,y_pred)
print("Best akurasi pada data uji :",akurasi)


Best Parameters:  {'KNN__n_neighbors': 9, 'Pca__n_components': 13}
Best akurasi pada data uji : 0.7357142857142858


In [None]:
pca = PCA(n_components=19)
zeropca = pca.fit_transform(X_train_normalize_zero)
joblib.dump(pca,"/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/PCA_zero.pkl")

['/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/PCA_zero.pkl']

MINMAX

In [None]:
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

pipeline = Pipeline([
    ('Pca', PCA()),
    ('KNN', KNeighborsClassifier())
])

parameter_grid = {
    'Pca__n_components': [i for i in range(19,0,-1)],  # komponen pca
    'KNN__n_neighbors': [i for i in range(1, 20)]  # K
}

grid_search = GridSearchCV(estimator=pipeline, param_grid=parameter_grid, cv=5)  # mencari parameter dan menguji kombinasinya agar dapat hasil terbaik
# Dicoba ke data pelatihan
grid_search.fit(X_train_normalize_mmax, y_train)
print("Best Parameters: ", grid_search.best_params_)


#Model terbaik
modelterbaik = grid_search.best_estimator_
y_pred = modelterbaik.predict(X_test_normalize_mmax)
joblib.dump(modelterbaik,"/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/PCA_Knn_mmax_grid.pkl")

#akurasi
akurasi = accuracy_score(y_test,y_pred)
print("Best akurasi pada data uji :",akurasi)


Best Parameters:  {'KNN__n_neighbors': 12, 'Pca__n_components': 14}
Best akurasi pada data uji : 0.7125


In [None]:
pca = PCA(n_components=19)
zeropca = pca.fit_transform(X_train_normalize_mmax)
joblib.dump(pca,"/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/PCA_mmax.pkl")

['/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/PCA_mmax.pkl']

In [None]:

for i in range(19, 14, -1):
  pca = PCA(n_components=i)
  pca.fit(X_train_normalize_zero)
  pca_train = pca.transform(X_train_normalize_zero)
  pca_test = pca.transform(X_test_normalize_zero)
  print(f'dimensi = {i}')
  for x in range(1,10, 2):
    Knn = KNeighborsClassifier(n_neighbors=x)
    Knn.fit(pca_train, y_train)
    y_pred = Knn.predict(pca_test)
    print(f'k = {x} = {accuracy_score(y_test, y_pred)}')


dimensi = 19
k = 1 = 0.6625
k = 3 = 0.7035714285714286
k = 5 = 0.7142857142857143
k = 7 = 0.7232142857142857
k = 9 = 0.7339285714285714
dimensi = 18
k = 1 = 0.6625
k = 3 = 0.7035714285714286
k = 5 = 0.7142857142857143
k = 7 = 0.7232142857142857
k = 9 = 0.7339285714285714
dimensi = 17
k = 1 = 0.6625
k = 3 = 0.7035714285714286
k = 5 = 0.7142857142857143
k = 7 = 0.7232142857142857
k = 9 = 0.7339285714285714
dimensi = 16
k = 1 = 0.6625
k = 3 = 0.7035714285714286
k = 5 = 0.7142857142857143
k = 7 = 0.7232142857142857
k = 9 = 0.7339285714285714
dimensi = 15
k = 1 = 0.6625
k = 3 = 0.7035714285714286
k = 5 = 0.7142857142857143
k = 7 = 0.7232142857142857
k = 9 = 0.7339285714285714


In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import numpy as np


# Menggunakan StandardScaler untuk mentransformasi fitur-fitur
scaler_audio = StandardScaler()
scaled_features = scaler_audio.fit_transform(features)

# Normalisasi data pelatihan
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

dummy_data = np.array([[[0.8, 0.2]*10 + [0.3]], [[0.4, 0.6]*10 + [0.3]], [[0.5,0.5]*10 +[0.3]]])



# Normalisasi data dummy dengan metode Min-Max
dummy_data_scaled = scaler_audio.transform(dummy_data.reshape(3,-1))

# Menggunakan StandardScaler yang sama untuk normalisasi dummy_data
dummy_data_scaled = scaler.transform(dummy_data.reshape(3,-1))

# Menggunakan MinMaxScaler untuk dataset make_blobs
scaler_blobs = MinMaxScaler()

# Fitscaler train dataset make_blobs
scaler_blobs.fit(X_train)

# Transformasi dataset make_blobs
X_train_blobs_scaled = scaler_blobs.transform(X_train)
X_test_blobs_scaled = scaler_blobs.transform(X_test)

# Mengecek rentang nilai setiap fitur setelah penskalaan
for i in range(X_test_blobs_scaled.shape[1]):
    print('>%d, train: min=%.3f, max=%.3f, test: min=%.3f, max=%.3f' %
          (i, X_train_blobs_scaled[:, i].min(), X_train_blobs_scaled[:, i].max(),
           X_test_blobs_scaled[:, i].min(), X_test_blobs_scaled[:, i].max()))
print("Dimensi X_train:", X_train.shape)

>0, train: min=0.000, max=1.000, test: min=0.190, max=0.921
>1, train: min=0.000, max=1.000, test: min=0.002, max=1.085
>2, train: min=0.000, max=1.000, test: min=-0.060, max=0.714
>3, train: min=0.000, max=1.000, test: min=-0.035, max=0.996
>4, train: min=0.000, max=1.000, test: min=0.004, max=1.003
>5, train: min=0.000, max=1.000, test: min=-0.984, max=1.212
>6, train: min=0.000, max=1.000, test: min=0.005, max=2.715
>7, train: min=0.000, max=1.000, test: min=0.228, max=0.901
>8, train: min=0.000, max=1.000, test: min=-0.124, max=1.003
>9, train: min=0.000, max=1.000, test: min=0.004, max=1.000
>10, train: min=0.000, max=1.000, test: min=-0.002, max=1.035
>11, train: min=0.000, max=1.000, test: min=0.094, max=0.979
>12, train: min=0.000, max=1.000, test: min=0.054, max=0.966
>13, train: min=0.000, max=1.000, test: min=0.066, max=0.982
>14, train: min=0.000, max=1.000, test: min=0.012, max=0.849
>15, train: min=0.000, max=1.000, test: min=0.036, max=0.924
>16, train: min=0.000, max=1.

  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
import joblib
import numpy as np

# 1. Split fitur (X) dan labels (y) menjadi 80-20
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

print("Ukuran Data Latih (X_train, y_train):", X_train.shape, y_train.shape)
print("Ukuran Data Uji (X_test, y_test):", X_test.shape, y_test.shape)

# 2. Normalisasi dengan metode Min-Max
minmax_scaler = MinMaxScaler()
X_train_minmax_scaled = minmax_scaler.fit_transform(X_train)
X_test_minmax_scaled = minmax_scaler.transform(X_test)

# 3. Penanganan NaN
imputer = SimpleImputer(strategy='mean')
X_train_minmax_scaled = imputer.fit_transform(X_train_minmax_scaled)
X_test_minmax_scaled = imputer.transform(X_test_minmax_scaled)

# 4. Membuat dan melatih model K-Nearest Neighbors (KNN)
knn_model = KNeighborsClassifier(n_neighbors=7)
knn_model.fit(X_train_minmax_scaled, y_train)

# 5. Membuat dan melatih model Neural Network
nn_model = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=1000, random_state=1)
nn_model.fit(X_train_minmax_scaled, y_train)

y_pred_knn = knn_model.predict(X_test_minmax_scaled)
accuracy_knn = accuracy_score(y_test, y_pred_knn)
print("Akurasi Model KNN:", accuracy_knn)

y_pred_nn = nn_model.predict(X_test_minmax_scaled)
accuracy_nn = accuracy_score(y_test, y_pred_nn)
print("Akurasi Model Neural Network:", accuracy_nn)


# 6. Menyimpan kedua model
joblib.dump(knn_model, '/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/knn_model.pkl')
joblib.dump(nn_model, '/content/drive/MyDrive/PSD/Ektraksi_ciri_audio/nn_model.pkl')

# 7. Melakukan prediksi dengan kedua model dengan data dummy

# Mengatasi NaN di dummy_data_scaled dengan SimpleImputer
imputer_dummy = SimpleImputer(strategy='mean')
dummy_data_scaled_imputed = imputer_dummy.fit_transform(dummy_data_scaled)


# Prediksi dengan model KNN
dummy_pred_knn = knn_model.predict(dummy_data_scaled_imputed)

# Prediksi dengan model Neural Network
dummy_pred_nn = nn_model.predict(dummy_data_scaled_imputed)

print("Prediksi Model KNN untuk Data Dummy:", dummy_pred_knn)
print("Prediksi Model Neural Network untuk Data Dummy:", dummy_pred_nn)

Ukuran Data Latih (X_train, y_train): (2240, 21) (2240,)
Ukuran Data Uji (X_test, y_test): (560, 21) (560,)


  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Akurasi Model KNN: 0.7553571428571428
Akurasi Model Neural Network: 0.8267857142857142
Prediksi Model KNN untuk Data Dummy: ['angry' 'fear' 'angry']
Prediksi Model Neural Network untuk Data Dummy: ['fear' 'fear' 'fear']


In [None]:
# from sklearn.impute import SimpleImputer
# from sklearn.decomposition import PCA

# # Membuat objek imputer pengisian nilai NaN menggunakan rata-rata
# imputer = SimpleImputer(strategy='mean')

# # Mengisi nilai NaN dalam data dengan rata-rata dari masing-masing kolom
# X_imputed = imputer.fit_transform(X)

# # PCA
# pca = PCA(n_components=19)
# pca.fit(X_imputed)
# X_pca = pca.transform(X_imputed)
# X_pca


array([[-2.10061415e+00,  1.27222911e+00,  2.34040076e+00, ...,
        -6.99645419e-16, -6.23215190e-17,  0.00000000e+00],
       [-2.75472076e+00,  2.62446209e+00,  1.92950687e+00, ...,
        -5.68593674e-16, -4.01895346e-17,  0.00000000e+00],
       [-2.14441101e+00,  2.22729746e+00,  2.14043399e+00, ...,
        -1.85166444e-16, -6.49114256e-17,  0.00000000e+00],
       ...,
       [-3.01279854e-01,  2.83367321e+00, -2.05821216e+00, ...,
        -4.81289806e-17,  1.58166347e-18,  0.00000000e+00],
       [-1.87853546e-01,  2.85645230e+00, -2.05952665e+00, ...,
        -2.19524320e-17,  1.22791109e-17,  0.00000000e+00],
       [ 7.90464747e-01,  1.34165730e+00, -1.28028382e+00, ...,
        -1.51656805e-16,  1.71106179e-17,  0.00000000e+00]])

In [None]:
# from sklearn.decomposition import PCA
# pca = PCA(n_components=19)
# pca.fit(X)
# pca = pca.transform(X)
# pca