In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# Synthetic air-quality dataset. Every column -- including the target
# HealthImpactScore -- is drawn independently at random, so there is no real
# feature/target relationship in this data for a model to learn.
# A seeded Generator makes the draws identical after every kernel restart.
SEED = 42
N_SAMPLES = 1000
rng = np.random.default_rng(SEED)

data = {
    'AQI': rng.integers(50, 300, N_SAMPLES),  # integers in [50, 300)
    'PM10': rng.uniform(10, 100, N_SAMPLES),
    'PM2_5': rng.uniform(5, 80, N_SAMPLES),
    'NO2': rng.uniform(5, 60, N_SAMPLES),
    'SO2': rng.uniform(2, 40, N_SAMPLES),
    'O3': rng.uniform(20, 120, N_SAMPLES),
    'Temperature': rng.uniform(15, 35, N_SAMPLES),
    'Humidity': rng.uniform(30, 90, N_SAMPLES),
    'WindSpeed': rng.uniform(0.5, 10, N_SAMPLES),
    'HealthImpactScore': rng.uniform(0, 100, N_SAMPLES),  # regression target
}

In [3]:
# Assemble the generated columns into a DataFrame
df = pd.DataFrame(data=data)

In [4]:
# Split the frame into the feature matrix (X) and the target vector (y)
feature_columns = [
    'AQI', 'PM10', 'PM2_5', 'NO2', 'SO2', 'O3',
    'Temperature', 'Humidity', 'WindSpeed',
]
X = df.loc[:, feature_columns].to_numpy()
y = df.loc[:, 'HealthImpactScore'].to_numpy()

In [5]:
# Normalise the features with MinMaxScaler.
# The scaler is fitted on the training rows only, so the held-out test rows do
# not influence the min/max statistics (no train/test leakage). The training
# row indices are obtained with the same test_size/random_state as the
# train/test split applied to X_scaled later in the notebook; with identical
# sample count and parameters train_test_split picks the same rows there.
# Keep these two parameters in sync with that split.
train_rows = train_test_split(np.arange(X.shape[0]), test_size=0.2, random_state=42)[0]
scaler = MinMaxScaler()
scaler.fit(X[train_rows])
# All rows are transformed with the train-fitted scaler; test rows may fall
# slightly outside [0, 1].
X_scaled = scaler.transform(X)

In [6]:
# Add a trailing channel axis so the array is (samples, features, 1) for the CNN
n_samples, n_features = X_scaled.shape[:2]
X_scaled = np.reshape(X_scaled, (n_samples, n_features, 1))

In [7]:
# Hold out 20% of the samples as the test set (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Build the CNN: two Conv1D layers over the feature axis, then a dense head
# ending in a single linear unit for regression.
cnn_layers = [
    Input(shape=(X_train.shape[1], 1)),  # (features, 1 channel)
    Conv1D(32, 3, activation='relu'),
    Conv1D(64, 3, activation='relu'),
    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(rate=0.2),
    Dense(units=64, activation='relu'),
    Dense(units=1, activation='linear'),  # single regression output
]
model = Sequential(cnn_layers)

In [9]:
# Compile the model: MSE as the training loss, MAE reported as a metric
optimizer = Adam(learning_rate=0.001)
model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])

In [10]:
# Early stopping to limit overfitting: watch val_loss with a patience of
# 10 epochs and restore the best weights when training stops.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [30]:
# Train the model; 20% of the training data is held back for validation
fit_options = dict(
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 827.5847 - mae: 24.8561 - val_loss: 894.2547 - val_mae: 25.8734
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 861.7982 - mae: 25.1913 - val_loss: 893.6512 - val_mae: 25.8683
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 858.1600 - mae: 25.2039 - val_loss: 895.0501 - val_mae: 25.8963
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 826.2268 - mae: 24.9056 - val_loss: 896.0702 - val_mae: 25.9027
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 837.5899 - mae: 25.1255 - val_loss: 908.6727 - val_mae: 26.0897
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 814.8969 - mae: 24.4528 - val_loss: 903.7983 - val_mae: 25.9990
Epoch 7/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

In [27]:
# Evaluate the model on the held-out test set
test_metrics = model.evaluate(X_test, y_test, verbose=0)  # unpacked below as (loss, mae)
loss, mae = test_metrics
print("Mean Absolute Error (MAE):", mae)

Mean Absolute Error (MAE): 24.651756286621094


In [13]:
# Prediction helper
def predict_health_impact_cnn(aqi, pm10, pm2_5, no2, so2, o3, temperature, humidity, wind_speed):
    """
    Predict the HealthImpactScore for a single set of readings using the CNN.

    The nine arguments are arranged in the same order as the feature columns
    used to build X, scaled with the module-level ``scaler``, given a trailing
    channel axis, and passed to the module-level ``model``.

    Returns the first element of the first row of the model's prediction.
    """
    readings = np.array([aqi, pm10, pm2_5, no2, so2, o3, temperature, humidity, wind_speed])
    row = readings.reshape(1, -1)              # one sample, nine features
    row_scaled = scaler.transform(row)         # same normalisation as the training data
    cnn_input = row_scaled[:, :, np.newaxis]   # add the channel axis for the CNN
    prediction = model.predict(cnn_input)
    return prediction[0][0]

In [14]:
# Example use of the prediction function.
# Order: aqi, pm10, pm2_5, no2, so2, o3, temperature, humidity, wind_speed.
# Note: the PM10, SO2 and temperature values below lie outside the ranges the
# synthetic training data was drawn from.
sample_reading = (
    187.2700594, 295.8530392, 13.03856044,
    6.639263013, 66.16114965, 54.62427998,
    5.150335038, 84.42434365, 6.137755447,
)
predicted_score = predict_health_impact_cnn(*sample_reading)
print("Predicted HealthImpactScore:", predicted_score)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Predicted HealthImpactScore: 78.542


In [15]:
# Predict on both the training and the test split so that the two can be
# scored with the same metrics (r2_score / mean_absolute_error are imported
# with the other dependencies at the top of the notebook).
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [16]:
# Score the predictions on the training set
r2_train = r2_score(y_true=y_train, y_pred=y_train_pred)
mae_train = mean_absolute_error(y_true=y_train, y_pred=y_train_pred)

In [17]:
# Score the predictions on the test set
r2_test = r2_score(y_true=y_test, y_pred=y_test_pred)
mae_test = mean_absolute_error(y_true=y_test, y_pred=y_test_pred)

In [18]:
# Print R-squared and MAE for the train and test splits as one report
report_lines = [
    "=== Evaluasi Model ===",
    f"Train Set R-squared: {r2_train:.4f}",
    f"Train Set MAE: {mae_train:.4f}",
    f"Test Set R-squared: {r2_test:.4f}",
    f"Test Set MAE: {mae_test:.4f}",
]
print("\n".join(report_lines))


=== Evaluasi Model ===
Train Set R-squared: -0.0012
Train Set MAE: 24.9717
Test Set R-squared: -0.0002
Test Set MAE: 25.0332


In [19]:
# Interpret the test-set R² using fixed quality bands (0.9 / 0.8 / 0.7).
print("\n=== Akurasi ===")
if r2_test >= 0.9:
    print("Model memiliki akurasi sangat tinggi pada test set (R² > 0.9).")
elif r2_test >= 0.8:
    print("Model memiliki akurasi baik pada test set (R² antara 0.8 dan 0.9).")
elif r2_test >= 0.7:
    print("Model memiliki akurasi sedang pada test set (R² antara 0.7 dan 0.8).")
else:
    print("Model memiliki akurasi rendah pada test set (R² < 0.7).")

# Overfitting means the training score is clearly *better* than the test
# score, so the signed gap is checked. An absolute gap would also report
# overfitting in the opposite case (test score better than training score).
if r2_train - r2_test > 0.1:
    print("Namun, terdapat perbedaan signifikan antara performa train dan test set, kemungkinan terjadi overfitting.")
else:
    print("Performa model pada train dan test set konsisten, tidak ada indikasi overfitting.")


=== Akurasi ===
Model memiliki akurasi rendah pada test set (R² < 0.7).
Performa model pada train dan test set konsisten, tidak ada indikasi overfitting.


In [20]:
def classify_health_impact(score):
    """
    Map a HealthImpactScore to its category label.

    The score is compared against ascending inclusive upper bounds
    (20, 50, 100, 150); the label of the first bound it does not exceed is
    returned. Anything that satisfies none of the bounds gets the final
    "Berbahaya (Hazardous)" label.
    """
    bands = (
        (20, "Sehat (Healthy)"),
        (50, "Sedang (Moderate)"),
        (100, "Tidak Sehat (Unhealthy)"),
        (150, "Sangat Tidak Sehat (Very Unhealthy)"),
    )
    for upper_bound, label in bands:
        if score <= upper_bound:
            return label
    return "Berbahaya (Hazardous)"

In [21]:
# Example: predict a score for one set of readings, then classify it.
# Order: aqi, pm10, pm2_5, no2, so2, o3, temperature, humidity, wind_speed.
sample_reading = (
    187.2700594, 295.8530392, 13.03856044,
    6.639263013, 66.16114965, 54.62427998,
    5.150335038, 84.42434365, 6.137755447,
)
predicted_score = predict_health_impact_cnn(*sample_reading)
category = classify_health_impact(predicted_score)

print(f"Predicted HealthImpactScore: {predicted_score:.2f}")
print(f"Kategori: {category}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Predicted HealthImpactScore: 78.54
Kategori: Tidak Sehat (Unhealthy)
