In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [3]:
# Membaca dataset
data = pd.read_csv('data_kualitas_udara_jogja.csv')
data.head()

Unnamed: 0,Date,PM10,SO2,CO,O3,NO2,Max,Critical Component,Category
0,1/1/2020,30,2,69,19,0,69,CO,Moderate
1,1/2/2020,16,2,58,33,0,58,CO,Moderate
2,1/3/2020,12,2,46,18,0,46,CO,Good
3,1/4/2020,8,2,84,29,0,84,CO,Moderate
4,1/5/2020,8,3,50,0,0,50,CO,Good


**Keterangan Atribut:**
* Date - Tanggal pengukuran
* PM10 - Pengukuran Materi Partikulat
* SO2 - Pengukuran Sulfur Dioksida
* CO - Pengukuran Karbon Monoksida
* O3 - Pengukuran ozon
* NO2 - Pengukuran Natrium DIoksida
* Maks - Nilai terukur tertinggi
* Critical Component - Komponen yang memiliki nilai pengukuran tertinggi
* Kategori – Kategori pencemaran udara, baik atau tidak

**Range	Category**

* 0-50	Good
* 51-100	Moderate
* 101-200	Unhealthy
* 201-300	Very Unhealthy
* 300++	Dangerous


In [4]:
# Memisahkan fitur (features) dan label (target)
X = data[['PM10', 'SO2', 'CO', 'O3', 'NO2']]
y = data['Category']

In [5]:
# Memisahkan data menjadi data latih dan data uji
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Inisialisasi model Random Forest
rf_model = RandomForestClassifier(random_state=42)

# Melatih model dengan data latih
rf_model.fit(X_train, y_train)

# Melakukan prediksi dengan data uji
y_pred = rf_model.predict(X_test)

# Mengukur kinerja model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Akurasi:", accuracy)
#print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", class_report)

Akurasi: 0.972972972972973

Classification Report:
               precision    recall  f1-score   support

        Good       1.00      0.96      0.98        54
    Moderate       0.90      1.00      0.95        19
   Unhealthy       1.00      1.00      1.00         1

    accuracy                           0.97        74
   macro avg       0.97      0.99      0.98        74
weighted avg       0.98      0.97      0.97        74



In [6]:
# Prediksi kualitas air untuk data baru
new_data = pd.DataFrame({'PM10': [15], 'SO2': [2], 'CO': [60], 'O3': [25], 'NO2': [0]})
predicted_category = rf_model.predict(new_data)
print("\nPrediksi Kualitas Air untuk Data Baru:", predicted_category[0])


Prediksi Kualitas Air untuk Data Baru: Moderate
