In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, accuracy_score

# ---------------------------------------------------------------------------
# Train a random-forest AQI-category classifier from the per-station CSVs,
# report hold-out performance, and pickle the model and label encoder.
# ---------------------------------------------------------------------------

# One filtered CSV per monitoring station (dki1 .. dki5).
file_names = [f'FilteredData/filtered_dki{n}.csv' for n in range(1, 6)]

# Normalise the column names and tag every row with a synthetic station id
# ('Place_1' .. 'Place_5'); any original 'stasiun' value is overwritten.
# Building new frames (instead of inplace renames) keeps the cell idempotent.
datasets = [
    pd.read_csv(file)
    .rename(columns={'stasiun': 'place', 'categori': 'AQI_category'})
    .assign(place=f'Place_{i + 1}')
    for i, file in enumerate(file_names)
]

aqi = pd.concat(datasets, ignore_index=True)
# Rows without a target label cannot be used for supervised training.
aqi = aqi.dropna(subset=['AQI_category'])
# Drop stray index columns left behind by earlier CSV round-trips.
aqi = aqi.drop(columns=[col for col in aqi.columns if 'Unnamed' in col], errors='ignore')

aqi.to_csv("alldata.csv")


# Encode the target categories as integers; `le` is kept (and pickled below)
# so predictions can be mapped back to the original labels.
le = LabelEncoder()
aqi['AQI_category_encoded'] = le.fit_transform(aqi['AQI_category'])


features = ['pm10', 'so2', 'co', 'o3', 'no2', 'place']
# One-hot encode the station; drop_first=True makes the first station the
# implicit baseline (all place_* indicators equal to 0).
aqi = pd.get_dummies(aqi, columns=['place'], drop_first=True)

# Restrict the model inputs to the declared features. Previously every
# remaining column was fed to the model, so any extra column in the CSVs
# became an input that predict_aqi cannot supply at inference time.
pollutant_columns = [col for col in features if col != 'place']
place_columns = [col for col in aqi.columns if col.startswith('place_')]
X = aqi[pollutant_columns + place_columns]
y = aqi['AQI_category_encoded']


# 80/20 hold-out split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)


y_pred = model.predict(X_test)
print("Model Performance:")
print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")


# Persist both artefacts: the encoder is required to decode model output.
with open('rf_model.pkl', 'wb') as f:
    pickle.dump(model, f)

with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(le, f)


Model Performance:
              precision    recall  f1-score   support

           0       0.96      0.92      0.94       619
           1       1.00      0.95      0.97        20
           2       0.96      0.99      0.97      2175
           4       0.98      0.92      0.95       444

    accuracy                           0.97      3258
   macro avg       0.98      0.95      0.96      3258
weighted avg       0.97      0.97      0.97      3258

Accuracy: 0.97


In [5]:
def predict_aqi(pm10, so2, co, o3, no2, place, clf=None, encoder=None):
    """Predict the AQI category for one set of pollutant readings at a station.

    Parameters
    ----------
    pm10, so2, co, o3, no2 : number
        Pollutant readings; each is placed in the model input column of the
        same name (readings the model has no column for are ignored).
    place : str
        Station identifier as used during training (e.g. ``'Place_2'``). It is
        one-hot encoded by setting the column ``f'place_{place}'`` to 1. If the
        model has no such column (the baseline station dropped by
        ``drop_first=True`` at training time, or an unknown name) all place
        indicators stay 0.
    clf : optional
        Fitted estimator exposing ``feature_names_in_`` and ``predict``.
        Defaults to the notebook-level ``model``.
    encoder : optional
        Fitted encoder exposing ``inverse_transform``. Defaults to the
        notebook-level ``le``.

    Returns
    -------
    The original (un-encoded) category label for the predicted class.
    """
    clf = model if clf is None else clf
    encoder = le if encoder is None else encoder

    expected_columns = list(clf.feature_names_in_)

    # Start from an all-zero row so every column the model expects exists,
    # in exactly the order it was trained on.
    row = dict.fromkeys(expected_columns, 0)

    readings = {'pm10': pm10, 'so2': so2, 'co': co, 'o3': o3, 'no2': no2}
    for name, value in readings.items():
        if name in row:
            row[name] = value

    # Set the station indicator directly. Calling
    # pd.get_dummies([place], drop_first=True) on a single value drops the
    # only indicator column, which silently ignored `place` altogether.
    place_column = f'place_{place}'
    if place_column in row:
        row[place_column] = 1

    input_df = pd.DataFrame([row], columns=expected_columns)

    predicted_label = clf.predict(input_df)[0]

    # Convert the encoded class back to the original category label.
    return encoder.inverse_transform([predicted_label])[0]

# Example prediction: a single set of pollutant readings for station Place_1,
# passed to predict_aqi as keyword arguments.
sample_reading = {
    'pm10': 27,
    'so2': 2,
    'co': 19,
    'o3': 20,
    'no2': 9,
    'place': 'Place_1',
}
example_prediction = predict_aqi(**sample_reading)

print(f"Predicted AQI: {example_prediction}")

Predicted AQI: 0
