# In [1]:
# PM2.5: evaluate the saved model on a held-out split and export predictions
import pandas as pd
import joblib
from sklearn.metrics import mean_squared_error, r2_score, f1_score
from sklearn.model_selection import train_test_split 

# 1. Load the Preprocessed Data and the Saved Model
# Both artifacts are read from the current working directory; they are
# expected to have been produced by the training/preprocessing notebook.
try:
    df_imputed = pd.read_csv("PB003_preprocessed.csv")  # preprocessed data
    model = joblib.load("trained_model_PM25.joblib")  # previously trained model
except FileNotFoundError:
    print("Error: Make sure PB003_preprocessed.csv and trained_model_PM25.joblib are present and run the training/preprocessing notebook first!")
    # Raise SystemExit directly instead of calling the bare `exit()` helper:
    # `exit` is only injected by the `site` module for interactive sessions,
    # and calling it with no argument reports a *successful* (0) exit status.
    # A non-zero status lets callers/shell pipelines see that loading failed.
    raise SystemExit(1)

# Target column and feature columns.
# NOTE(review): presumably these must match the columns used at training
# time - verify against the training notebook.
target_column = 'PM2.5 (ug/m3)'
non_feature_columns = {target_column, 'From Date', 'To Date'}
# Keep the DataFrame's own column order; only drop the target and date columns.
features = [col for col in df_imputed.columns if col not in non_feature_columns]


# 2. Prepare the Data (Split into Training and Testing Sets)

X, y = df_imputed[features], df_imputed[target_column]
# 80/20 split with a fixed seed so the held-out rows are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# 3. Make Predictions on the Test Set (Using the loaded model)
# NOTE(review): these metrics only measure generalisation if the model was
# trained with the same split parameters as above - confirm against the
# training notebook.
y_pred = model.predict(X_test)


# 4. Calculate MSE, RMSE and R-squared
# Compute MSE once and derive RMSE from it rather than passing `squared=`:
# that keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so
# relying on it raises TypeError on current releases. Taking the square root
# of the MSE works on every version.
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse}")
print(f"R-squared: {r2}")
print(f"MSE: {mse}")




# Predict for every row of X (train and test rows alike), attach the result
# to the dataframe as a new 'Predictions' column, and write it out as CSV
# without the index column.
all_row_predictions = model.predict(X)
df_imputed['Predictions'] = all_row_predictions
df_imputed.to_csv('PB003_predictions.csv', index=False)
print("Predictions saved to PB003_predictions.csv")



# Output:
# RMSE: 19.61776453640282
# R-squared: 0.7838112215550046
# MSE: 384.8566854057441
# Predictions saved to PB003_predictions.csv
