In [61]:
import pandas as pd
import joblib
from prophet import Prophet


In [62]:
# Load dataset
df = pd.read_csv("data.csv")

# Convert year and month into a datetime format
df['date'] = pd.to_datetime(df[['year', 'month']].assign(day=1))

# Ensure 'WQI' is numeric and drop NaNs
df['WQI'] = pd.to_numeric(df['WQI'], errors='coerce')
df = df.dropna(subset=['WQI'])

# Prepare dataset for Prophet
df_prophet = df[['date', 'WQI']].rename(columns={'date': 'ds', 'WQI': 'y'})

# Ensure correct data types
df_prophet['ds'] = pd.to_datetime(df_prophet['ds'])

# Print column names and first few rows for debugging
print(df_prophet.columns)  # Should be ['ds', 'y']
print(df_prophet.dtypes)   # 'ds' should be datetime64 and 'y' should be float


Index(['ds', 'y'], dtype='object')
ds    datetime64[ns]
y            float64
dtype: object


In [63]:
# Initialize Prophet model
prophet_model = Prophet()

# Fit the model
prophet_model.fit(df_prophet)

# Save the trained model
joblib.dump(prophet_model, "prophet_wqi_model.pkl")

print("✅ Prophet model saved successfully as prophet_wqi_model.pkl")


14:19:04 - cmdstanpy - INFO - Chain [1] start processing
14:19:05 - cmdstanpy - INFO - Chain [1] done processing


✅ Prophet model saved successfully as prophet_wqi_model.pkl


In [64]:
import pandas as pd
import joblib
from prophet import Prophet

# Load dataset
df = pd.read_csv("data.csv")

# Convert year and month into datetime
df['date'] = pd.to_datetime(df[['year', 'month']].assign(day=1))

# Ensure 'WQI' is numeric and drop NaNs
df['WQI'] = pd.to_numeric(df['WQI'], errors='coerce')
df = df.dropna(subset=['WQI'])

# Prepare dataset for Prophet
df_prophet = df[['date', 'WQI']].rename(columns={'date': 'ds', 'WQI': 'y'})

# Ensure correct data types
df_prophet['ds'] = pd.to_datetime(df_prophet['ds'])

# Print column names and first few rows for debugging
print(df_prophet.columns)  # Should be ['ds', 'y']
print(df_prophet.dtypes)   # 'ds' should be datetime64 and 'y' should be float

# Initialize Prophet model
prophet_model = Prophet()

# Fit the model
prophet_model.fit(df_prophet)

# Save the model
joblib.dump(prophet_model, "prophet_wqi_model.joblib")
print("✅ Prophet model saved successfully!")

# Load model
def load_model():
    return joblib.load("prophet_wqi_model.joblib")

# Predict Next 12 Months
def predict_monthly():
    model = load_model()
    current_date = pd.Timestamp.today()
    future_dates = pd.date_range(start=current_date, periods=12, freq='ME')
    future_df = pd.DataFrame({'ds': future_dates})

    forecast = model.predict(future_df)
    return forecast[['ds', 'yhat']]

# Predict Next 5 Years
def predict_yearly():
    model = load_model()
    current_date = pd.Timestamp.today()
    future_dates = pd.date_range(start=current_date, periods=5, freq='YE')
    future_df = pd.DataFrame({'ds': future_dates})

    forecast = model.predict(future_df)
    return forecast[['ds', 'yhat']]

# Example Usage
print("\n📌 Monthly Predictions:")
print(predict_monthly())

print("\n📌 Yearly Predictions:")
print(predict_yearly())


Index(['ds', 'y'], dtype='object')
ds    datetime64[ns]
y            float64
dtype: object


14:19:05 - cmdstanpy - INFO - Chain [1] start processing
14:19:05 - cmdstanpy - INFO - Chain [1] done processing


✅ Prophet model saved successfully!

📌 Monthly Predictions:
                           ds       yhat
0  2025-03-31 14:19:05.215052  44.340758
1  2025-04-30 14:19:05.215052  45.612635
2  2025-05-31 14:19:05.215052  45.536692
3  2025-06-30 14:19:05.215052  56.003823
4  2025-07-31 14:19:05.215052  53.658338
5  2025-08-31 14:19:05.215052  54.426979
6  2025-09-30 14:19:05.215052  49.005011
7  2025-10-31 14:19:05.215052  49.916190
8  2025-11-30 14:19:05.215052  50.950786
9  2025-12-31 14:19:05.215052  43.467914
10 2026-01-31 14:19:05.215052  44.546227
11 2026-02-28 14:19:05.215052  43.260082

📌 Yearly Predictions:
                          ds       yhat
0 2025-12-31 14:19:05.237178  43.467914
1 2026-12-31 14:19:05.237178  43.154294
2 2027-12-31 14:19:05.237178  42.840304
3 2028-12-31 14:19:05.237178  42.559101
4 2029-12-31 14:19:05.237178  42.245875
