In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [2]:
# Step 1: Load the data
# Read the rainfall table from a CSV given by relative path; later cells expect
# a DISTRICT column plus one column per month (JAN..DEC).
df = pd.read_csv(filepath_or_buffer="rainfall.csv")

In [3]:
# Step 3: Reshape the dataset from wide to long format
# Every input row is unpivoted into twelve rows of (DISTRICT, MONTH, RAINFALL).
# Columns that are named in neither id_vars nor value_vars are left out of the result.
df_melted = pd.melt(
    df,
    id_vars=["DISTRICT"],
    value_vars=[
        "JAN", "FEB", "MAR", "APR",
        "MAY", "JUN", "JUL", "AUG",
        "SEP", "OCT", "NOV", "DEC",
    ],
    var_name="MONTH",
    value_name="RAINFALL",
)

In [4]:
# Step 4: Encode district and month
# One independent, still-unfitted encoder per categorical column; they are kept
# as separate objects so each can be fitted and saved on its own.
district_encoder, month_encoder = LabelEncoder(), LabelEncoder()

In [5]:
# District names are upper-cased before fitting, so the district encoder only
# ever sees (and later only recognises) upper-case names.
district_names_upper = df_melted["DISTRICT"].str.upper()
df_melted["DISTRICT_ENC"] = district_encoder.fit_transform(district_names_upper)

# LabelEncoder numbers its classes in sorted order, so the month codes follow
# alphabetical order (APR=0, AUG=1, ...), not calendar order.
df_melted["MONTH_ENC"] = month_encoder.fit_transform(df_melted["MONTH"])

In [6]:
# Step 5: Features and Target
# Features are the two integer codes produced by the encoders; the target is
# the rainfall value from the melted frame.
X = df_melted.loc[:, ["DISTRICT_ENC", "MONTH_ENC"]]
y = df_melted.loc[:, "RAINFALL"]

In [7]:
# Step 6: Train-test split
# test_size=0.25 is the proportion scikit-learn uses when neither test_size nor
# train_size is given; it is spelled out here for the reader. The fixed
# random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [8]:
# Step 7: Standardization
# The scaler learns its statistics from the training features only; the same
# fitted scaler is then applied to both splits, so nothing from the test split
# influences the scaling parameters.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Step 8: Random Forest Regressor (can also try others like Linear, DT)
# 100 trees with a fixed seed so repeated runs build the same forest.
rf_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
# Train on the scaled training features; fit() stays the last expression of the cell.
rf_model.fit(X_train_scaled, y_train)

In [10]:
# Step 9: Predictions and metrics
# Score the held-out split: predict from the scaled test features, then compare
# against the true rainfall values.
y_pred = rf_model.predict(X_test_scaled)
mse, r2 = (metric(y_test, y_pred) for metric in (mean_squared_error, r2_score))

# One print call, one metric per line.
print(
    f"Random Forest Regressor MSE: {mse}",
    f"R² Score: {r2}",
    sep="\n",
)

Random Forest Regressor MSE: 17219.26022046109
R² Score: 0.29935360075751716


In [11]:
# Step 10: Save model and encoders
# Persist every fitted object needed to reproduce a prediction: the model was
# trained on encoded and scaled features, so the scaler and both encoders are
# saved next to it. All paths are relative, so the files are written to the
# current working directory.
joblib.dump(rf_model, "rainfall_model.pkl")
joblib.dump(scaler, "rainfall_scaler.pkl")
joblib.dump(district_encoder, "district_encoder.pkl")
# Last expression of the cell: its return value (the list of written file
# names) is what the notebook displays as the cell output.
joblib.dump(month_encoder, "month_encoder.pkl")

['month_encoder.pkl']