In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

In [2]:
df = pd.read_csv("Daily.csv")

In [3]:
X = df[["Rain (mm)", "Avg_Temp", "Avg_Humidity"]]  # modify to your feature columns
y = df["GW_Level_Legend"]

In [4]:

# 4. ENCODE TARGET LABELS IF NEEDED
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [5]:
# 5. SPLIT INTO TRAIN/TEST
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)



In [6]:
# 6. FEATURE SCALING
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [7]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Dictionary of models
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="mlogloss"),
    "Gradient Boosting": GradientBoostingClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "KNN (k=5)": KNeighborsClassifier(n_neighbors=5)
}


In [8]:
# Train and evaluate each model
for name, model in models.items():
    print(f"\n🔍 Model: {name}")
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


🔍 Model: Random Forest
                                precision    recall  f1-score   support

   Deep Water Level (20 to 40)       0.66      0.71      0.68     34390
Moderate Water Level (5 to 10)       0.38      0.42      0.40     18517
  Shallow Water Level (2 to 5)       0.22      0.15      0.18     14901
   Very Deep Water Level (>40)       0.60      0.60      0.60     21235

                      accuracy                           0.53     89043
                     macro avg       0.47      0.47      0.47     89043
                  weighted avg       0.52      0.53      0.52     89043


🔍 Model: XGBoost


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


                                precision    recall  f1-score   support

   Deep Water Level (20 to 40)       0.70      0.79      0.74     34390
Moderate Water Level (5 to 10)       0.41      0.60      0.49     18517
  Shallow Water Level (2 to 5)       0.47      0.06      0.11     14901
   Very Deep Water Level (>40)       0.69      0.70      0.69     21235

                      accuracy                           0.61     89043
                     macro avg       0.57      0.54      0.51     89043
                  weighted avg       0.60      0.61      0.57     89043


🔍 Model: Gradient Boosting
                                precision    recall  f1-score   support

   Deep Water Level (20 to 40)       0.70      0.79      0.74     34390
Moderate Water Level (5 to 10)       0.42      0.61      0.49     18517
  Shallow Water Level (2 to 5)       0.49      0.06      0.11     14901
   Very Deep Water Level (>40)       0.69      0.70      0.69     21235

                      accuracy 

In [10]:
# Set best_model manually after evaluation
best_model = models["XGBoost"]  # or whichever performed best


In [11]:
import joblib

# Save model and preprocessing tools
joblib.dump(best_model, "xgboost_model.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")
joblib.dump(scaler, "scaler.pkl")

print("✅ All model artifacts saved successfully.")


✅ All model artifacts saved successfully.
