In [None]:
# 1. Importing necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

In [None]:
# 2. Load the dataset
# Name of the column the regressors below are trained to predict.
target_column = 'Net Ground Water Availability for future use'

# Raw table; the relative path is resolved against the working directory.
data = pd.read_csv('District_Statewise_Well.csv')

In [None]:
# 3. Data Preprocessing
# Drop the serial-number column when present; it is a row counter, not a feature.
data = data.drop(columns=['S.no.'], errors='ignore')

# Encoding categorical columns
# Each column gets its OWN encoder. Re-fitting a single shared encoder on the
# second column would overwrite the mapping learned for the first, making it
# impossible to translate state codes back to names later on.
categorical_columns = ['Name of State', 'Name of District']
label_encoders = {}
for column in categorical_columns:
    column_encoder = LabelEncoder()
    data[column] = column_encoder.fit_transform(data[column])
    label_encoders[column] = column_encoder

# Backward-compatible alias: `label_encoder` previously ended up fitted on the
# district column, so it keeps pointing at that encoder.
label_encoder = label_encoders['Name of District']

# Generate lag features for time series structure
def create_lag_features(df, target_column, lags):
    """Return a copy of ``df`` extended with lagged copies of the target column.

    For every ``lag`` in ``1..lags`` a column named
    ``f'{target_column}_lag_{lag}'`` is added that holds
    ``df[target_column].shift(lag)``, i.e. the target value found ``lag`` rows
    earlier in the frame's current row order. Leading rows that have no
    earlier value are filled with missing values.

    The input frame is left untouched; callers must use the returned frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``target_column``.
    target_column : str
        Name of the column to lag.
    lags : int
        Number of lag columns to create. Values below 1 add no columns.

    Returns
    -------
    pandas.DataFrame
        New frame with the original columns followed by the lag columns.

    Raises
    ------
    KeyError
        If ``target_column`` is not a column of ``df`` and ``lags >= 1``.

    NOTE(review): the shift follows row order only and is unaware of any
    grouping or date column. Presumably rows are expected to be in
    chronological order within a single series -- confirm against the data.
    """
    lag_columns = {
        f'{target_column}_lag_{lag}': df[target_column].shift(lag)
        for lag in range(1, lags + 1)
    }
    # `assign` builds a new frame, so the caller's DataFrame is not mutated.
    return df.assign(**lag_columns)

# Add three lagged copies of the target, then discard every row that still
# holds a missing value in any column (this includes the leading rows whose
# lag values do not exist).
data = create_lag_features(data, target_column, 3).dropna()

In [None]:
# 4. Split features and target
# Target vector first, then everything else becomes the feature matrix.
y = data[target_column]
X = data.drop(columns=target_column)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
# NOTE(review): the rows are shuffled before splitting. If the lag columns are
# meant to carry temporal information, confirm a shuffled split is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# 5. Model Training and Evaluation
#
# SVR with an RBF kernel and k-nearest neighbours both operate on distances
# between feature vectors, so a column with a large numeric range would
# dominate them if left unscaled. Each is therefore wrapped in a pipeline that
# standardises the features first; the scaler is fitted on the training split
# only, so no test-set statistics leak into training. Gradient boosting is
# tree-based and does not depend on feature scale, so it is used directly.

# SVM
# NOTE(review): SVR's default C/epsilon are also sensitive to the scale of the
# target values; they are left at their defaults here -- consider tuning.
svm_model = make_pipeline(StandardScaler(), SVR(kernel='rbf'))
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)
print("SVM R2 Score:", r2_score(y_test, y_pred_svm))

# KNN
knn_model = make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=5))
knn_model.fit(X_train, y_train)
y_pred_knn = knn_model.predict(X_test)
print("KNN R2 Score:", r2_score(y_test, y_pred_knn))

# GBR (reported as best performing before SVM/KNN were scaled; re-check the
# scores printed by this cell before relying on that ranking)
gbr_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
gbr_model.fit(X_train, y_train)
y_pred_gbr = gbr_model.predict(X_test)
print("GBR R2 Score:", r2_score(y_test, y_pred_gbr))

In [None]:
# 6. Save the final model
import joblib

# Serialise the fitted gradient-boosting regressor to a file in the working
# directory so it can be reloaded without retraining.
joblib.dump(value=gbr_model, filename='groundwater_model.pkl')