In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# Load the district-wise ground water resources table from the Kaggle input
# directory (the path only resolves inside a Kaggle kernel with this dataset
# attached).
df = pd.read_csv('/kaggle/input/districtwise-ground-water-resources-by-july-2017/Dynamic_2017_2_0.csv')

# Define feature columns and target
features = [
    'Total Annual Ground Water Recharge',
    'Annual Extractable Ground Water Resource',
    'Total Current Annual Ground Water Extraction'
]
target = 'Stage of Ground Water Extraction (%)'

# NOTE(review): judging by the column names alone, the target looks like
# 'Total Current Annual Ground Water Extraction' divided by
# 'Annual Extractable Ground Water Resource', expressed as a percentage.
# If that is how the dataset defines it, the model is re-learning a formula
# from its own inputs (target leakage) and the scores below say little about
# real predictive skill -- confirm against the dataset documentation.

# Drop rows with missing values, but only in the columns the model actually
# uses. A bare dropna() would also discard rows that are complete for
# modelling but have a gap in some unrelated column of the CSV.
# Consequence: `df` may still contain NaNs in columns outside
# `features + [target]`.
df = df.dropna(subset=features + [target])

# Select features (X) and target (y)
X = df[features]
y = df[target]

# Train-test split (80% train, 20% test); fixed seed keeps the split
# reproducible across re-runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest Regressor (100 trees, fixed seed for reproducibility)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions on the held-out 20%
y_pred = model.predict(X_test)

# Evaluate the model on the held-out set
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error: {mae:.2f}")
print(f"R² Score: {r2:.2f}")

# Feature importance: the forest's impurity-based importances, reported in
# the same order as `features`.
importances = model.feature_importances_
for feat, imp in zip(features, importances):
    print(f'{feat}: {imp:.3f}')



Mean Absolute Error: 4.01
R² Score: 0.95
Total Annual Ground Water Recharge: 0.175
Annual Extractable Ground Water Resource: 0.241
Total Current Annual Ground Water Extraction: 0.583
