In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Loading the Dataset

In [5]:
dataset_path = "/content/sr_wq_rs_join.csv"
df = pd.read_csv(dataset_path, encoding='latin1')

In [6]:
unnecessary_columns = ['system:index', 'SiteID', 'date_unity', 'path', 'row', 'sat', '.geo', 'endtime', 'date', 'date_only', 'source', 'lat', 'long', 'TZID', 'date_utc', 'clouds', 'time', 'landsat_id', 'timediff', 'pwater', 'type', 'id']
df = df.drop(columns=unnecessary_columns, errors='ignore')

In [7]:
df.fillna(df.median(), inplace=True)

# Feature Selection

In [8]:
features = ['chl_a', 'doc', 'secchi', 'tss', 'p_sand']

In [9]:
def classify_suitability(value):
    if value >= 0.7:
        return "Highly Preferred"
    elif 0.4 <= value < 0.7:
        return "Partially Preferred"
    else:
        return "Least Preferred"

In [10]:
df['water_suitability'] = (df['chl_a'] * 0.3 + df['secchi'] * 0.3 + df['doc'] * 0.2 + df['tss'] * 0.2) / 4  # Example formula
df['agriculture_suitability'] = df['water_suitability'].apply(classify_suitability)

# Label Encoding

In [11]:
label_encoder = LabelEncoder()
df['suitability_encoded'] = label_encoder.fit_transform(df['agriculture_suitability'])

In [12]:
X = df[features]
y = df['suitability_encoded']

# Model Training

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Random Forest

In [15]:
rf_model = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)
rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, rf_preds))

Random Forest Accuracy: 0.999187982135607


# XGBoost Model

In [16]:
xgb_model = XGBClassifier(n_estimators=300, learning_rate=0.05, max_depth=10)
xgb_model.fit(X_train, y_train)
xgb_preds = xgb_model.predict(X_test)
print("XGBoost Accuracy:", accuracy_score(y_test, xgb_preds))

XGBoost Accuracy: 0.9994448449294456


# Naive Bayes Model

In [17]:
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
nb_preds = nb_model.predict(X_test)
print("Naive Bayes Accuracy:", accuracy_score(y_test, nb_preds))

Naive Bayes Accuracy: 0.9836270683669326


# Neural Networks

In [19]:
nn_model = Sequential()
nn_model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
nn_model.add(Dropout(0.3))
nn_model.add(Dense(64, activation='relu'))
nn_model.add(Dropout(0.3))
nn_model.add(Dense(32, activation='relu'))
nn_model.add(Dense(3, activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [21]:
from tensorflow.keras.callbacks import EarlyStopping

# Optimization

In [22]:
nn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
nn_model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

Epoch 1/30
[1m15086/15086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 3ms/step - accuracy: 0.9980 - loss: 0.0053 - val_accuracy: 0.9989 - val_loss: 0.0029
Epoch 2/30
[1m15086/15086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 3ms/step - accuracy: 0.9982 - loss: 0.0056 - val_accuracy: 0.9982 - val_loss: 0.0042
Epoch 3/30
[1m15086/15086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 3ms/step - accuracy: 0.9981 - loss: 0.0050 - val_accuracy: 0.9988 - val_loss: 0.0033
Epoch 4/30
[1m15086/15086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 3ms/step - accuracy: 0.9981 - loss: 0.0049 - val_accuracy: 0.9987 - val_loss: 0.0032
Epoch 5/30
[1m15086/15086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 3ms/step - accuracy: 0.9983 - loss: 0.0045 - val_accuracy: 0.9990 - val_loss: 0.0027
Epoch 6/30
[1m15086/15086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 3ms/step - accuracy: 0.9984 - loss: 0.0052 - val_accuracy: 0.9991 - val_loss: 0.002

<keras.src.callbacks.history.History at 0x7d97994c5190>

# Evaluation of the Model

In [23]:
nn_preds = np.argmax(nn_model.predict(X_test), axis=1)
print("Neural Network Accuracy:", accuracy_score(y_test, nn_preds))

[1m3772/3772[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step
Neural Network Accuracy: 0.9991382667561544


# Classification Report

In [24]:
print("\nRandom Forest Classification Report:\n", classification_report(y_test, rf_preds))
print("\nXGBoost Classification Report:\n", classification_report(y_test, xgb_preds))
print("\nNaive Bayes Classification Report:\n", classification_report(y_test, nb_preds))
print("\nNeural Network Classification Report:\n", classification_report(y_test, nn_preds))


Random Forest Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    119601
           1       0.88      0.35      0.50        20
           2       0.96      0.94      0.95      1066

    accuracy                           1.00    120687
   macro avg       0.95      0.76      0.82    120687
weighted avg       1.00      1.00      1.00    120687


XGBoost Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    119601
           1       0.92      0.60      0.73        20
           2       0.97      0.97      0.97      1066

    accuracy                           1.00    120687
   macro avg       0.96      0.86      0.90    120687
weighted avg       1.00      1.00      1.00    120687


Naive Bayes Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99    119601
           1       0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
