In [3]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical


In [2]:
pip install tensorflow 

Defaulting to user installation because normal site-packages is not writeable
Collecting tensorflow
  Downloading tensorflow-2.20.0-cp313-cp313-win_amd64.whl.metadata (4.6 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Using cached flatbuffers-25.12.19-py2.py3-none-any.whl.metadata (1.0 kB)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.7.0-py3-none-any.whl.metadata (1.5 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 


[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: C:\Users\Karmanya\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [4]:
df = pd.read_csv("data_distanceFilter_best_before_selection.csv")
df = df.drop(columns=["Unnamed: 0"])


In [5]:
le = LabelEncoder()
df["farming_practice_enc"] = le.fit_transform(df["farming_practice"])

y = df["farming_practice_enc"]
X = df.drop(columns=["farming_practice", "farming_practice_enc"])


In [6]:
rain_cols = [c for c in X.columns if c.startswith("yearly_rain_") and c[-2:].isdigit()]
min_cols  = [c for c in X.columns if c.startswith("yearly_min_temp_") and c[-2:].isdigit()]
max_cols  = [c for c in X.columns if c.startswith("yearly_max_temp_") and c[-2:].isdigit()]
mean_cols = [c for c in X.columns if c.startswith("yearly_avg_mean_temp_") and c[-2:].isdigit()]

rain_cols.sort()
min_cols.sort()
max_cols.sort()
mean_cols.sort()


In [7]:
timesteps = len(rain_cols)
features = 4

X_seq = []

for i in range(len(X)):
    seq = []
    for t in range(timesteps):
        seq.append([
            X.loc[i, rain_cols[t]],
            X.loc[i, min_cols[t]],
            X.loc[i, max_cols[t]],
            X.loc[i, mean_cols[t]],
        ])
    X_seq.append(seq)

X_seq = np.array(X_seq)


In [8]:
print(X_seq.shape)  
# (samples, timesteps, features)


(3007, 15, 4)


In [9]:
scaler = MinMaxScaler()
ns, ts, nf = X_seq.shape
X_seq = X_seq.reshape(ns, ts*nf)
X_seq = scaler.fit_transform(X_seq)
X_seq = X_seq.reshape(ns, ts, nf)


In [10]:
y_cat = to_categorical(y)


In [11]:
X_train, X_test, y_train, y_test = train_test_split(
    X_seq, y_cat, test_size=0.2, random_state=42
)


In [12]:
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(timesteps, features)),
    Dropout(0.3),
    LSTM(32),
    Dropout(0.3),
    Dense(32, activation="relu"),
    Dense(y_cat.shape[1], activation="softmax")
])

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()


  super().__init__(**kwargs)


In [16]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=30,
    batch_size=32
)


Epoch 1/30
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7971 - loss: 0.5040 - val_accuracy: 0.7957 - val_loss: 0.5472
Epoch 2/30
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.8071 - loss: 0.5189 - val_accuracy: 0.8056 - val_loss: 0.5407
Epoch 3/30
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.8208 - loss: 0.4831 - val_accuracy: 0.7807 - val_loss: 0.6396
Epoch 4/30
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.8141 - loss: 0.4975 - val_accuracy: 0.7940 - val_loss: 0.5664
Epoch 5/30
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.8233 - loss: 0.4415 - val_accuracy: 0.8073 - val_loss: 0.5129
Epoch 6/30
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step - accuracy: 0.8387 - loss: 0.4337 - val_accuracy: 0.8123 - val_loss: 0.4989
Epoch 7/30
[1m76/76[0m [32m━━━━

In [19]:
loss, acc = model.evaluate(X_test, y_test)
print("Test Accuracy:", acc)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8439 - loss: 0.4154
Test Accuracy: 0.8438538312911987


In [18]:
sample = X_seq[0].reshape(1, timesteps, features)
pred = model.predict(sample)[0]   # shape: (num_classes,)

# Get class names back from LabelEncoder
class_names = le.inverse_transform(range(len(pred)))

# Combine and sort by probability (high → low)
results = list(zip(class_names, pred))
results.sort(key=lambda x: x[1], reverse=True)

print("Farming Practice Probabilities:\n")
for name, prob in results:
    print(f"{name:25s} : {prob*100:.2f}%")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Farming Practice Probabilities:

Soil Mulching (>13 Celcious) : 44.85%
aerated irrigation (AI) (ph <7) : 37.04%
Film-mulching drip irrigation (Spain) : 9.93%
Soil Mulching (Yearly rainfall <400) : 7.93%
biochar amendment (lat <35) : 0.16%
Soil Mulching (<13 Celcious) : 0.07%
Soil Mulching (Clay soil) : 0.01%


In [20]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Get predictions on test set
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)

# Confusion Matrix
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:\n", cm)

# With class names
class_names = le.inverse_transform(range(len(cm)))

print("\nConfusion Matrix with Labels:")
for i, row in enumerate(cm):
    print(f"{class_names[i]:20s} -> {row}")

# Detailed report
print("\nClassification Report:\n")
print(classification_report(y_true, y_pred, target_names=class_names))


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
Confusion Matrix:
 [[ 46  18  11   0  10]
 [  0 407   0   0   0]
 [  1   4  39   0   9]
 [  0   8   0   0   0]
 [  3  10  20   0  16]]

Confusion Matrix with Labels:
Film-mulching drip irrigation (Spain) -> [46 18 11  0 10]
Soil Mulching (<13 Celcious) -> [  0 407   0   0   0]
Soil Mulching (>13 Celcious) -> [ 1  4 39  0  9]
Soil Mulching (Clay soil) -> [0 8 0 0 0]
Soil Mulching (Yearly rainfall <400) -> [ 3 10 20  0 16]

Classification Report:

                                       precision    recall  f1-score   support

Film-mulching drip irrigation (Spain)       0.92      0.54      0.68        85
         Soil Mulching (<13 Celcious)       0.91      1.00      0.95       407
         Soil Mulching (>13 Celcious)       0.56      0.74      0.63        53
            Soil Mulching (Clay soil)       0.00      0.00      0.00         8
 Soil Mulching (Yearly rainfall <400)       0.46      0.33      0.38        49

 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [21]:
soil_cols = [c for c in X.columns if any(
    k in c for k in ["phh2o", "nitrogen", "sand", "clay", "silt", "soc", "cec", "bdod", "ocs", "ocd", "cfvo"]
)]
X_soil = X[soil_cols].values


In [22]:
soil_scaler = MinMaxScaler()
X_soil = soil_scaler.fit_transform(X_soil)


In [23]:
Xc_train, Xc_test, Xs_train, Xs_test, y_train, y_test = train_test_split(
    X_seq, X_soil, y_cat, test_size=0.2, random_state=42
)


In [24]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Concatenate


In [25]:
# Climate branch
climate_input = Input(shape=(timesteps, features))
x1 = LSTM(64, return_sequences=True)(climate_input)
x1 = Dropout(0.3)(x1)
x1 = LSTM(32)(x1)

# Soil branch
soil_input = Input(shape=(Xs_train.shape[1],))
x2 = Dense(32, activation="relu")(soil_input)
x2 = Dense(16, activation="relu")(x2)

# Merge
merged = Concatenate()([x1, x2])
merged = Dense(32, activation="relu")(merged)
output = Dense(y_cat.shape[1], activation="softmax")(merged)

model = Model(inputs=[climate_input, soil_input], outputs=output)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()


In [26]:
history = model.fit(
    [Xc_train, Xs_train], y_train,
    validation_data=([Xc_test, Xs_test], y_test),
    epochs=30,
    batch_size=32
)


Epoch 1/30
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 23ms/step - accuracy: 0.6520 - loss: 1.1822 - val_accuracy: 0.6761 - val_loss: 1.0634
Epoch 2/30
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.6998 - loss: 0.9695 - val_accuracy: 0.6761 - val_loss: 0.7776
Epoch 3/30
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.7472 - loss: 0.7564 - val_accuracy: 0.7375 - val_loss: 0.7026
Epoch 4/30
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7663 - loss: 0.6276 - val_accuracy: 0.7575 - val_loss: 0.6202
Epoch 5/30
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7688 - loss: 0.6017 - val_accuracy: 0.7558 - val_loss: 0.6151
Epoch 6/30
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.7771 - loss: 0.5691 - val_accuracy: 0.7276 - val_loss: 0.6659
Epoch 7/30
[1m76/76[0m [32m━━━━

In [27]:
loss, acc = model.evaluate([Xc_test, Xs_test], y_test)
print("Test Accuracy:", acc)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8322 - loss: 0.3975
Test Accuracy: 0.8322259187698364


In [28]:
sample_climate = X_seq[0].reshape(1, timesteps, features)
sample_soil = X_soil[0].reshape(1, -1)

pred = model.predict([sample_climate, sample_soil])[0]
class_names = le.inverse_transform(range(len(pred)))

results = list(zip(class_names, pred))
results.sort(key=lambda x: x[1], reverse=True)

for name, prob in results:
    print(f"{name:35s} : {prob*100:.2f}%")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 431ms/step
Soil Mulching (>13 Celcious)        : 56.42%
aerated irrigation (AI) (ph <7)     : 34.98%
Film-mulching drip irrigation (Spain) : 7.71%
Soil Mulching (Yearly rainfall <400) : 0.74%
Soil Mulching (<13 Celcious)        : 0.11%
biochar amendment (lat <35)         : 0.03%
Soil Mulching (Clay soil)           : 0.01%


In [29]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Predict on test set
y_pred_probs = model.predict([Xc_test, Xs_test])
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)

# Confusion Matrix
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:\n", cm)

# Get class names
class_names = le.inverse_transform(range(len(cm)))

# Print matrix with labels
print("\nConfusion Matrix with Labels:")
for i, row in enumerate(cm):
    print(f"{class_names[i]:35s} -> {row}")

# Detailed classification report
print("\nClassification Report:\n")
print(classification_report(y_true, y_pred, target_names=class_names))


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step  
Confusion Matrix:
 [[ 44  16  12   0  13]
 [  0 399   0   4   4]
 [  4   0  41   1   7]
 [  0   5   0   1   2]
 [  2   1  28   2  16]]

Confusion Matrix with Labels:
Film-mulching drip irrigation (Spain) -> [44 16 12  0 13]
Soil Mulching (<13 Celcious)        -> [  0 399   0   4   4]
Soil Mulching (>13 Celcious)        -> [ 4  0 41  1  7]
Soil Mulching (Clay soil)           -> [0 5 0 1 2]
Soil Mulching (Yearly rainfall <400) -> [ 2  1 28  2 16]

Classification Report:

                                       precision    recall  f1-score   support

Film-mulching drip irrigation (Spain)       0.88      0.52      0.65        85
         Soil Mulching (<13 Celcious)       0.95      0.98      0.96       407
         Soil Mulching (>13 Celcious)       0.51      0.77      0.61        53
            Soil Mulching (Clay soil)       0.12      0.12      0.12         8
 Soil Mulching (Yearly rainfall <400)       0.38      0