In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

# 1. Load dataset
data = pd.read_csv(r"C:\Ignite36\Final_Merged_Dataset.csv")

# 2. Extract month from Date
data["Month"] = pd.to_datetime(data["Date"], errors="coerce").dt.month

# 3. Features (X) and Target (y)
X = data.drop(["crop name"], axis=1)   # input features
y = data["crop name"]                  # target variable

# 4. Drop non-useful columns
X = X.drop(["Date", "Radiation_Sanity"], axis=1, errors='ignore')

# 5. Define categorical and numerical columns
categorical_cols = ["Season", "growth stage"]   # categorical features
numeric_cols = [col for col in X.columns if col not in categorical_cols]

# 6. Preprocessing: OneHot for categorical, Scaling for numerical
ct = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
        ("num", StandardScaler(), numeric_cols)
    ])

X_processed = ct.fit_transform(X)

# 7. Encode target (crop name → numbers)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 8. Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("Crop Classes:", le.classes_)


X_train shape: (4977, 17)
X_test shape: (1245, 17)
Crop Classes: ['Barley' 'Buckwheat' 'Cabbage' 'Cauliflower' 'Ginger' 'Large Cardamom'
 'Maize' 'Mandarin' 'Mustard' 'Orange' 'Paddy (Rice)' 'Peas' 'Potato'
 'Soybean' 'Sugarcane' 'Tomato' 'Turmeric' 'Wheat']


In [11]:
data.columns

Index(['Date', 'Temperature_C', 'Relative_Humidity_%', 'Pressure_Pa',
       'Net_Radiation_MJ_m2_day', 'Wind_Speed_m_s', 'Temperature_F',
       'Radiation_Sanity', 'ET0', 'ETc', 'Season', 'crop name', 'growth stage',
       'Kc', 'Month'],
      dtype='object')

In [4]:
from tensorflow.keras.utils import to_categorical

num_classes = len(le.classes_)  # total number of crops
y_train_categorical = to_categorical(y_train, num_classes=num_classes)
y_test_categorical = to_categorical(y_test, num_classes=num_classes)

print("y_train_categorical shape:", y_train_categorical.shape)
print("y_test_categorical shape:", y_test_categorical.shape)


y_train_categorical shape: (4977, 18)
y_test_categorical shape: (1245, 18)


In [5]:
X_train.shape, y_train_categorical.shape, X_test.shape, y_test_categorical.shape

((4977, 17), (4977, 18), (1245, 17), (1245, 18))

In [22]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Input & output sizes
input_dim = X_train.shape[1]        # number of features after preprocessing
num_classes = len(le.classes_)      # number of crops

# Building model
model = Sequential([
    Dense(256, activation='relu', input_shape=(input_dim,)),   # First hidden layer
    Dropout(0.3),                                              # Dropout to prevent overfitting
    Dense(128, activation='relu'),                              # Second hidden layer
    Dropout(0.3),
    Dense(num_classes, activation='softmax')                   # Output layer
])

# Compiling model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Model summary
model.summary()


In [23]:
from tensorflow.keras.callbacks import EarlyStopping

# Early stopping to prevent overfitting
early_stop = EarlyStopping(
    monitor='val_loss',       # track validation loss
    patience=10,              # stop if no improvement for 10 epochs
    restore_best_weights=True # roll back to best model
)

# Train model
history = model.fit(
    X_train, y_train_categorical,
    validation_data=(X_test, y_test_categorical),
    epochs=100,               # max epochs
    batch_size=32,            # you can tune (16, 32, 64…)
    callbacks=[early_stop],
    verbose=1
)

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test_categorical, verbose=0)
print(f"Test Accuracy: {accuracy*100:.2f}%")


Epoch 1/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.2783 - loss: 1.9363 - val_accuracy: 0.3968 - val_loss: 1.4406
Epoch 2/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4067 - loss: 1.4003 - val_accuracy: 0.5285 - val_loss: 1.1591
Epoch 3/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5063 - loss: 1.1433 - val_accuracy: 0.5687 - val_loss: 0.9431
Epoch 4/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5616 - loss: 0.9610 - val_accuracy: 0.6137 - val_loss: 0.8018
Epoch 5/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6192 - loss: 0.8315 - val_accuracy: 0.7133 - val_loss: 0.7025
Epoch 6/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6669 - loss: 0.7341 - val_accuracy: 0.7390 - val_loss: 0.5967
Epoch 7/100
[1m156/15

In [29]:
import pandas as pd
import numpy as np

def predict_crops_full_dataset(
    model,              # Trained neural network
    le,                 # LabelEncoder used for crops
    ct,                 # ColumnTransformer used for preprocessing
    data,               # Original dataset (with ETc, Kc, etc.)
    selected_crop=None  # Optional: crop selected by farmer
):
    """
    Predicts crops for the entire dataset and optionally provides info for a selected crop.
    
    Parameters:
    - model: trained neural network
    - le: LabelEncoder for crops
    - ct: ColumnTransformer used for preprocessing
    - data: original dataset (with ETc, Kc, etc.)
    - selected_crop: crop name selected by farmer (optional)
    
    Returns:
    - dict containing:
        - 'selected_crop_info': ETc, Kc, growth stage, season (if selected_crop provided)
        - 'data_with_predictions': original dataset + 'Predicted_Crop' column
    """
    
    result = {}
    
    # --- Part 1: Farmer-selected crop info ---
    if selected_crop:
        crop_info = data[data["crop name"] == selected_crop][
            ["ETc", "Kc", "growth stage", "Season"]
        ]
        result['selected_crop_info'] = crop_info.reset_index(drop=True)
    else:
        result['selected_crop_info'] = None
    
    # --- Part 2: Predict crop for entire dataset ---
    # Remove target and unnecessary columns
    X_all = data.drop(["crop name", "Date", "Radiation_Sanity"], axis=1, errors='ignore')
    
    # Ensure Month column exists
    if "Month" not in X_all.columns and "Date" in data.columns:
        X_all["Month"] = pd.to_datetime(data["Date"], errors="coerce").dt.month
    
    # Preprocess features
    X_all_processed = ct.transform(X_all)
    
    # Predict
    y_pred_prob = model.predict(X_all_processed)
    y_pred_classes = y_pred_prob.argmax(axis=1)
    y_pred_crop_names = le.inverse_transform(y_pred_classes)
    
    # Add predictions to dataset
    data_with_predictions = data.copy()
    data_with_predictions["Predicted_Crop"] = y_pred_crop_names
    
    result['data_with_predictions'] = data_with_predictions
    
    return result
