In [None]:
  from google.colab import files
# Calls google.colab's files.upload() and keeps the result in `uploaded`.
# `uploaded` is not referenced by any other cell shown here; the loading cells
# read the CSV by file name instead (presumably the file uploaded here - TODO confirm).
# NOTE(review): the import line above is indented by two spaces while this
# statement is not. IPython appears to tolerate that, but a plain-Python export
# of this cell would fail with an IndentationError; consider dedenting the import.
uploaded = files.upload()

TypeError: 'NoneType' object is not subscriptable

In [None]:
# Read the crop-recommendation CSV from the working directory, then preview
# the first rows and list the column names.
import pandas as pd

df = pd.read_csv(
    "Crop Recommendation using Soil Properties and Weather Prediction (3).csv"
)
print(df.head(n=5))
print('\nColumns:', df.columns.tolist())

In [None]:
# Step 1: Count the missing entries in every column of the raw frame
import pandas as pd

missing = df.isna().sum()
print(missing)

In [None]:
# Step 1 Enhancement: derive EC and seasonal temperature-range features,
# one-hot encode the soil colour, then min-max scale the numeric columns.
from sklearn.preprocessing import MinMaxScaler

# Work on a copy so the raw frame `df` stays untouched.
soil_df = df.copy()

# "EC" in this notebook is simply the sum of the three major nutrient columns.
soil_df['EC'] = soil_df['K'].add(soil_df['P']).add(soil_df['N'])

# One temperature-range column (max minus min) per season suffix.
seasons = ['W', 'Sp', 'Su', 'Au']
for s in seasons:
    soil_df[f'TRANGE-{s}'] = soil_df[f'T2M_MAX-{s}'].sub(soil_df[f'T2M_MIN-{s}'])

# One-hot encode Soilcolor; the first category is dropped.
soil_df = pd.get_dummies(soil_df, columns=['Soilcolor'], drop_first=True)

# Columns to scale: every float64/int64 column except the target and the
# Soilcolor_* dummy columns.
numeric_cols = [
    col
    for col in soil_df.select_dtypes(include=['float64', 'int64']).columns
    if col != 'label' and not col.startswith('Soilcolor_')
]

# NOTE(review): the scaler is fitted on the whole data set, i.e. before the
# train/test split performed in Step 3, so test rows influence the min/max.
scaler = MinMaxScaler()
scaler.fit(soil_df[numeric_cols])
soil_df[numeric_cols] = scaler.transform(soil_df[numeric_cols])

# Preview the normalised data.
display_df = soil_df.head()
print(display_df)


In [None]:
# Step 2: Exploratory Data Analysis
import seaborn as sns
import matplotlib.pyplot as plt

# Correlation heatmap of the scaled numeric features, including EC and TRANGE-*.
# numeric_cols is computed after EC and the TRANGE-* columns are added to
# soil_df, so it can already contain them. Selecting the same label twice
# duplicates that column and repeats its row/column in the heatmap, so the
# combined list is de-duplicated while keeping its original order.
trange_cols = [col for col in soil_df.columns if col.startswith('TRANGE-')]
corr_cols = list(dict.fromkeys(numeric_cols + ['EC'] + trange_cols))
df_corr = soil_df[corr_cols].corr()
plt.figure(figsize=(12,10))
sns.heatmap(df_corr, cmap='coolwarm', center=0)
plt.title('Correlation Heatmap')
plt.show()

# Boxplots for Zn, S, EC, PS on a 2x2 grid
plt.figure(figsize=(12,8))
for i, col in enumerate(['Zn', 'S', 'EC', 'PS'], 1):
    plt.subplot(2,2,i)
    sns.boxplot(x=soil_df[col])
    plt.title('Boxplot of ' + col)
plt.tight_layout()
plt.show()

In [None]:
# Step 3: RFE with Random Forest to select top features and rebuild model
# Imports and train-test split
tqdm_installed = True
try:
    from tqdm import tqdm
except ImportError:
    tqdm_installed = False
    %pip install tqdm
    from tqdm import tqdm

from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# The target is the 'label' column; every other column of the engineered
# frame is a candidate feature.
df_model = soil_df.copy()
y = df_model['label']
X = df_model.drop(columns=['label'])

# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Recursive feature elimination: drop one feature per step until 10 remain.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
selector = RFE(estimator=rf, n_features_to_select=10, step=1).fit(X_train, y_train)
selected_features = X_train.columns[selector.support_].tolist()

# List the surviving features, one per line.
print('Selected Features:')
print(*selected_features, sep='\n')

# Fresh forest trained on the selected features only.
rf_selected = RandomForestClassifier(n_estimators=100, random_state=42)
rf_selected.fit(X_train[selected_features], y_train)

# Hold-out evaluation.
y_pred = rf_selected.predict(X_test[selected_features])
acc = accuracy_score(y_test, y_pred)
print('Test Accuracy:', acc)

print('Classification Report:')
print(classification_report(y_test, y_pred))

# Confusion matrix heatmap; rows and columns follow the classifier's class order.
import seaborn as sns
import matplotlib.pyplot as plt
cm = confusion_matrix(y_test, y_pred, labels=rf_selected.classes_)
fig, ax = plt.subplots(figsize=(8,6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=rf_selected.classes_,
    yticklabels=rf_selected.classes_,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()


In [None]:
# Step 4: Grid-search the Random Forest hyperparameters on the RFE-selected features
from sklearn.model_selection import GridSearchCV

# Every combination of these values is scored with 5-fold cross-validation.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
)

grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train[selected_features], y_train)

# Winning parameter combination and its mean cross-validated accuracy.
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print('Best Parameters:', best_params, sep='\n')
print('Best CV Accuracy:', best_score, sep='\n')

# Score the best estimator found by the search on the held-out test split.
tuned_rf = grid_search.best_estimator_
y_pred_tuned = tuned_rf.predict(X_test[selected_features])
acc_tuned = accuracy_score(y_test, y_pred_tuned)
print('Test Accuracy after Tuning:', acc_tuned, sep='\n')

# First rows of the per-combination results table.
cv_results = pd.DataFrame(grid_search.cv_results_)
print(
    cv_results.loc[
        :,
        ['param_n_estimators', 'param_max_depth', 'param_min_samples_split', 'mean_test_score'],
    ].head()
)

# Confusion matrix of the tuned model.
cm_tuned = confusion_matrix(y_test, y_pred_tuned, labels=tuned_rf.classes_)
fig, ax = plt.subplots(figsize=(8,6))
sns.heatmap(
    cm_tuned,
    annot=True,
    fmt='d',
    xticklabels=tuned_rf.classes_,
    yticklabels=tuned_rf.classes_,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix - Tuned RF')
plt.show()

In [None]:
# Step 5: Train and evaluate XGBoost on selected features
# Install xgboost if needed
try:
    import xgboost as xgb
except ImportError:
    %pip install xgboost
    import xgboost as xgb

from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder # Import LabelEncoder

# Integer-encode the crop labels: the encoder is fitted on the training
# targets and then applied to both splits.
le = LabelEncoder()
le.fit(y_train)
y_train_encoded = le.transform(y_train)
y_test_encoded = le.transform(y_test)

# Multi-class log-loss as the evaluation metric, fixed seed.
xgb_clf = XGBClassifier(random_state=42, eval_metric='mlogloss')

# 5-fold cross-validated accuracy on the training split (encoded targets).
grid_scores = cross_val_score(
    xgb_clf,
    X_train[selected_features],
    y_train_encoded,
    scoring='accuracy',
    cv=5,
)
print('XGBoost CV Accuracy Scores:', grid_scores, sep='\n')
print('Mean CV Accuracy:', grid_scores.mean(), sep='\n')

# Fit on the full training split, predict the test split, and map the integer
# predictions back to the original label values.
xgb_clf.fit(X_train[selected_features], y_train_encoded)
y_pred_xgb_encoded = xgb_clf.predict(X_test[selected_features])
y_pred_xgb = le.inverse_transform(y_pred_xgb_encoded)

# All reporting below compares original labels with decoded predictions.
acc_xgb = accuracy_score(y_test, y_pred_xgb)
print('XGBoost Test Accuracy:', acc_xgb, sep='\n')

print('Classification Report - XGBoost:')
print(classification_report(y_test, y_pred_xgb))

# Confusion matrix; axis order follows the encoder's class list.
cm_xgb = confusion_matrix(y_test, y_pred_xgb, labels=le.classes_)
fig, ax = plt.subplots(figsize=(8,6))
sns.heatmap(cm_xgb, annot=True, fmt='d', xticklabels=le.classes_, yticklabels=le.classes_, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix - XGBoost')
plt.show()

In [None]:
# Install xgboost and then run the evaluation with XGBoost
# Installing xgboost
%pip install xgboost

# Imports and training
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder # Ensure LabelEncoder is imported if not already

# This cell relies on the LabelEncoder `le` that an earlier cell has already
# fitted; it only re-applies it to the training targets.
y_train_encoded = le.transform(y_train)

# Multi-class log-loss as the evaluation metric, fixed seed.
xgb_clf = XGBClassifier(random_state=42, eval_metric='mlogloss')

# 5-fold cross-validated accuracy on the training split (encoded targets).
cv_scores_xgb = cross_val_score(
    xgb_clf,
    X_train[selected_features],
    y_train_encoded,
    scoring='accuracy',
    cv=5,
)
print('CV Scores XGBoost:', cv_scores_xgb, sep='\n')
print('Mean CV Accuracy:', cv_scores_xgb.mean(), sep='\n')

# Train on the full training split, then predict the test split. Predictions
# come back as integer codes and are decoded to the original labels.
xgb_clf.fit(X_train[selected_features], y_train_encoded)
y_pred_xgb_encoded = xgb_clf.predict(X_test[selected_features])
y_pred_xgb = le.inverse_transform(y_pred_xgb_encoded)

# Compare the original test labels with the decoded predictions.
acc_xgb_test = accuracy_score(y_test, y_pred_xgb)
print('Test Accuracy XGBoost:', acc_xgb_test, sep='\n')

print('Classification Report - XGBoost:')
print(classification_report(y_test, y_pred_xgb))

# Confusion matrix; axis order follows the encoder's class list.
cm_xgb = confusion_matrix(y_test, y_pred_xgb, labels=le.classes_)
fig, ax = plt.subplots(figsize=(8,6))
sns.heatmap(cm_xgb, annot=True, fmt='d', xticklabels=le.classes_, yticklabels=le.classes_, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix - XGBoost')
plt.show()

In [None]:
# Installing and running XGBoost evaluation on selected features
# Install xgboost if not already installed
import importlib

if not importlib.util.find_spec('xgboost'):
    %pip install xgboost

# Imports
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder # Ensure LabelEncoder is imported

# Fit a fresh LabelEncoder here so the cell does not depend on an encoder left
# behind by an earlier cell. It is fitted on the training targets and then
# applied to both splits.
le = LabelEncoder()
le.fit(y_train)
y_train_encoded = le.transform(y_train)
y_test_encoded = le.transform(y_test)  # not used by the fit or CV calls below

# Multi-class log-loss as the evaluation metric, fixed seed.
xgb_clf = XGBClassifier(random_state=42, eval_metric='mlogloss')

# 5-fold cross-validated accuracy on the training split (encoded targets).
cv_scores_xgb = cross_val_score(
    xgb_clf,
    X_train[selected_features],
    y_train_encoded,
    scoring='accuracy',
    cv=5,
)
print('XGBoost CV Scores:', cv_scores_xgb, sep='\n')
print('Mean CV Accuracy:', cv_scores_xgb.mean(), sep='\n')

# Train on the full training split, predict the test split, and decode the
# integer predictions back to the original labels.
xgb_clf.fit(X_train[selected_features], y_train_encoded)
y_pred_xgb_encoded = xgb_clf.predict(X_test[selected_features])
y_pred_xgb = le.inverse_transform(y_pred_xgb_encoded)

# Compare the original test labels with the decoded predictions.
acc_xgb_test = accuracy_score(y_test, y_pred_xgb)
print('XGBoost Test Accuracy:', acc_xgb_test, sep='\n')

print('Classification Report - XGBoost:')
print(classification_report(y_test, y_pred_xgb))

# Confusion matrix; axis order follows the encoder's class list.
cm_xgb = confusion_matrix(y_test, y_pred_xgb, labels=le.classes_)
fig, ax = plt.subplots(figsize=(8,6))
sns.heatmap(cm_xgb, annot=True, fmt='d', xticklabels=le.classes_, yticklabels=le.classes_, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix - XGBoost')
plt.show()

In [None]:
# Step 6: Build and train a simple Keras neural network on the selected features
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder # Import LabelEncoder

# Reuse the LabelEncoder left behind by an earlier cell when there is one;
# otherwise fit a new encoder on every label seen in the train and test targets.
try:
    le
except NameError:
    from sklearn.preprocessing import LabelEncoder
    le = LabelEncoder()
    all_labels = pd.concat([y_train, y_test]).unique()
    le.fit(all_labels)

# Integer-encode both targets; the loss below expects integer class ids.
y_train_encoded, y_test_encoded = (le.transform(split) for split in (y_train, y_test))

# Two hidden ReLU layers followed by a softmax over the encoder's classes.
model = models.Sequential([
    layers.Input(shape=(len(selected_features),)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(len(le.classes_), activation='softmax'),
])
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Stop once validation accuracy has not improved for 5 epochs and roll back
# to the best weights seen.
early_stop = callbacks.EarlyStopping(
    monitor='val_accuracy', patience=5, restore_best_weights=True
)

# 20% of the training split is held out for validation during fitting.
history = model.fit(
    X_train[selected_features],
    y_train_encoded,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1,
)

# Learning curves: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10,4))

loss_ax.plot(history.history['loss'], label='train_loss')
loss_ax.plot(history.history['val_loss'], label='val_loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training vs Validation Loss')
loss_ax.legend()

acc_ax.plot(history.history['accuracy'], label='train_acc')
acc_ax.plot(history.history['val_accuracy'], label='val_acc')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Training vs Validation Accuracy')
acc_ax.legend()

fig.tight_layout()
plt.show()

# Baseline score on the test split, using the integer-encoded targets.
test_loss, test_acc = model.evaluate(X_test[selected_features], y_test_encoded, verbose=0)
print('Baseline NN Test Loss:', test_loss, sep='\n')
print('Baseline NN Test Accuracy:', test_acc, sep='\n')

In [None]:
# Install TensorFlow if not present
import importlib
if not importlib.util.find_spec('tensorflow'):
    %pip install tensorflow
print('TensorFlow installation check complete')

In [None]:
# Build, train, and evaluate a simple Keras neural network
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder # Ensure LabelEncoder is imported if not already used

# Use the LabelEncoder from an earlier cell when it exists; otherwise build one
# from every label present in the train and test targets.
try:
    le
except NameError:
    print("LabelEncoder 'le' not found, initializing and fitting.")
    from sklearn.preprocessing import LabelEncoder
    le = LabelEncoder()
    import pandas as pd
    all_labels = pd.concat([y_train, y_test]).unique()
    le.fit(all_labels)

# Integer class ids for both splits.
y_train_encoded, y_test_encoded = (le.transform(split) for split in (y_train, y_test))

# Same architecture as the baseline: 64 -> 32 ReLU units, softmax output with
# one unit per class known to the encoder.
nn_model = models.Sequential()
nn_model.add(layers.Input(shape=(len(selected_features),)))
nn_model.add(layers.Dense(64, activation='relu'))
nn_model.add(layers.Dense(32, activation='relu'))
nn_model.add(layers.Dense(len(le.classes_), activation='softmax'))
nn_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Stop after 5 epochs without a validation-accuracy improvement and restore
# the best weights.
es = callbacks.EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

# 20% of the training split is held out for validation during fitting.
nn_history = nn_model.fit(
    X_train[selected_features],
    y_train_encoded,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[es],
    verbose=1,
)

# Learning curves: one panel per metric, train and validation on each.
dict_hist = nn_history.history
plt.figure(figsize=(10,4))
panels = [
    ('loss', 'val_loss', 'train_loss', 'val_loss', 'Loss', 'Loss over Epochs'),
    ('accuracy', 'val_accuracy', 'train_acc', 'val_acc', 'Accuracy', 'Accuracy over Epochs'),
]
for position, (train_key, val_key, train_lbl, val_lbl, y_lbl, heading) in enumerate(panels, start=1):
    plt.subplot(1,2,position)
    plt.plot(dict_hist[train_key], label=train_lbl)
    plt.plot(dict_hist[val_key], label=val_lbl)
    plt.xlabel('Epoch')
    plt.ylabel(y_lbl)
    plt.legend()
    plt.title(heading)
plt.tight_layout()
plt.show()

# Baseline score on the test split, using the integer-encoded targets.
baseline_loss, baseline_acc = nn_model.evaluate(
    X_test[selected_features], y_test_encoded, verbose=0
)
print('Baseline NN Test Loss:', baseline_loss, sep='\n')
print('Baseline NN Test Accuracy:', baseline_acc, sep='\n')

In [None]:
 # Compare RF (tuned) vs NN baseline: compute metrics and plot confusion matrices
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder # Ensure LabelEncoder is imported

# The network outputs one probability per class; the index of the largest
# value in each row is the predicted integer class id.
nn_probabilities = nn_model.predict(X_test[selected_features])
y_pred_nn_encoded = nn_probabilities.argmax(axis=1)

# An already-fitted LabelEncoder `le` is expected from an earlier cell. If it
# is missing, rebuild it from every label in the train and test targets.
try:
    le
except NameError:
    print("LabelEncoder 'le' not found. Re-initializing and fitting on all unique labels.")
    le = LabelEncoder()
    import pandas as pd
    all_labels = pd.concat([y_train, y_test]).unique()
    le.fit(all_labels)

# Map the integer ids back to the original label values.
y_pred_nn = le.inverse_transform(y_pred_nn_encoded)

# --- End of Added/Modified Code ---


def get_metrics(y_true, y_pred, average='weighted'):
    """Return accuracy, precision, recall and F1 for one set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        average: Averaging mode forwarded to the precision, recall and F1
            scorers.

    Returns:
        A dict with the keys 'Accuracy', 'Precision', 'Recall' and 'F1 Score',
        in that order. Precision, recall and F1 are computed with
        ``zero_division=0``.
    """
    scores = {'Accuracy': accuracy_score(y_true, y_pred)}
    averaged_scorers = (
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1 Score', f1_score),
    )
    for metric_name, scorer in averaged_scorers:
        scores[metric_name] = scorer(y_true, y_pred, average=average, zero_division=0)
    return scores

# Tuned-RF predictions may already exist as y_pred_rf. If they do not, build
# them from tuned_rf; if tuned_rf is missing as well, fall back to a default
# Random Forest trained on the selected features.
# The fallbacks are nested because a second `except NameError` on the same
# `try` can never run, and an exception raised inside an `except` handler is
# not caught by sibling handlers.
try:
    y_pred_rf
except NameError:
    try:
        tuned_rf
    except NameError:
        # Less ideal: no tuned model is available, so compare against defaults.
        print("tuned_rf not found. Training a default RandomForestClassifier for comparison.")
        from sklearn.ensemble import RandomForestClassifier
        rf_default = RandomForestClassifier(n_estimators=100, random_state=42)
        try:
            X_train, y_train
        except NameError:
            print("X_train or y_train not found. Cannot train default RF. Please ensure previous cells ran.")
            raise

        rf_default.fit(X_train[selected_features], y_train)
        y_pred_rf = rf_default.predict(X_test[selected_features])
    else:
        print("y_pred_rf not found, generating predictions using tuned_rf.")
        y_pred_rf = tuned_rf.predict(X_test[selected_features])


# Score both models against the original test labels.
metrics_rf = get_metrics(y_test, y_pred_rf)
metrics_nn = get_metrics(y_test, y_pred_nn)

# One row per model; the model name goes in the leading column.
df_comp = pd.DataFrame([metrics_rf, metrics_nn])
df_comp.insert(0, 'Model', ['Random Forest (tuned)', 'Neural Network (baseline)'])

print(df_comp)

def plot_cm(y_true, y_pred, title):
    """Draw an annotated confusion-matrix heatmap for one model.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        title: Figure title.

    The tick labels are the sorted unique values found in ``y_true`` and
    ``y_pred`` together, and the figure size grows with the number of labels.
    """
    cm = confusion_matrix(y_true, y_pred)
    combined = pd.concat([pd.Series(y_true), pd.Series(y_pred)])
    labels = sorted(combined.unique())
    n_labels = len(labels)

    fig, ax = plt.subplots(figsize=(n_labels*0.8, n_labels*0.7))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=labels,
        yticklabels=labels,
        ax=ax,
    )
    ax.set_title(title)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    fig.tight_layout()  # keeps the tick labels from overlapping
    plt.show()

# One heatmap per model: the tuned forest first, then the network (whose
# predictions were decoded back to the original labels).
for model_preds, cm_title in (
    (y_pred_rf, 'Confusion Matrix - Random Forest (tuned)'),
    (y_pred_nn, 'Confusion Matrix - Neural Network (baseline)'),
):
    plot_cm(y_test, model_preds, cm_title)

In [None]:
# Install keras-tuner if not installed
import importlib
if not importlib.util.find_spec('kerastuner'):
    %pip install keras-tuner
print('Keras Tuner installation check complete')

In [None]:
# Hyperparameter tuning for Keras model using Keras Tuner
# The package is imported as `keras_tuner`; `kerastuner` is the legacy module
# name and is kept only as a fallback for old installations.
try:
    import keras_tuner as kt
except ImportError:
    import kerastuner as kt
from tensorflow import keras
from tensorflow.keras import layers, callbacks
from sklearn.preprocessing import LabelEncoder

# Use the LabelEncoder `le` from an earlier cell when it exists. Otherwise
# rebuild it from the labels in y_train and y_test; if those are missing too,
# report it and re-raise so the cell stops.
try:
    le
except NameError:
    print("LabelEncoder 'le' not found. Re-initializing and fitting on all unique labels.")
    from sklearn.preprocessing import LabelEncoder
    import pandas as pd
    le = LabelEncoder()
    try:
        all_labels = pd.concat([y_train, y_test]).unique()
        le.fit(all_labels)
    except NameError:
        print("Could not re-fit LabelEncoder: y_train or y_test not found.")
        print("Please ensure preceding cells defining y_train and y_test are run.")
        raise


# Integer-encode the training targets for the tuner.
y_train_encoded = le.transform(y_train)

# Number of output units for the network: one per class known to the encoder.
num_classes = len(le.classes_)


def build_model(hp):
    """Build and compile one candidate network for the tuner.

    Args:
        hp: Hyperparameter object supplied by the tuner. It is queried for
            'num_layers' (1 to 3), then 'units_<i>' and 'activation_<i>' for
            each hidden layer, and finally 'learning_rate'.

    Returns:
        A compiled ``keras.Sequential`` model with a softmax output of
        ``num_classes`` units, Adam optimizer and sparse categorical
        cross-entropy loss.

    Raises:
        NameError: If ``selected_features`` is not defined (re-raised after a
            message is printed).
    """
    model = keras.Sequential()

    # The input width comes from the feature list produced by the RFE step.
    try:
        input_shape = (len(selected_features),)
    except NameError:
        print("Error: 'selected_features' is not defined. Please run the RFE step.")
        raise
    model.add(layers.Input(shape=input_shape))

    # Between one and three hidden layers, each with its own width and activation.
    depth = hp.Int('num_layers', 1, 3)
    for layer_idx in range(depth):
        model.add(
            layers.Dense(
                hp.Choice(f'units_{layer_idx}', [32, 64, 128]),
                activation=hp.Choice(f'activation_{layer_idx}', ['relu', 'tanh']),
            )
        )

    # One softmax unit per class.
    model.add(layers.Dense(num_classes, activation='softmax'))

    # The Adam learning rate is tuned as well.
    lr = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        metrics=['accuracy'],
    )
    return model

# Random search over the space defined in build_model: 10 trials, one
# execution each, ranked by validation accuracy. Results are written under
# tuner_dir/crop_nn_tuning.
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=1,
    project_name='crop_nn_tuning',
    directory='tuner_dir',
)

# End a trial after 3 epochs without a validation-accuracy gain and keep the
# best weights seen.
stop_early = callbacks.EarlyStopping(
    monitor='val_accuracy', patience=3, restore_best_weights=True
)

# Search on the selected features with the integer-encoded targets; 20% of the
# training split is held out for validation in each trial.
tuner.search(
    X_train[selected_features],
    y_train_encoded,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    callbacks=[stop_early],
    verbose=1,
)

print("\nSearch results summary:")
tuner.results_summary()

# Take the top-ranked hyperparameter set, show it, and save it as JSON.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
import json
best_hps_dict = best_hps.values
print("\nBest Hyperparameters:")
print(best_hps_dict)
with open('best_hps.json', 'w') as f:
    f.write(json.dumps(best_hps_dict))

In [None]:
# Cell 1: Install tensorflow to support Keras Tuner
# Checking and installing TensorFlow if missing
import importlib
if not importlib.util.find_spec('tensorflow'):
    %pip install tensorflow
print('TensorFlow installation check complete')

In [None]:
# Reload the CSV, then print the first rows followed by summary statistics
# for every column (numeric and non-numeric alike).
import pandas as pd

df = pd.read_csv(
    'Crop Recommendation using Soil Properties and Weather Prediction (3).csv'
)
print(df.head(n=5))
print(df.describe(include='all'))

In [None]:
# Cell: reload the dataset and inspect its first rows and column dtypes
import pandas as pd

df = pd.read_csv(
    'Crop Recommendation using Soil Properties and Weather Prediction (3).csv'
)

# Snapshots shown by display_results().
df_head = df.head()
df_dtypes = df.dtypes


def display_results():
    """Print the first-rows preview, a blank line, then the column dtypes."""
    print(df_head, '\nColumn Data Types:', df_dtypes, sep='\n')


display_results()

In [None]:
# Step 7: Compare Random Forest vs Neural Network
# 1. Preprocessing, splitting, training RF and NN with best hyperparameters
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow import keras

# Reload the raw CSV so this cell starts from unmodified data.
# NOTE(review): this cell reassigns df, X, y, X_train, X_test, y_train, y_test,
# rf and scaler, which earlier cells also define; re-running earlier cells
# afterwards will see these replacements.
df = pd.read_csv('Crop Recommendation using Soil Properties and Weather Prediction (3).csv')

# Integer codes for the soil colour and for the crop label.
soil_le = LabelEncoder()
target_le = LabelEncoder()
df = df.assign(Soilcolor_enc=soil_le.fit_transform(df['Soilcolor']))
df = df.assign(label_enc=target_le.fit_transform(df['label']))

# Features: everything except the raw soil colour and the two label columns.
def_cols = df.drop(columns=['Soilcolor', 'label', 'label_enc']).columns.tolist()
X = df[def_cols]
y = df['label_enc']

# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Random Forest on the unscaled features.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Test-set predictions and weighted-average metrics for the forest.
y_pred_rf = rf.predict(X_test)
rf_acc = accuracy_score(y_test, y_pred_rf)
rf_prec, rf_rec, rf_f1 = (
    scorer(y_test, y_pred_rf, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Standardise the features for the network; the scaler is fitted on the
# training split only and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

def build_best_model(input_dim, num_classes):
    """Build the fixed network used for the final comparison.

    Architecture: Dense(64, relu) -> Dense(32, tanh) -> softmax output,
    compiled with Adam (learning rate 0.001) and sparse categorical
    cross-entropy. The notebook describes these as the best hyperparameters.

    Args:
        input_dim: Number of input features.
        num_classes: Number of output classes.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    model = keras.Sequential([
        keras.layers.Input(shape=(input_dim,)),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(32, activation='tanh'),
        keras.layers.Dense(num_classes, activation='softmax'),
    ])
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )
    return model

# Seed the Python, NumPy and TensorFlow random generators before the network is
# built. Without this the weight initialisation and batch order change on every
# run, so the NN row of the comparison is not reproducible even though the
# split and the Random Forest are pinned to seed 42.
keras.utils.set_random_seed(42)

num_classes = len(target_le.classes_)
input_dim = X_train_scaled.shape[1]

nn = build_best_model(input_dim, num_classes)
# Stop after 5 epochs without a val_accuracy improvement and restore the best weights seen
early_stop = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

# 20% of the training data is held out so the early-stopping monitor has a validation score
history = nn.fit(X_train_scaled, y_train,
                 validation_split=0.2,
                 epochs=50,
                 batch_size=32,
                 callbacks=[early_stop],
                 verbose=0)

# NN predictions and metrics: argmax over the softmax outputs gives the predicted class index
y_pred_nn_prob = nn.predict(X_test_scaled)
y_pred_nn = np.argmax(y_pred_nn_prob, axis=1)
nn_acc = accuracy_score(y_test, y_pred_nn)
nn_prec = precision_score(y_test, y_pred_nn, average='weighted')
nn_rec = recall_score(y_test, y_pred_nn, average='weighted')
nn_f1 = f1_score(y_test, y_pred_nn, average='weighted')

# Test-set metrics for both models, one row per model
metrics_df = pd.DataFrame({
    'Model': ['Random Forest', 'Neural Network'],
    'Accuracy': [rf_acc, nn_acc],
    'Precision': [rf_prec, nn_prec],
    'Recall': [rf_rec, nn_rec],
    'F1 Score': [rf_f1, nn_f1],
})
print(metrics_df)

# Confusion matrices drawn side by side; sns.heatmap returns the Axes it drew
# on, so the title and axis labels are applied with a single .set(...) call
cm_rf = confusion_matrix(y_test, y_pred_rf)
cm_nn = confusion_matrix(y_test, y_pred_nn)

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
sns.heatmap(cm_rf, annot=True, fmt='d', ax=axes[0], cmap='Blues').set(
    title='RF Confusion Matrix', xlabel='Predicted', ylabel='True')
sns.heatmap(cm_nn, annot=True, fmt='d', ax=axes[1], cmap='Greens').set(
    title='NN Confusion Matrix', xlabel='Predicted', ylabel='True')
plt.tight_layout()
plt.show()

print('Comparison and confusion matrices generated.')

In [None]:
# Install TensorFlow for NN
# Checking if tensorflow is available
import importlib
if importlib.util.find_spec('tensorflow') is None:
    %pip install tensorflow
print('TensorFlow installation step completed.')

In [None]:
# Rerun RF and NN comparison now that TensorFlow is installed
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow import keras

# Reload the raw CSV (rebinds `df`)
df = pd.read_csv('Crop Recommendation using Soil Properties and Weather Prediction (3).csv')
# Integer-encode soil colour and crop label; the encoder objects are not kept
df['Soilcolor_enc'] = LabelEncoder().fit(df['Soilcolor']).transform(df['Soilcolor'])
df['label_enc'] = LabelEncoder().fit(df['label']).transform(df['label'])
# Features are all columns left once the raw categorical and label columns are removed
X = df[[name for name in df.columns if name not in ('Soilcolor', 'label', 'label_enc')]]
y = df['label_enc']
# Stratified 80/20 split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Random Forest baseline on the unscaled features
tf_rf = RandomForestClassifier(n_estimators=100, random_state=42)
y_pred_rf = tf_rf.fit(X_train, y_train).predict(X_test)
# RF test-set metrics (precision / recall / F1 use the 'weighted' average)
rf_acc = accuracy_score(y_test, y_pred_rf)
rf_prec, rf_rec, rf_f1 = [
    metric(y_test, y_pred_rf, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
]
# Standardize for the NN using training-split statistics
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)
# NN
def build(input_dim, num_classes):
    """Return a compiled Dense(64, relu) -> Dense(32, tanh) -> softmax classifier.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``keras.Sequential`` model compiled with Adam(0.001), sparse
        categorical cross-entropy loss and an accuracy metric.
    """
    net = keras.Sequential()
    for layer in (
        keras.layers.Input(shape=(input_dim,)),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(32, activation='tanh'),
        keras.layers.Dense(num_classes, activation='softmax'),
    ):
        net.add(layer)
    net.compile(
        optimizer=keras.optimizers.Adam(0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net
# Seed the Python, NumPy and TensorFlow random generators before the network is
# built; otherwise the NN metrics differ on every run while the split and the
# Random Forest are fixed at seed 42.
keras.utils.set_random_seed(42)
num_classes = len(df['label_enc'].unique())
model = build(X_train_s.shape[1], num_classes)
# Stop after 5 epochs without a val_accuracy improvement and restore the best weights seen
e = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
model.fit(X_train_s, y_train, validation_split=0.2, epochs=50, batch_size=32, callbacks=[e], verbose=0)
# Predictions NN: argmax over the softmax outputs gives the predicted class index
y_pred_nn = np.argmax(model.predict(X_test_s), axis=1)
nn_acc = accuracy_score(y_test, y_pred_nn)
nn_prec = precision_score(y_test, y_pred_nn, average='weighted')
nn_rec = recall_score(y_test, y_pred_nn, average='weighted')
nn_f1 = f1_score(y_test, y_pred_nn, average='weighted')
# Comparison table, built row by row (one row per model)
metrics_df = pd.DataFrame(
    [
        ['Random Forest', rf_acc, rf_prec, rf_rec, rf_f1],
        ['Neural Network', nn_acc, nn_prec, nn_rec, nn_f1],
    ],
    columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score'],
)
print(metrics_df)

# Confusion matrices, RF on the left and NN on the right
cm_rf = confusion_matrix(y_test, y_pred_rf)
cm_nn = confusion_matrix(y_test, y_pred_nn)
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

sns.heatmap(cm_rf, annot=True, fmt='d', ax=axes[0], cmap='Blues')
axes[0].set(title='RF Conf Matrix', xlabel='Pred', ylabel='True')

sns.heatmap(cm_nn, annot=True, fmt='d', ax=axes[1], cmap='Greens')
axes[1].set(title='NN Conf Matrix', xlabel='Pred', ylabel='True')

plt.tight_layout()
plt.show()
print('Done')

In [None]:
# Compare Random Forest vs MLPClassifier as Neural Network fallback
# 1. Load and encode data
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Reload the raw CSV (rebinds `df`)
df = pd.read_csv('Crop Recommendation using Soil Properties and Weather Prediction (3).csv')
# Integer-encode the soil colour and the crop label; the encoders are not kept
df['Soilcolor_enc'] = LabelEncoder().fit(df['Soilcolor']).transform(df['Soilcolor'])
df['label_enc'] = LabelEncoder().fit(df['label']).transform(df['label'])
# Feature matrix: everything except the raw categorical and the label columns
X = df[[name for name in df.columns if name not in ['Soilcolor', 'label', 'label_enc']]]
y = df['label_enc']
# Stratified 80/20 split
tX, tX_test, ty, ty_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
# Random Forest trained on the unscaled features
rf = RandomForestClassifier(n_estimators=100, random_state=42)
pred_rf = rf.fit(tX, ty).predict(tX_test)
# RF test-set metrics ('weighted' averaging for precision / recall / F1)
rf_acc = accuracy_score(ty_test, pred_rf)
rf_prec, rf_rec, rf_f1 = (
    score(ty_test, pred_rf, average='weighted')
    for score in (precision_score, recall_score, f1_score)
)

# Standardize with training-split statistics; the MLP is fit on the scaled copy
scaler = StandardScaler()
tX_s = scaler.fit_transform(tX)
tX_test_s = scaler.transform(tX_test)
# Two hidden layers (64, 32) with ReLU, Adam at 0.001, at most 200 iterations
mlp = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    activation='relu',
    solver='adam',
    learning_rate_init=0.001,
    max_iter=200,
    random_state=42,
).fit(tX_s, ty)
pred_mlp = mlp.predict(tX_test_s)
# MLP test-set metrics ('weighted' averaging for precision / recall / F1)
mlp_acc = accuracy_score(ty_test, pred_mlp)
mlp_prec, mlp_rec, mlp_f1 = [
    score(ty_test, pred_mlp, average='weighted')
    for score in (precision_score, recall_score, f1_score)
]

# Test-set metrics for both models, one row per model
metrics_df = pd.DataFrame({
    'Model': ['Random Forest', 'Neural Network (MLP)'],
    'Accuracy': [rf_acc, mlp_acc],
    'Precision': [rf_prec, mlp_prec],
    'Recall': [rf_rec, mlp_rec],
    'F1 Score': [rf_f1, mlp_f1],
})
print(metrics_df)

# Confusion matrices side by side; sns.heatmap returns the Axes it drew on,
# so title and axis labels are applied with one .set(...) call per panel
cm_rf = confusion_matrix(ty_test, pred_rf)
cm_mlp = confusion_matrix(ty_test, pred_mlp)
fig, axes = plt.subplots(1, 2, figsize=(12,5))
sns.heatmap(cm_rf, annot=True, fmt='d', ax=axes[0], cmap='Blues').set(
    title='Random Forest Confusion Matrix', xlabel='Predicted', ylabel='True')
sns.heatmap(cm_mlp, annot=True, fmt='d', ax=axes[1], cmap='Greens').set(
    title='MLP Neural Network Confusion Matrix', xlabel='Predicted', ylabel='True')
plt.tight_layout()
plt.show()

print('Completed comparison table and confusion matrices.')

In [None]:
# Create a Gradio frontend for crop prediction using the trained Random Forest model

# Install gradio if not installed
try:
    import gradio as gr
except ModuleNotFoundError:
    !pip install gradio
    import gradio as gr # Import after installation

import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier

def load_resources():
    """Load the crop dataset and fit everything the Gradio predictor needs.

    The CSV is read from the working directory, ``Soilcolor`` and ``label`` are
    integer-encoded, and a Random Forest is trained on the standardized
    feature matrix.

    Returns:
        tuple: ``(rf, scaler, le_color, le_label, feature_names)`` - the fitted
        classifier, the ``StandardScaler`` fitted on the feature columns, the
        soil-colour and crop-label encoders, and the feature column names in
        training order.
    """
    df = pd.read_csv('Crop Recommendation using Soil Properties and Weather Prediction (3).csv')
    le_color = LabelEncoder().fit(df['Soilcolor'])
    le_label = LabelEncoder().fit(df['label'])
    df['Soilcolor_enc'] = le_color.transform(df['Soilcolor'])
    # The label must be encoded before the raw column is dropped from the features
    df['label_enc'] = le_label.transform(df['label'])
    # Features are every column except the raw categorical, the raw label and the encoded label
    X = df.drop(columns=['Soilcolor', 'label', 'label_enc'])
    y = df['label_enc']
    scaler = StandardScaler().fit(X)
    # Train on the scaled matrix: the prediction path applies `scaler` before
    # calling the forest, so a forest fitted on raw values would compare scaled
    # inputs against thresholds learned in the original units.
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(scaler.transform(X), y)
    return rf, scaler, le_color, le_label, X.columns.tolist()

# Train once when the cell runs; predict_crop below reads rf_model, scaler,
# le_color and le_label from the notebook namespace
rf_model, scaler, le_color, le_label, feature_names = load_resources()

# Gradio callback: encode the soil colour, scale the single row, predict, and
# decode the crop name. Uses the notebook-level le_color, scaler, rf_model and le_label.
# NOTE(review): 8 values are passed to the scaler, while load_resources fits it
# on every CSV column except Soilcolor/label - confirm the dataset really has
# exactly 7 numeric feature columns, otherwise transform() will reject the row.
def predict_crop(pH, Nitrogen, Phosphorous, Potassium, Rainfall, Temperature, Humidity, Soilcolor):
    colour_code = int(le_color.transform([Soilcolor])[0])
    row = [pH, Nitrogen, Phosphorous, Potassium, Rainfall, Temperature, Humidity, colour_code]
    predicted_code = rf_model.predict(scaler.transform([row]))[0]
    return le_label.inverse_transform([predicted_code])[0]

# Assemble the web form: seven numeric fields followed by a soil-colour
# dropdown, in the same order as predict_crop's parameters
demo = gr.Interface(
    fn=predict_crop,
    inputs=[
        *(
            gr.Number(label=caption)
            for caption in (
                'pH', 'Nitrogen', 'Phosphorous', 'Potassium',
                'Rainfall', 'Temperature', 'Humidity',
            )
        ),
        gr.Dropdown(choices=le_color.classes_.tolist(), label='Soil Color'),
    ],
    outputs=gr.Textbox(label='Recommended Crop'),
    title='Crop Recommendation System',
    description='Enter soil and weather parameters to get a recommended crop.',
)

# Start the server with server_name='0.0.0.0', then report
demo.launch(server_name='0.0.0.0')
print('Gradio frontend created and running.')

In [None]:
# Cell 1: Install Gradio
# Install gradio into the running interpreter's environment if it is missing
try:
    import gradio
    print('Gradio already installed')
except ModuleNotFoundError:
    import sys
    from subprocess import run
    # Invoke pip through the current interpreter so the package lands where the kernel can import it
    run([sys.executable, '-m', 'pip', 'install', 'gradio'], check=True)
    print('Installed gradio')

In [None]:
# Cell 2: Import Gradio and build the interface object for the crop recommender
import gradio as gr

def build_gradio_interface(rf_model, scaler, le_color, le_label):
    """Build (without launching) the Gradio form for the crop recommender.

    Args:
        rf_model: Fitted classifier; its ``predict`` receives the scaled row.
        scaler: Fitted transformer applied to the single-row feature matrix.
        le_color: Soil-colour encoder; ``classes_`` supplies the dropdown choices.
        le_label: Encoder used to turn the predicted class code into a crop name.

    Returns:
        The ``gr.Interface`` wrapping the prediction callback.
    """
    # Callback: encode the colour, scale the single row, predict, decode the crop name
    def predict_crop(pH, Nitrogen, Phosphorous, Potassium, Rainfall, Temperature, Humidity, Soilcolor):
        colour_code = int(le_color.transform([Soilcolor])[0])
        row = [pH, Nitrogen, Phosphorous, Potassium, Rainfall, Temperature, Humidity, colour_code]
        crop_code = rf_model.predict(scaler.transform([row]))[0]
        return le_label.inverse_transform([crop_code])[0]

    # Seven numeric fields in callback-parameter order, then the colour dropdown
    number_fields = [
        gr.Number(label=caption)
        for caption in (
            'pH', 'Nitrogen', 'Phosphorous', 'Potassium',
            'Rainfall', 'Temperature', 'Humidity',
        )
    ]
    colour_field = gr.Dropdown(choices=le_color.classes_.tolist(), label='Soil Color')
    return gr.Interface(
        fn=predict_crop,
        inputs=number_fields + [colour_field],
        outputs=gr.Textbox(label='Recommended Crop'),
        title='Crop Recommendation System',
        description='Enter soil and weather parameters to get a recommended crop.',
    )

# Build the interface from the rf_model, scaler, le_color and le_label objects
# already present in the notebook namespace; it is only constructed here, not launched
demo = build_gradio_interface(rf_model, scaler, le_color, le_label)
print('Gradio interface object created.')

In [None]:
# Cell 1: Load data, preprocess, and train Random Forest model
# We load the CSV, encode categorical features, standardize, and fit a RandomForestClassifier.
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier

# Read the raw CSV (rebinds `df`)
df = pd.read_csv('Crop Recommendation using Soil Properties and Weather Prediction (3).csv')
# Fit one encoder per categorical column
e_color, e_label = LabelEncoder(), LabelEncoder()
e_color.fit(df['Soilcolor'])
e_label.fit(df['label'])
# Add the integer soil-colour code, then pick the model inputs ('Ph', not 'pH')
# NOTE(review): 'rainfall', 'temperature' and 'humidity' are requested here, but
# the later copy of this cell replaces them with seasonal columns - confirm
# that they exist in the CSV.
df['Soilcolor_enc'] = e_color.transform(df['Soilcolor'])
X = df[[
    'Ph', 'N', 'P', 'K',
    'rainfall', 'temperature', 'humidity',
    'Soilcolor_enc',
]]
y = e_label.transform(df['label'])
# Standardize the features and train the forest on the scaled matrix
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_scaled, y)

# Preview the (unscaled) selected features
print(X.head())

In [None]:
# Cell 1: Load data, preprocess, and train Random Forest model
# We load the CSV, encode categorical features, standardize, and fit a RandomForestClassifier.
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier

# Read the raw CSV (rebinds `df`)
df = pd.read_csv('Crop Recommendation using Soil Properties and Weather Prediction (3).csv')

# Fit the encoders, then add the integer soil-colour code as a new column
e_color = LabelEncoder()
e_color.fit(df['Soilcolor'])
e_label = LabelEncoder()
e_label.fit(df['label'])
df['Soilcolor_enc'] = e_color.transform(df['Soilcolor'])

# Model inputs: soil chemistry, a subset of the seasonal weather columns, and
# the encoded soil colour (the raw 'Soilcolor' string column is left out, so
# every selected column is numeric)
X = df.loc[:, [
    'Ph', 'N', 'P', 'K',
    'T2M_MAX-W', 'T2M_MAX-Sp', 'T2M_MAX-Su', 'T2M_MAX-Au',
    'T2M_MIN-W', 'PRECTOTCORR-W',
    'Soilcolor_enc',
]]
y = e_label.transform(df['label'])

# Standardize, then train the forest on the scaled matrix
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_scaled, y)

# Preview the (unscaled) selected features
print(X.head())

In [None]:
# Cell: Create and launch Gradio app for the trained Random Forest model
import gradio as gr

# Gradio callback: encode the soil colour, scale the single row, predict, and
# decode the crop name. Uses the notebook-level e_color, scaler, rf_model and e_label.
# NOTE(review): 7 values are passed here, while the training cell that precedes
# this one fits `scaler` on 11 columns - confirm which scaler/model pair is
# expected to be in scope.
def predict_crop(pH, N, P, K, Zn, S, soilcolor):
    colour_code = e_color.transform([soilcolor])[0]
    row = [pH, N, P, K, Zn, S, colour_code]
    crop_code = rf_model.predict(scaler.transform([row]))[0]
    return e_label.inverse_transform([crop_code])[0]

# Web form: six numeric soil fields followed by the soil-colour dropdown,
# in the same order as predict_crop's parameters
demo = gr.Interface(
    fn=predict_crop,
    inputs=[gr.Number(label=symbol) for symbol in ('pH', 'N', 'P', 'K', 'Zn', 'S')]
    + [gr.Dropdown(choices=e_color.classes_.tolist(), label='Soil Color')],
    outputs=gr.Textbox(label='Recommended Crop'),
    title='Crop Recommendation System',
    description='Enter soil parameters to get a recommended crop (Random Forest model).',
)

# Launch with a public share link enabled
demo.launch(share=True)
print('Gradio app launched.')

In [1]:
# Cell: Define and launch a full-feature Gradio app including seasonal weather features
import gradio as gr

# Gradio callback for the full feature set: encode the soil colour, assemble
# the 28-value row, scale it, predict, and decode the crop name. Uses the
# notebook-level e_color, scaler, rf_model and e_label.
# NOTE(review): confirm `scaler` / `rf_model` were fitted on these 28 columns in this order.
def predict_crop_full(Ph, N, P, K, Zn, S,
                      QV2M_W, QV2M_Sp, QV2M_Su, QV2M_Au,
                      T2M_MAX_W, T2M_MAX_Sp, T2M_MAX_Su, T2M_MAX_Au,
                      T2M_MIN_W, T2M_MIN_Sp, T2M_MIN_Su, T2M_MIN_Au,
                      PRECTOTCORR_W, PRECTOTCORR_Sp, PRECTOTCORR_Su, PRECTOTCORR_Au,
                      WD10M, GWETTOP, CLOUD_AMT, WS2M_RANGE, PS,
                      Soilcolor):
    colour_code = e_color.transform([Soilcolor])[0]
    # K, P, N are placed in that order (not the N, P, K order of the signature);
    # presumably this mirrors the training column order - verify against the fitted scaler
    soil = [Ph, K, P, N, Zn, S]
    qv2m = [QV2M_W, QV2M_Sp, QV2M_Su, QV2M_Au]
    t2m_max = [T2M_MAX_W, T2M_MAX_Sp, T2M_MAX_Su, T2M_MAX_Au]
    t2m_min = [T2M_MIN_W, T2M_MIN_Sp, T2M_MIN_Su, T2M_MIN_Au]
    prectotcorr = [PRECTOTCORR_W, PRECTOTCORR_Sp, PRECTOTCORR_Su, PRECTOTCORR_Au]
    tail = [WD10M, GWETTOP, CLOUD_AMT, WS2M_RANGE, PS, colour_code]
    row = soil + qv2m + t2m_max + t2m_min + prectotcorr + tail
    crop_code = rf_model.predict(scaler.transform([row]))[0]
    return e_label.inverse_transform([crop_code])[0]

# One numeric field per model input, in predict_crop_full's parameter order,
# followed by the soil-colour dropdown
inputs = [
    gr.Number(label=column)
    for column in (
        'Ph', 'N', 'P', 'K', 'Zn', 'S',
        'QV2M-W', 'QV2M-Sp', 'QV2M-Su', 'QV2M-Au',
        'T2M_MAX-W', 'T2M_MAX-Sp', 'T2M_MAX-Su', 'T2M_MAX-Au',
        'T2M_MIN-W', 'T2M_MIN-Sp', 'T2M_MIN-Su', 'T2M_MIN-Au',
        'PRECTOTCORR-W', 'PRECTOTCORR-Sp', 'PRECTOTCORR-Su', 'PRECTOTCORR-Au',
        'WD10M', 'GWETTOP', 'CLOUD_AMT', 'WS2M_RANGE', 'PS',
    )
]
inputs.append(gr.Dropdown(label='Soilcolor', choices=e_color.classes_.tolist()))

demo_full = gr.Interface(
    fn=predict_crop_full,
    inputs=inputs,
    outputs=gr.Textbox(label='Recommended Crop'),
    title='Full Crop Recommendation',
    description='Provide soil and seasonal weather parameters to predict the optimal crop.',
)

# Launch with a public share link enabled
demo_full.launch(share=True)
print('Full Gradio frontend launched.')

NameError: name 'e_color' is not defined