<a href="https://colab.research.google.com/github/SahilLokhande2604/Crop_Yield_Prediction/blob/main/Crop_Yield_Using_Shap_Feature_Extraction_for_top_10_crops.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Feature Importance using SHAP for the top 10 crops


In [None]:
!pip install scikeras

Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0


In [None]:
!pip install shap



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras import layers, models
import shap
import matplotlib.pyplot as plt

# Data loading, preprocessing and model training.
# This cell creates the shared state that the per-crop cells below read:
# `data`, `features`, `numerical_features`, `categorical_features`,
# the fitted `preprocessor` and the trained `model`.

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling repeat on Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Load the dataset
data = pd.read_csv('crop_yield.csv')

# Identifying features and target
target = 'Yield'
features = ['Crop', 'Crop_Year', 'Season', 'State', 'Area', 'Annual_Rainfall', 'Fertilizer', 'Pesticide']

X = data[features]
y = data[target]

# Splitting data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing Pipeline:
# 1. OneHotEncode categorical variables
# 2. Standardize numerical variables
categorical_features = ['Crop', 'Season', 'State']
numerical_features = ['Crop_Year', 'Area', 'Annual_Rainfall', 'Fertilizer', 'Pesticide']

# Define Column Transformer.
# The output column order is the numeric columns followed by the one-hot
# columns; the per-crop cells rely on that order when naming features.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        # handle_unknown='ignore': a category that never appeared in the
        # training split is encoded as all zeros instead of making
        # transform() raise on the test split or on a per-crop subset.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Fit on the training split only, then apply the same fitted transform to the
# test split so no test statistics leak into the scaler/encoder.
X_train_preprocessed = preprocessor.fit_transform(X_train)
X_test_preprocessed = preprocessor.transform(X_test)

# Define the neural network architecture.
# An explicit Input layer declares the feature width; Keras 3 warns when
# `input_shape` is passed to a layer inside a Sequential model.
model = models.Sequential([
    layers.Input(shape=(X_train_preprocessed.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)  # Output layer for regression
])

# Compile the model (MSE loss, MAE reported alongside it)
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model; 20% of the training split is held out for validation
history = model.fit(X_train_preprocessed, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=1)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 584726.1250 - mae: 72.7376 - val_loss: 515559.7812 - val_mae: 126.5494
Epoch 2/20
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 263190.1875 - mae: 75.8955 - val_loss: 61837.9531 - val_mae: 37.7552
Epoch 3/20
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 85602.2344 - mae: 33.0691 - val_loss: 36338.7578 - val_mae: 29.2731
Epoch 4/20
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 30205.0957 - mae: 23.1566 - val_loss: 26861.1465 - val_mae: 21.7608
Epoch 5/20
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 45976.1250 - mae: 22.3057 - val_loss: 24246.0020 - val_mae: 19.9377
Epoch 6/20
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 28180.9629 - mae: 19.1818 - val_loss: 21357.7988 - val_mae: 19.2687
Epoch 7/20
[1m394/394[

In [None]:
import numpy as np
# Ask interactively which crop to explain. The answer is stored in the global
# `crop_name`, which the next cells use to filter `data` and to title the plot.
crop_name = input("Enter the name of the crop you want to analyze: ")
print(f"Analyzing feature importance for {crop_name}")

Enter the name of the crop you want to analyze: Rice
Analyzing feature importance for Rice


In [None]:

# Kernel SHAP feature importance for the crop named in `crop_name`.
# Reads globals from earlier cells: data, features, preprocessor, model,
#   numerical_features, categorical_features, crop_name.
# Publishes for the plotting cell: original_feature_names, mean_abs_shap_values.

# Subset data for the specific crop
crop_data = data[data['Crop'] == crop_name]

if len(crop_data) < 10:
    # Too few rows; this also covers a crop name that matches nothing.
    print(f"Skipping {crop_name} due to insufficient data (less than 10 samples)")
    # Publish empty results so the plotting cell cannot silently reuse the
    # importances that were computed for a previously analysed crop.
    original_feature_names = []
    mean_abs_shap_values = np.array([])
else:
    # Preprocess the crop rows with the already-fitted transformer (no refit).
    X_crop = crop_data[features]
    X_crop_preprocessed = preprocessor.transform(X_crop)

    # Summarise the background data with k-means (at most 10 centroids).
    # shape[0] is used for the row count because the matrix may be sparse.
    num_clusters = min(10, X_crop_preprocessed.shape[0])
    background_crop = shap.kmeans(X_crop_preprocessed, num_clusters)

    # verbose=0 stops Keras printing a progress bar for every batch that
    # Kernel SHAP sends to the model.
    explainer = shap.KernelExplainer(
        lambda rows: model.predict(rows, verbose=0), background_crop
    )

    # Compute SHAP values for every row of this crop.
    shap_values_crop = explainer.shap_values(X_crop_preprocessed)
    shap_values_array = np.array(shap_values_crop)

    # Feature names in the transformer's output order: numeric columns first,
    # then the one-hot columns produced by the 'cat' encoder.
    original_feature_names = numerical_features + list(
        preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
    )

    # Collapse to (rows, n_features) before averaging |SHAP| over rows, so the
    # result always has exactly one value per feature. This is valid because
    # the model has a single output.
    # NOTE(review): SHAP releases differ in whether they return a
    # (rows, features, outputs) array or a list with one (rows, features)
    # array per output - confirm against the installed version.
    n_features = X_crop_preprocessed.shape[1]
    mean_abs_shap_values = np.abs(shap_values_array).reshape(-1, n_features).mean(axis=0)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step


  0%|          | 0/1197 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
import plotly.graph_objects as go

# Bar chart of mean |SHAP| per preprocessed feature for the current crop.
# Expects `original_feature_names`, `mean_abs_shap_values` and `crop_name`
# to have been set by the preceding cells.
fig = go.Figure(
    data=[
        go.Bar(
            x=original_feature_names,
            y=mean_abs_shap_values,
            marker={"color": "blue"},
            hovertemplate='<b>%{x}</b><br>Mean Absolute SHAP Value: %{y}<extra></extra>',
        )
    ]
)

# Titles and axis labels; tick labels are rotated so long feature names fit.
fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_title="Features",
    yaxis_title="Mean Absolute SHAP Value",
    title=f"Feature Importance for {crop_name}",
)

# Render the interactive figure
fig.show()


# Turmeric

In [None]:
import numpy as np
# Ask interactively which crop to explain. The answer is stored in the global
# `crop_name`, which the next cells use to filter `data` and to title the plot.
crop_name = input("Enter the name of the crop you want to analyze: ")
print(f"Analyzing feature importance for {crop_name}")

Enter the name of the crop you want to analyze: Turmeric
Analyzing feature importance for Turmeric


In [None]:

# Kernel SHAP feature importance for the crop named in `crop_name`.
# Reads globals from earlier cells: data, features, preprocessor, model,
#   numerical_features, categorical_features, crop_name.
# Publishes for the plotting cell: original_feature_names, mean_abs_shap_values.

# Subset data for the specific crop
crop_data = data[data['Crop'] == crop_name]

if len(crop_data) < 10:
    # Too few rows; this also covers a crop name that matches nothing.
    print(f"Skipping {crop_name} due to insufficient data (less than 10 samples)")
    # Publish empty results so the plotting cell cannot silently reuse the
    # importances that were computed for a previously analysed crop.
    original_feature_names = []
    mean_abs_shap_values = np.array([])
else:
    # Preprocess the crop rows with the already-fitted transformer (no refit).
    X_crop = crop_data[features]
    X_crop_preprocessed = preprocessor.transform(X_crop)

    # Summarise the background data with k-means (at most 10 centroids).
    # shape[0] is used for the row count because the matrix may be sparse.
    num_clusters = min(10, X_crop_preprocessed.shape[0])
    background_crop = shap.kmeans(X_crop_preprocessed, num_clusters)

    # verbose=0 stops Keras printing a progress bar for every batch that
    # Kernel SHAP sends to the model.
    explainer = shap.KernelExplainer(
        lambda rows: model.predict(rows, verbose=0), background_crop
    )

    # Compute SHAP values for every row of this crop.
    shap_values_crop = explainer.shap_values(X_crop_preprocessed)
    shap_values_array = np.array(shap_values_crop)

    # Feature names in the transformer's output order: numeric columns first,
    # then the one-hot columns produced by the 'cat' encoder.
    original_feature_names = numerical_features + list(
        preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
    )

    # Collapse to (rows, n_features) before averaging |SHAP| over rows, so the
    # result always has exactly one value per feature. This is valid because
    # the model has a single output.
    # NOTE(review): SHAP releases differ in whether they return a
    # (rows, features, outputs) array or a list with one (rows, features)
    # array per output - confirm against the installed version.
    n_features = X_crop_preprocessed.shape[1]
    mean_abs_shap_values = np.abs(shap_values_array).reshape(-1, n_features).mean(axis=0)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step


  0%|          | 0/337 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
[1m649/649[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m649/649[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m649/649[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m649/649[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m649/649[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [None]:
import plotly.graph_objects as go

# Bar chart of mean |SHAP| per preprocessed feature for the current crop.
# Expects `original_feature_names`, `mean_abs_shap_values` and `crop_name`
# to have been set by the preceding cells.
fig = go.Figure(
    data=[
        go.Bar(
            x=original_feature_names,
            y=mean_abs_shap_values,
            marker={"color": "blue"},
            hovertemplate='<b>%{x}</b><br>Mean Absolute SHAP Value: %{y}<extra></extra>',
        )
    ]
)

# Titles and axis labels; tick labels are rotated so long feature names fit.
fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_title="Features",
    yaxis_title="Mean Absolute SHAP Value",
    title=f"Feature Importance for {crop_name}",
)

# Render the interactive figure
fig.show()


# Sugarcane

In [None]:
import numpy as np
# Ask interactively which crop to explain. The answer is stored in the global
# `crop_name`, which the next cells use to filter `data` and to title the plot.
crop_name = input("Enter the name of the crop you want to analyze: ")
print(f"Analyzing feature importance for {crop_name}")

Enter the name of the crop you want to analyze: Sugarcane
Analyzing feature importance for Sugarcane


In [None]:

# Kernel SHAP feature importance for the crop named in `crop_name`.
# Reads globals from earlier cells: data, features, preprocessor, model,
#   numerical_features, categorical_features, crop_name.
# Publishes for the plotting cell: original_feature_names, mean_abs_shap_values.

# Subset data for the specific crop
crop_data = data[data['Crop'] == crop_name]

if len(crop_data) < 10:
    # Too few rows; this also covers a crop name that matches nothing.
    print(f"Skipping {crop_name} due to insufficient data (less than 10 samples)")
    # Publish empty results so the plotting cell cannot silently reuse the
    # importances that were computed for a previously analysed crop.
    original_feature_names = []
    mean_abs_shap_values = np.array([])
else:
    # Preprocess the crop rows with the already-fitted transformer (no refit).
    X_crop = crop_data[features]
    X_crop_preprocessed = preprocessor.transform(X_crop)

    # Summarise the background data with k-means (at most 10 centroids).
    # shape[0] is used for the row count because the matrix may be sparse.
    num_clusters = min(10, X_crop_preprocessed.shape[0])
    background_crop = shap.kmeans(X_crop_preprocessed, num_clusters)

    # verbose=0 stops Keras printing a progress bar for every batch that
    # Kernel SHAP sends to the model.
    explainer = shap.KernelExplainer(
        lambda rows: model.predict(rows, verbose=0), background_crop
    )

    # Compute SHAP values for every row of this crop.
    shap_values_crop = explainer.shap_values(X_crop_preprocessed)
    shap_values_array = np.array(shap_values_crop)

    # Feature names in the transformer's output order: numeric columns first,
    # then the one-hot columns produced by the 'cat' encoder.
    original_feature_names = numerical_features + list(
        preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
    )

    # Collapse to (rows, n_features) before averaging |SHAP| over rows, so the
    # result always has exactly one value per feature. This is valid because
    # the model has a single output.
    # NOTE(review): SHAP releases differ in whether they return a
    # (rows, features, outputs) array or a list with one (rows, features)
    # array per output - confirm against the installed version.
    n_features = X_crop_preprocessed.shape[1]
    mean_abs_shap_values = np.abs(shap_values_array).reshape(-1, n_features).mean(axis=0)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step


  0%|          | 0/605 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
import plotly.graph_objects as go

# Bar chart of mean |SHAP| per preprocessed feature for the current crop.
# Expects `original_feature_names`, `mean_abs_shap_values` and `crop_name`
# to have been set by the preceding cells.
fig = go.Figure(
    data=[
        go.Bar(
            x=original_feature_names,
            y=mean_abs_shap_values,
            marker={"color": "blue"},
            hovertemplate='<b>%{x}</b><br>Mean Absolute SHAP Value: %{y}<extra></extra>',
        )
    ]
)

# Titles and axis labels; tick labels are rotated so long feature names fit.
fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_title="Features",
    yaxis_title="Mean Absolute SHAP Value",
    title=f"Feature Importance for {crop_name}",
)

# Render the interactive figure
fig.show()


# Castor seed

In [None]:
import numpy as np
# Ask interactively which crop to explain. The answer is stored in the global
# `crop_name`, which the next cells use to filter `data` and to title the plot.
crop_name = input("Enter the name of the crop you want to analyze: ")
print(f"Analyzing feature importance for {crop_name}")

Enter the name of the crop you want to analyze: Castor seed
Analyzing feature importance for Castor seed


In [None]:

# Kernel SHAP feature importance for the crop named in `crop_name`.
# Reads globals from earlier cells: data, features, preprocessor, model,
#   numerical_features, categorical_features, crop_name.
# Publishes for the plotting cell: original_feature_names, mean_abs_shap_values.

# Subset data for the specific crop
crop_data = data[data['Crop'] == crop_name]

if len(crop_data) < 10:
    # Too few rows; this also covers a crop name that matches nothing.
    print(f"Skipping {crop_name} due to insufficient data (less than 10 samples)")
    # Publish empty results so the plotting cell cannot silently reuse the
    # importances that were computed for a previously analysed crop.
    original_feature_names = []
    mean_abs_shap_values = np.array([])
else:
    # Preprocess the crop rows with the already-fitted transformer (no refit).
    X_crop = crop_data[features]
    X_crop_preprocessed = preprocessor.transform(X_crop)

    # Summarise the background data with k-means (at most 10 centroids).
    # shape[0] is used for the row count because the matrix may be sparse.
    num_clusters = min(10, X_crop_preprocessed.shape[0])
    background_crop = shap.kmeans(X_crop_preprocessed, num_clusters)

    # verbose=0 stops Keras printing a progress bar for every batch that
    # Kernel SHAP sends to the model.
    explainer = shap.KernelExplainer(
        lambda rows: model.predict(rows, verbose=0), background_crop
    )

    # Compute SHAP values for every row of this crop.
    shap_values_crop = explainer.shap_values(X_crop_preprocessed)
    shap_values_array = np.array(shap_values_crop)

    # Feature names in the transformer's output order: numeric columns first,
    # then the one-hot columns produced by the 'cat' encoder.
    original_feature_names = numerical_features + list(
        preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
    )

    # Collapse to (rows, n_features) before averaging |SHAP| over rows, so the
    # result always has exactly one value per feature. This is valid because
    # the model has a single output.
    # NOTE(review): SHAP releases differ in whether they return a
    # (rows, features, outputs) array or a list with one (rows, features)
    # array per output - confirm against the installed version.
    n_features = X_crop_preprocessed.shape[1]
    mean_abs_shap_values = np.abs(shap_values_array).reshape(-1, n_features).mean(axis=0)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


  0%|          | 0/300 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m649/649[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
import plotly.graph_objects as go

# Bar chart of mean |SHAP| per preprocessed feature for the current crop.
# Expects `original_feature_names`, `mean_abs_shap_values` and `crop_name`
# to have been set by the preceding cells.
fig = go.Figure(
    data=[
        go.Bar(
            x=original_feature_names,
            y=mean_abs_shap_values,
            marker={"color": "blue"},
            hovertemplate='<b>%{x}</b><br>Mean Absolute SHAP Value: %{y}<extra></extra>',
        )
    ]
)

# Titles and axis labels; tick labels are rotated so long feature names fit.
fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_title="Features",
    yaxis_title="Mean Absolute SHAP Value",
    title=f"Feature Importance for {crop_name}",
)

# Render the interactive figure
fig.show()


# Cashewnut

In [None]:
import numpy as np
# Ask interactively which crop to explain. The answer is stored in the global
# `crop_name`, which the next cells use to filter `data` and to title the plot.
crop_name = input("Enter the name of the crop you want to analyze: ")
print(f"Analyzing feature importance for {crop_name}")

Enter the name of the crop you want to analyze: Cashewnut
Analyzing feature importance for Cashewnut


In [None]:

# Kernel SHAP feature importance for the crop named in `crop_name`.
# Reads globals from earlier cells: data, features, preprocessor, model,
#   numerical_features, categorical_features, crop_name.
# Publishes for the plotting cell: original_feature_names, mean_abs_shap_values.

# Subset data for the specific crop
crop_data = data[data['Crop'] == crop_name]

if len(crop_data) < 10:
    # Too few rows; this also covers a crop name that matches nothing.
    print(f"Skipping {crop_name} due to insufficient data (less than 10 samples)")
    # Publish empty results so the plotting cell cannot silently reuse the
    # importances that were computed for a previously analysed crop.
    original_feature_names = []
    mean_abs_shap_values = np.array([])
else:
    # Preprocess the crop rows with the already-fitted transformer (no refit).
    X_crop = crop_data[features]
    X_crop_preprocessed = preprocessor.transform(X_crop)

    # Summarise the background data with k-means (at most 10 centroids).
    # shape[0] is used for the row count because the matrix may be sparse.
    num_clusters = min(10, X_crop_preprocessed.shape[0])
    background_crop = shap.kmeans(X_crop_preprocessed, num_clusters)

    # verbose=0 stops Keras printing a progress bar for every batch that
    # Kernel SHAP sends to the model.
    explainer = shap.KernelExplainer(
        lambda rows: model.predict(rows, verbose=0), background_crop
    )

    # Compute SHAP values for every row of this crop.
    shap_values_crop = explainer.shap_values(X_crop_preprocessed)
    shap_values_array = np.array(shap_values_crop)

    # Feature names in the transformer's output order: numeric columns first,
    # then the one-hot columns produced by the 'cat' encoder.
    original_feature_names = numerical_features + list(
        preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
    )

    # Collapse to (rows, n_features) before averaging |SHAP| over rows, so the
    # result always has exactly one value per feature. This is valid because
    # the model has a single output.
    # NOTE(review): SHAP releases differ in whether they return a
    # (rows, features, outputs) array or a list with one (rows, features)
    # array per output - confirm against the installed version.
    n_features = X_crop_preprocessed.shape[1]
    mean_abs_shap_values = np.abs(shap_values_array).reshape(-1, n_features).mean(axis=0)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


  0%|          | 0/134 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [None]:
import plotly.graph_objects as go

# Bar chart of mean |SHAP| per preprocessed feature for the current crop.
# Expects `original_feature_names`, `mean_abs_shap_values` and `crop_name`
# to have been set by the preceding cells.
fig = go.Figure(
    data=[
        go.Bar(
            x=original_feature_names,
            y=mean_abs_shap_values,
            marker={"color": "blue"},
            hovertemplate='<b>%{x}</b><br>Mean Absolute SHAP Value: %{y}<extra></extra>',
        )
    ]
)

# Titles and axis labels; tick labels are rotated so long feature names fit.
fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_title="Features",
    yaxis_title="Mean Absolute SHAP Value",
    title=f"Feature Importance for {crop_name}",
)

# Render the interactive figure
fig.show()


# Tobacco

In [None]:
import numpy as np
# Ask interactively which crop to explain. The answer is stored in the global
# `crop_name`, which the next cells use to filter `data` and to title the plot.
crop_name = input("Enter the name of the crop you want to analyze: ")
print(f"Analyzing feature importance for {crop_name}")

Enter the name of the crop you want to analyze: Tobacco
Analyzing feature importance for Tobacco


In [None]:

# Kernel SHAP feature importance for the crop named in `crop_name`.
# Reads globals from earlier cells: data, features, preprocessor, model,
#   numerical_features, categorical_features, crop_name.
# Publishes for the plotting cell: original_feature_names, mean_abs_shap_values.

# Subset data for the specific crop
crop_data = data[data['Crop'] == crop_name]

if len(crop_data) < 10:
    # Too few rows; this also covers a crop name that matches nothing.
    print(f"Skipping {crop_name} due to insufficient data (less than 10 samples)")
    # Publish empty results so the plotting cell cannot silently reuse the
    # importances that were computed for a previously analysed crop.
    original_feature_names = []
    mean_abs_shap_values = np.array([])
else:
    # Preprocess the crop rows with the already-fitted transformer (no refit).
    X_crop = crop_data[features]
    X_crop_preprocessed = preprocessor.transform(X_crop)

    # Summarise the background data with k-means (at most 10 centroids).
    # shape[0] is used for the row count because the matrix may be sparse.
    num_clusters = min(10, X_crop_preprocessed.shape[0])
    background_crop = shap.kmeans(X_crop_preprocessed, num_clusters)

    # verbose=0 stops Keras printing a progress bar for every batch that
    # Kernel SHAP sends to the model.
    explainer = shap.KernelExplainer(
        lambda rows: model.predict(rows, verbose=0), background_crop
    )

    # Compute SHAP values for every row of this crop.
    shap_values_crop = explainer.shap_values(X_crop_preprocessed)
    shap_values_array = np.array(shap_values_crop)

    # Feature names in the transformer's output order: numeric columns first,
    # then the one-hot columns produced by the 'cat' encoder.
    original_feature_names = numerical_features + list(
        preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
    )

    # Collapse to (rows, n_features) before averaging |SHAP| over rows, so the
    # result always has exactly one value per feature. This is valid because
    # the model has a single output.
    # NOTE(review): SHAP releases differ in whether they return a
    # (rows, features, outputs) array or a list with one (rows, features)
    # array per output - confirm against the installed version.
    n_features = X_crop_preprocessed.shape[1]
    mean_abs_shap_values = np.abs(shap_values_array).reshape(-1, n_features).mean(axis=0)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


  0%|          | 0/364 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
import plotly.graph_objects as go

# Bar chart of mean |SHAP| per preprocessed feature for the current crop.
# Expects `original_feature_names`, `mean_abs_shap_values` and `crop_name`
# to have been set by the preceding cells.
fig = go.Figure(
    data=[
        go.Bar(
            x=original_feature_names,
            y=mean_abs_shap_values,
            marker={"color": "blue"},
            hovertemplate='<b>%{x}</b><br>Mean Absolute SHAP Value: %{y}<extra></extra>',
        )
    ]
)

# Titles and axis labels; tick labels are rotated so long feature names fit.
fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_title="Features",
    yaxis_title="Mean Absolute SHAP Value",
    title=f"Feature Importance for {crop_name}",
)

# Render the interactive figure
fig.show()


# Maize

In [None]:
import numpy as np
# Ask interactively which crop to explain. The answer is stored in the global
# `crop_name`, which the next cells use to filter `data` and to title the plot.
crop_name = input("Enter the name of the crop you want to analyze: ")
print(f"Analyzing feature importance for {crop_name}")

Enter the name of the crop you want to analyze: Maize
Analyzing feature importance for Maize


In [None]:

# Kernel SHAP feature importance for the crop named in `crop_name`.
# Reads globals from earlier cells: data, features, preprocessor, model,
#   numerical_features, categorical_features, crop_name.
# Publishes for the plotting cell: original_feature_names, mean_abs_shap_values.

# Subset data for the specific crop
crop_data = data[data['Crop'] == crop_name]

if len(crop_data) < 10:
    # Too few rows; this also covers a crop name that matches nothing.
    print(f"Skipping {crop_name} due to insufficient data (less than 10 samples)")
    # Publish empty results so the plotting cell cannot silently reuse the
    # importances that were computed for a previously analysed crop.
    original_feature_names = []
    mean_abs_shap_values = np.array([])
else:
    # Preprocess the crop rows with the already-fitted transformer (no refit).
    X_crop = crop_data[features]
    X_crop_preprocessed = preprocessor.transform(X_crop)

    # Summarise the background data with k-means (at most 10 centroids).
    # shape[0] is used for the row count because the matrix may be sparse.
    num_clusters = min(10, X_crop_preprocessed.shape[0])
    background_crop = shap.kmeans(X_crop_preprocessed, num_clusters)

    # verbose=0 stops Keras printing a progress bar for every batch that
    # Kernel SHAP sends to the model.
    explainer = shap.KernelExplainer(
        lambda rows: model.predict(rows, verbose=0), background_crop
    )

    # Compute SHAP values for every row of this crop.
    shap_values_crop = explainer.shap_values(X_crop_preprocessed)
    shap_values_array = np.array(shap_values_crop)

    # Feature names in the transformer's output order: numeric columns first,
    # then the one-hot columns produced by the 'cat' encoder.
    original_feature_names = numerical_features + list(
        preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
    )

    # Collapse to (rows, n_features) before averaging |SHAP| over rows, so the
    # result always has exactly one value per feature. This is valid because
    # the model has a single output.
    # NOTE(review): SHAP releases differ in whether they return a
    # (rows, features, outputs) array or a list with one (rows, features)
    # array per output - confirm against the installed version.
    n_features = X_crop_preprocessed.shape[1]
    mean_abs_shap_values = np.abs(shap_values_array).reshape(-1, n_features).mean(axis=0)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


  0%|          | 0/975 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m648/648[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
import plotly.graph_objects as go

# Bar chart of mean |SHAP| per preprocessed feature for the current crop.
# Expects `original_feature_names`, `mean_abs_shap_values` and `crop_name`
# to have been set by the preceding cells.
fig = go.Figure(
    data=[
        go.Bar(
            x=original_feature_names,
            y=mean_abs_shap_values,
            marker={"color": "blue"},
            hovertemplate='<b>%{x}</b><br>Mean Absolute SHAP Value: %{y}<extra></extra>',
        )
    ]
)

# Titles and axis labels; tick labels are rotated so long feature names fit.
fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_title="Features",
    yaxis_title="Mean Absolute SHAP Value",
    title=f"Feature Importance for {crop_name}",
)

# Render the interactive figure
fig.show()


In [None]:
import numpy as np
# Ask interactively which crop to explain. The answer is stored in the global
# `crop_name`, which the next cells use to filter `data` and to title the plot.
crop_name = input("Enter the name of the crop you want to analyze: ")
print(f"Analyzing feature importance for {crop_name}")

In [None]:

# Kernel SHAP feature importance for the crop named in `crop_name`.
# Reads globals from earlier cells: data, features, preprocessor, model,
#   numerical_features, categorical_features, crop_name.
# Publishes for the plotting cell: original_feature_names, mean_abs_shap_values.

# Subset data for the specific crop
crop_data = data[data['Crop'] == crop_name]

if len(crop_data) < 10:
    # Too few rows; this also covers a crop name that matches nothing.
    print(f"Skipping {crop_name} due to insufficient data (less than 10 samples)")
    # Publish empty results so the plotting cell cannot silently reuse the
    # importances that were computed for a previously analysed crop.
    original_feature_names = []
    mean_abs_shap_values = np.array([])
else:
    # Preprocess the crop rows with the already-fitted transformer (no refit).
    X_crop = crop_data[features]
    X_crop_preprocessed = preprocessor.transform(X_crop)

    # Summarise the background data with k-means (at most 10 centroids).
    # shape[0] is used for the row count because the matrix may be sparse.
    num_clusters = min(10, X_crop_preprocessed.shape[0])
    background_crop = shap.kmeans(X_crop_preprocessed, num_clusters)

    # verbose=0 stops Keras printing a progress bar for every batch that
    # Kernel SHAP sends to the model.
    explainer = shap.KernelExplainer(
        lambda rows: model.predict(rows, verbose=0), background_crop
    )

    # Compute SHAP values for every row of this crop.
    shap_values_crop = explainer.shap_values(X_crop_preprocessed)
    shap_values_array = np.array(shap_values_crop)

    # Feature names in the transformer's output order: numeric columns first,
    # then the one-hot columns produced by the 'cat' encoder.
    original_feature_names = numerical_features + list(
        preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
    )

    # Collapse to (rows, n_features) before averaging |SHAP| over rows, so the
    # result always has exactly one value per feature. This is valid because
    # the model has a single output.
    # NOTE(review): SHAP releases differ in whether they return a
    # (rows, features, outputs) array or a list with one (rows, features)
    # array per output - confirm against the installed version.
    n_features = X_crop_preprocessed.shape[1]
    mean_abs_shap_values = np.abs(shap_values_array).reshape(-1, n_features).mean(axis=0)


In [None]:
import plotly.graph_objects as go

# Bar chart of mean |SHAP| per preprocessed feature for the current crop.
# Expects `original_feature_names`, `mean_abs_shap_values` and `crop_name`
# to have been set by the preceding cells.
fig = go.Figure(
    data=[
        go.Bar(
            x=original_feature_names,
            y=mean_abs_shap_values,
            marker={"color": "blue"},
            hovertemplate='<b>%{x}</b><br>Mean Absolute SHAP Value: %{y}<extra></extra>',
        )
    ]
)

# Titles and axis labels; tick labels are rotated so long feature names fit.
fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_title="Features",
    yaxis_title="Mean Absolute SHAP Value",
    title=f"Feature Importance for {crop_name}",
)

# Render the interactive figure
fig.show()


In [None]:
import numpy as np
# Ask interactively which crop to explain. The answer is stored in the global
# `crop_name`, which the next cells use to filter `data` and to title the plot.
crop_name = input("Enter the name of the crop you want to analyze: ")
print(f"Analyzing feature importance for {crop_name}")

In [None]:

# Kernel SHAP feature importance for the crop named in `crop_name`.
# Reads globals from earlier cells: data, features, preprocessor, model,
#   numerical_features, categorical_features, crop_name.
# Publishes for the plotting cell: original_feature_names, mean_abs_shap_values.

# Subset data for the specific crop
crop_data = data[data['Crop'] == crop_name]

if len(crop_data) < 10:
    # Too few rows; this also covers a crop name that matches nothing.
    print(f"Skipping {crop_name} due to insufficient data (less than 10 samples)")
    # Publish empty results so the plotting cell cannot silently reuse the
    # importances that were computed for a previously analysed crop.
    original_feature_names = []
    mean_abs_shap_values = np.array([])
else:
    # Preprocess the crop rows with the already-fitted transformer (no refit).
    X_crop = crop_data[features]
    X_crop_preprocessed = preprocessor.transform(X_crop)

    # Summarise the background data with k-means (at most 10 centroids).
    # shape[0] is used for the row count because the matrix may be sparse.
    num_clusters = min(10, X_crop_preprocessed.shape[0])
    background_crop = shap.kmeans(X_crop_preprocessed, num_clusters)

    # verbose=0 stops Keras printing a progress bar for every batch that
    # Kernel SHAP sends to the model.
    explainer = shap.KernelExplainer(
        lambda rows: model.predict(rows, verbose=0), background_crop
    )

    # Compute SHAP values for every row of this crop.
    shap_values_crop = explainer.shap_values(X_crop_preprocessed)
    shap_values_array = np.array(shap_values_crop)

    # Feature names in the transformer's output order: numeric columns first,
    # then the one-hot columns produced by the 'cat' encoder.
    original_feature_names = numerical_features + list(
        preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
    )

    # Collapse to (rows, n_features) before averaging |SHAP| over rows, so the
    # result always has exactly one value per feature. This is valid because
    # the model has a single output.
    # NOTE(review): SHAP releases differ in whether they return a
    # (rows, features, outputs) array or a list with one (rows, features)
    # array per output - confirm against the installed version.
    n_features = X_crop_preprocessed.shape[1]
    mean_abs_shap_values = np.abs(shap_values_array).reshape(-1, n_features).mean(axis=0)


In [None]:
import plotly.graph_objects as go

# Bar chart of mean |SHAP| per preprocessed feature for the current crop.
# Expects `original_feature_names`, `mean_abs_shap_values` and `crop_name`
# to have been set by the preceding cells.
fig = go.Figure(
    data=[
        go.Bar(
            x=original_feature_names,
            y=mean_abs_shap_values,
            marker={"color": "blue"},
            hovertemplate='<b>%{x}</b><br>Mean Absolute SHAP Value: %{y}<extra></extra>',
        )
    ]
)

# Titles and axis labels; tick labels are rotated so long feature names fit.
fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_title="Features",
    yaxis_title="Mean Absolute SHAP Value",
    title=f"Feature Importance for {crop_name}",
)

# Render the interactive figure
fig.show()
