In [1]:
%run ./_setup.py
import os
import sys

Python: c:\Users\Admin\Desktop\Irrigation Water Requirement Prediction\.venv\Scripts\python.exe
Environment ready
Shape: (10000, 20)
<class 'pandas.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Soil_Type                10000 non-null  str    
 1   Soil_pH                  10000 non-null  float64
 2   Soil_Moisture            10000 non-null  float64
 3   Organic_Carbon           10000 non-null  float64
 4   Electrical_Conductivity  10000 non-null  float64
 5   Temperature_C            10000 non-null  float64
 6   Humidity                 10000 non-null  float64
 7   Rainfall_mm              10000 non-null  float64
 8   Sunlight_Hours           10000 non-null  float64
 9   Wind_Speed_kmh           10000 non-null  float64
 10  Crop_Type                10000 non-null  str    
 11  Crop_Growth_Stage        10000 non-null  str    
 12  Season   

## Import Libraries

In [1]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import numpy as np
import pandas as pd
# Load the irrigation dataset into `df`, the frame every later cell reads from.
# The path is relative, so it resolves against the kernel's working directory
# (presumably the notebook's own folder — verify if the notebook is moved).
df = pd.read_csv("../data/irrigation_prediction.csv")

In [3]:
# Set Plotly theme: make "plotly_white" the default template, so figures
# created after this cell pick it up without passing `template=` explicitly.
pio.templates.default = "plotly_white"

In [4]:
# Print the exact column labels followed by the frame's dimensions
column_names = df.columns.tolist()

print("Column names in dataset:")
print(column_names)
print(f"\nNumber of rows: {len(df)}")
print(f"Number of columns: {len(column_names)}")


Column names in dataset:
['Soil_Type', 'Soil_pH', 'Soil_Moisture', 'Organic_Carbon', 'Electrical_Conductivity', 'Temperature_C', 'Humidity', 'Rainfall_mm', 'Sunlight_Hours', 'Wind_Speed_kmh', 'Crop_Type', 'Crop_Growth_Stage', 'Season', 'Irrigation_Type', 'Water_Source', 'Field_Area_hectare', 'Mulching_Used', 'Previous_Irrigation_mm', 'Region', 'Irrigation_Need']

Number of rows: 10000
Number of columns: 20


In [5]:
# Cast the label-like columns to pandas' `category` dtype, warning about
# any expected column that the loaded frame does not contain.
categorical_cols = [
    'Soil_Type', 'Crop_Type', 'Crop_Growth_Stage', 'Season',
    'Irrigation_Type', 'Water_Source', 'Mulching_Used', 'Region',
    'Irrigation_Need',
]

for col in categorical_cols:
    if col not in df.columns:
        print(f"Warning: Column '{col}' not found in dataset")
        continue
    df[col] = df[col].astype('category')

In [6]:
# Colour palettes shared by the figures below (category label -> hex colour).
# Key order matters: several cells iterate these dicts to fix display order.
soil_color_map = dict(Clay='#8B4513', Silt='#D2B48C', Sandy='#F4A460', Loamy='#556B2F')
irrigation_color_map = dict(Low='#2E8B57', Medium='#FFA500', High='#DC143C')
region_color_map = dict(
    North='#1f77b4', South='#ff7f0e', East='#2ca02c',
    West='#d62728', Central='#9467bd',
)
# NOTE(review): Cotton is pure white; if this map is ever used for markers on a
# white plot background those points would not be visible — confirm intent.
crop_color_map = dict(
    Wheat='#FFD700', Maize='#32CD32', Cotton='#FFFFFF',
    Rice='#87CEEB', Sugarcane='#D2691E', Potato='#8B0000',
)

## 1. Soil Type vs Soil Moisture Distribution

In [7]:
print("\nCreating Plot 1: Soil Type vs Soil Moisture Distribution...")

# Grouped box plot: one box per (soil type, irrigation need) combination
fig1 = px.box(
    df,
    x='Soil_Type',
    y='Soil_Moisture',
    color='Irrigation_Need',
    color_discrete_map=irrigation_color_map,
    title='<b>1. Soil Moisture Distribution by Soil Type and Irrigation Need</b><br><sup>Different soils retain moisture differently affecting irrigation requirements</sup>',
    labels={'Soil_Moisture': 'Soil Moisture (%)', 'Soil_Type': 'Soil Type'},
    hover_data=['Crop_Type', 'Region', 'Rainfall_mm'],
    points='outliers',  # draw individual markers for outliers only
    category_orders={
        'Soil_Type': ['Clay', 'Silt', 'Sandy', 'Loamy'],
        'Irrigation_Need': ['Low', 'Medium', 'High'],
    },
)

fig1.update_layout(dict(
    height=600,
    xaxis_title="Soil Type",
    yaxis_title="Soil Moisture (%)",
    legend_title="Irrigation Need",
    boxmode='group',
    hovermode='closest',
    showlegend=True,
    plot_bgcolor='white',
    font=dict(size=12),
))

# Overlay the per-soil mean as a dashed segment with a small label above it.
# Positions on the categorical x-axis are addressed by category index.
soil_types = ['Clay', 'Silt', 'Sandy', 'Loamy']
label_offset = (df['Soil_Moisture'].max() - df['Soil_Moisture'].min()) * 0.02  # 2% of the data range
known_soils = df['Soil_Type'].cat.categories

for i, soil_type in enumerate(soil_types):
    if soil_type not in known_soils:
        continue

    subset = df[df['Soil_Type'] == soil_type]
    mean_val = subset['Soil_Moisture'].mean()

    fig1.add_shape(
        type='line',
        x0=i - 0.4,
        x1=i + 0.4,
        y0=mean_val,
        y1=mean_val,
        line=dict(color='black', width=2, dash='dash'),
    )
    fig1.add_annotation(
        x=i,
        y=mean_val + label_offset,
        text=f'Avg: {mean_val:.1f}%',
        showarrow=False,
        font=dict(size=10, color='black', family='Arial'),
        bgcolor='rgba(255, 255, 255, 0.8)',
        bordercolor='black',
        borderwidth=1,
        borderpad=3,
    )

print("✓ Plot 1 created successfully")
fig1.show()


Creating Plot 1: Soil Type vs Soil Moisture Distribution...
✓ Plot 1 created successfully


## 2. Soil pH vs Organic Carbon Relationship

In [8]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

print("\nCreating Plot 2: Soil pH vs Organic Carbon Relationship...")

# Bubble scatter (bubble size = field area) with one OLS trendline fitted
# across all soil types together.
scatter_options = dict(
    x='Soil_pH',
    y='Organic_Carbon',
    color='Soil_Type',
    color_discrete_map=soil_color_map,
    size='Field_Area_hectare',
    size_max=30,
    hover_name='Crop_Type',
    hover_data=['Region', 'Irrigation_Need', 'Soil_Moisture'],
    title='<b>2. Soil pH vs Organic Carbon Content by Soil Type</b><br><sup>Loamy soils show optimal pH-Organic Carbon balance</sup>',
    trendline='ols',
    trendline_scope='overall',
    opacity=0.6,
    labels={'Soil_pH': 'Soil pH', 'Organic_Carbon': 'Organic Carbon (% mg/L)'},
    category_orders={'Soil_Type': ['Clay', 'Silt', 'Sandy', 'Loamy']},
)
fig2 = px.scatter(df, **scatter_options)

# Pad the x-axis by half a pH unit on either side of the observed range
ph_axis_range = [df['Soil_pH'].min() - 0.5, df['Soil_pH'].max() + 0.5]

fig2.update_layout(
    height=600,
    xaxis=dict(title='Soil pH', range=ph_axis_range),
    yaxis=dict(title='Organic Carbon (% mg/L)'),
    legend_title="Soil Type",
    hovermode='closest',
    plot_bgcolor='white',
    font=dict(size=12),
)

# Shade the pH band 6.0-7.5 (vertical) ...
fig2.add_vrect(
    x0=6.0,
    x1=7.5,
    fillcolor="green",
    opacity=0.1,
    line_width=0,
    annotation_text="Optimal pH Range (6.0-7.5)",
    annotation_position="top left",
    annotation_font_size=12,
    annotation_font_color="green",
)

# ... and the organic-carbon band 1.0-3.0 (horizontal)
fig2.add_hrect(
    y0=1.0,
    y1=3.0,
    fillcolor="blue",
    opacity=0.1,
    line_width=0,
    annotation_text="Good OC Range (1.0-3.0%)",
    annotation_position="bottom right",
    annotation_font_size=12,
    annotation_font_color="blue",
)

print("✓ Plot 2 created successfully")
fig2.show()


Creating Plot 2: Soil pH vs Organic Carbon Relationship...
✓ Plot 2 created successfully


## 3. Electrical Conductivity Distribution by Soil Type and Region

In [9]:
print("\nCreating Plot 3: Electrical Conductivity Analysis...")

# Two panels side by side: violins per soil type (left), boxes per
# soil-type/region pair (right).
fig3 = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('<b>EC Distribution by Soil Type</b>', '<b>EC by Region & Soil Type</b>'),
    specs=[[{'type': 'violin'}, {'type': 'box'}]],
    horizontal_spacing=0.1,
)

# Left panel: one violin per soil type that exists in the data
present_soils = df['Soil_Type'].cat.categories
for soil_type, soil_fill in soil_color_map.items():
    if soil_type not in present_soils:
        continue
    subset = df[df['Soil_Type'] == soil_type]
    violin = go.Violin(
        x=subset['Soil_Type'],
        y=subset['Electrical_Conductivity'],
        name=soil_type,
        fillcolor=soil_fill,
        line_color='black',
        opacity=0.7,
        box_visible=True,
        meanline_visible=True,
        showlegend=False,
    )
    fig3.add_trace(violin, row=1, col=1)

# Right panel: restricted to two soil types and the first three region
# categories to keep the panel readable
shown_regions = df['Region'].cat.categories[:3]
for soil_type in ['Clay', 'Silt']:
    for region in shown_regions:
        subset = df[(df['Soil_Type'] == soil_type) & (df['Region'] == region)]
        if len(subset) == 0:
            continue
        box = go.Box(
            y=subset['Electrical_Conductivity'],
            name=f"{soil_type} - {region}",
            marker_color=region_color_map.get(region, '#000000'),
            opacity=0.7,
            showlegend=False,
        )
        fig3.add_trace(box, row=1, col=2)

fig3.update_layout(
    height=600,
    title_text='<b>3. Soil Salinity Analysis: Electrical Conductivity Patterns</b><br><sup>Higher EC indicates higher soil salinity</sup>',
    yaxis_title="Electrical Conductivity (dS/m)",
    yaxis2_title="Electrical Conductivity (dS/m)",
    violingap=0.1,
    violinmode='group',
    boxmode='group',
    plot_bgcolor='white',
)

# Dashed reference lines on the violin panel at the salinity thresholds
salinity_thresholds = {'Low': 0.8, 'Moderate': 2.0, 'High': 4.0}
colors = {'Low': 'green', 'Moderate': 'orange', 'High': 'red'}
for threshold_name, threshold_value in salinity_thresholds.items():
    fig3.add_hline(
        y=threshold_value,
        line_dash="dash",
        line_color=colors.get(threshold_name, 'gray'),
        annotation_text=f"{threshold_name} Salinity",
        row=1,
        col=1,
        annotation_font_size=10,
    )

print("✓ Plot 3 created successfully")
fig3.show()


Creating Plot 3: Electrical Conductivity Analysis...
✓ Plot 3 created successfully


## 4. Organic Carbon vs Yield Potential Bubble Chart

In [10]:
print("\nCreating Plot 4: Organic Carbon vs Yield Potential...")

# Composite yield proxy on a 0-100 scale.
# Each factor is first turned into a 0-100 sub-score and then weighted; the
# weights (0.25 + 0.30 + 0.25 + 0.20) sum to 1.0, so the index stays inside
# the 0-100 range that the axis label advertises. The organic-carbon term is
# scaled 0-10 -> 0-100 before weighting; multiplying the clipped value by 30
# directly would let that single term reach 300.
moisture_score = df['Soil_Moisture'].clip(0, 100)
oc_score = df['Organic_Carbon'].clip(0, 10) * 10
ph_score = (100 - (df['Soil_pH'] - 6.5).abs() * 10).clip(0, 100)  # best at pH 6.5
ec_score = (100 - df['Electrical_Conductivity'].clip(0, 10) * 10).clip(0, 100)  # lower EC scores higher

df['Yield_Proxy'] = (
    moisture_score * 0.25   # moisture: 25%
    + oc_score * 0.30       # organic carbon: 30%
    + ph_score * 0.25       # pH optimality: 25%
    + ec_score * 0.20       # low salinity: 20%
)

# One facet per irrigation-need level; bubble size encodes field area
fig4 = px.scatter(df, x='Organic_Carbon', y='Yield_Proxy',
                  size='Field_Area_hectare',
                  color='Soil_Type',
                  color_discrete_map=soil_color_map,
                  hover_name='Crop_Type',
                  hover_data=['Region', 'Irrigation_Need', 'Soil_pH'],
                  title='<b>4. Organic Carbon Impact on Yield Potential</b><br><sup>Bubble size = Field Area | Higher OC generally increases yield potential</sup>',
                  size_max=40,
                  facet_col='Irrigation_Need',
                  category_orders={'Irrigation_Need': ['Low', 'Medium', 'High'],
                                  'Soil_Type': ['Clay', 'Silt', 'Sandy', 'Loamy']},
                  labels={'Organic_Carbon': 'Organic Carbon (% mg/L)',
                         'Yield_Proxy': 'Yield Potential Index (0-100)'})

fig4.update_layout(
    height=500,
    xaxis_title="Organic Carbon (% mg/L)",
    yaxis_title="Yield Potential Index",
    legend_title="Soil Type",
    hovermode='closest',
    plot_bgcolor='white'
)

# Shade the 1.0-3.0 organic-carbon band in each of the three facet columns
for i, irrigation_need in enumerate(['Low', 'Medium', 'High'], 1):
    fig4.add_vrect(x0=1.0, x1=3.0,
                   fillcolor="green", opacity=0.1, line_width=0,
                   row=1, col=i)

print("✓ Plot 4 created successfully")
fig4.show()


Creating Plot 4: Organic Carbon vs Yield Potential...
✓ Plot 4 created successfully


## 5. Soil pH Distribution by Region

In [11]:
print("\nCreating Plot 5: Regional Soil pH Distribution...")

# Plot at most 2000 rows; a fixed seed keeps the sample reproducible
plot_df = df.sample(2000, random_state=42) if len(df) > 2000 else df

# Histograms of pH coloured by region, one facet row per soil type
fig5 = px.histogram(
    plot_df,
    x='Soil_pH',
    color='Region',
    color_discrete_map=region_color_map,
    facet_row='Soil_Type',
    nbins=30,
    opacity=0.7,
    title='<b>5. Regional Soil pH Distribution Patterns</b><br><sup>pH distribution varies by region and soil type</sup>',
    hover_data=['Crop_Type', 'Irrigation_Need'],
    labels={'Soil_pH': 'Soil pH'},
    category_orders={'Soil_Type': ['Clay', 'Silt', 'Sandy', 'Loamy']},
)

fig5.update_layout(dict(
    height=800,
    xaxis_title="Soil pH",
    yaxis_title="Count",
    legend_title="Region",
    bargap=0.1,
    plot_bgcolor='white',
))

# Dashed vertical marker at pH 6.5
fig5.add_vline(
    x=6.5,
    line_dash="dash",
    line_color="green",
    annotation_text="Optimal pH (6.5)",
    annotation_position="top right",
    opacity=0.7,
)


def _keep_text_after_equals(annotation):
    """Reduce labels such as 'Soil_Type=Clay' to the part after the last '='."""
    annotation.update(text=annotation.text.split("=")[-1])


# Update facet titles
fig5.for_each_annotation(_keep_text_after_equals)

print("✓ Plot 5 created successfully")
fig5.show()


Creating Plot 5: Regional Soil pH Distribution...
✓ Plot 5 created successfully


## 6. Electrical Conductivity by Irrigation Type and Water Source

In [12]:
print("\nCreating Plot 6: EC by Irrigation Type and Water Source...")

# Grouped boxes: irrigation type on the x-axis, one colour per water source
fig6 = px.box(
    df,
    x='Irrigation_Type',
    y='Electrical_Conductivity',
    color='Water_Source',
    points='outliers',
    title='<b>6. Soil Salinity by Irrigation Method and Water Source</b><br><sup>Groundwater often has higher salinity than surface water</sup>',
    hover_data=['Soil_Type', 'Crop_Type', 'Region'],
    color_discrete_sequence=px.colors.qualitative.Set2,
    labels={'Electrical_Conductivity': 'EC (dS/m)',
            'Irrigation_Type': 'Irrigation Type'},
    category_orders={'Irrigation_Type': ['Rainfed', 'Canal', 'Drip', 'Sprinkler']},
)

fig6.update_layout(dict(
    height=600,
    xaxis_title="Irrigation Type",
    yaxis_title="Electrical Conductivity (dS/m)",
    legend_title="Water Source",
    boxmode='group',
    plot_bgcolor='white',
))

# Salinity classification bands: (lower EC, upper EC, fill colour, label)
salinity_bands = [
    (0, 0.8, "green", "Non-saline"),
    (0.8, 2.0, "yellow", "Slightly saline"),
    (2.0, 4.0, "orange", "Moderately saline"),
    (4.0, 8.0, "red", "Strongly saline"),
]
for band_low, band_high, band_fill, band_label in salinity_bands:
    fig6.add_hrect(
        y0=band_low,
        y1=band_high,
        fillcolor=band_fill,
        opacity=0.1,
        line_width=0,
        annotation_text=band_label,
        annotation_position="left",
        annotation_font_size=10,
    )

print("✓ Plot 6 created successfully")
fig6.show()


Creating Plot 6: EC by Irrigation Type and Water Source...
✓ Plot 6 created successfully


## 7. Soil Moisture Retention with Mulching - Back-to-back comparison

In [13]:
print("\nCreating Plot 7: Mulching Impact Analysis...")

# Prepare data for mulching comparison: one summary record for every
# (soil type, mulching) pair that has at least one row in the data.
mulching_data = []
for soil_type in soil_color_map.keys():
    if soil_type in df['Soil_Type'].cat.categories:
        for mulching in ['Yes', 'No']:
            subset = df[(df['Soil_Type'] == soil_type) & (df['Mulching_Used'] == mulching)]
            if len(subset) > 0:
                mulching_data.append({
                    'Soil_Type': soil_type,
                    'Mulching_Used': mulching,
                    'Avg_Soil_Moisture': subset['Soil_Moisture'].mean(),
                    'Std_Soil_Moisture': subset['Soil_Moisture'].std(),
                    'Count': len(subset)
                })

mulching_df = pd.DataFrame(mulching_data)

# Grouped bars of mean moisture with +/- one standard deviation error bars
fig7 = px.bar(mulching_df, x='Soil_Type', y='Avg_Soil_Moisture',
              color='Mulching_Used',
              barmode='group',
              error_y='Std_Soil_Moisture',
              title='<b>7. Mulching Impact on Soil Moisture Retention</b><br><sup>Mulching significantly improves moisture retention, especially in sandy soils</sup>',
              labels={'Avg_Soil_Moisture': 'Average Soil Moisture (%)',
                     'Soil_Type': 'Soil Type'},
              text='Avg_Soil_Moisture',
              category_orders={'Soil_Type': ['Clay', 'Silt', 'Sandy', 'Loamy']})

fig7.update_traces(texttemplate='%{text:.1f}%', textposition='outside')
fig7.update_layout(
    height=600,
    xaxis_title="Soil Type",
    yaxis_title="Average Soil Moisture (%)",
    legend_title="Mulching Used",
    plot_bgcolor='white',
    uniformtext_minsize=10,
    uniformtext_mode='hide'
)

# Annotate each soil type with the relative change in mean moisture of mulched
# versus unmulched fields. x=i addresses the i-th category on the x-axis.
for i, soil_type in enumerate(['Clay', 'Silt', 'Sandy', 'Loamy']):
    mulching_yes = mulching_df[(mulching_df['Soil_Type'] == soil_type) & (mulching_df['Mulching_Used'] == 'Yes')]
    mulching_no = mulching_df[(mulching_df['Soil_Type'] == soil_type) & (mulching_df['Mulching_Used'] == 'No')]

    if mulching_yes.empty or mulching_no.empty:
        continue  # need both groups to compare

    moisture_with = mulching_yes['Avg_Soil_Moisture'].iloc[0]
    moisture_without = mulching_no['Avg_Soil_Moisture'].iloc[0]
    if moisture_without == 0:
        continue  # relative change is undefined against a zero baseline

    improvement = (moisture_with - moisture_without) / moisture_without * 100

    fig7.add_annotation(
        x=i,
        y=max(moisture_with, moisture_without) * 1.05,
        # The '+' format flag emits the correct sign itself; a hard-coded "+"
        # prefix would render a decrease as "+-5%".
        text=f"{improvement:+.0f}%",
        showarrow=False,
        font=dict(size=12, color='green' if improvement > 0 else 'red'),
        bgcolor='rgba(255, 255, 255, 0.8)'
    )

print("✓ Plot 7 created successfully")
fig7.show()


Creating Plot 7: Mulching Impact Analysis...
✓ Plot 7 created successfully


## 8. Clay Soil Property Correlations Heatmap

In [15]:
print("\nCreating Plot 9: Clay Soil Correlation Matrix...")

# Restrict to rows whose soil type is Clay
clay_df = df[df['Soil_Type'] == 'Clay']

# Candidate numeric columns; only those actually present are correlated
numerical_features = ['Soil_pH', 'Soil_Moisture', 'Organic_Carbon',
                      'Electrical_Conductivity', 'Temperature_C', 'Humidity',
                      'Rainfall_mm', 'Previous_Irrigation_mm']
existing_features = [name for name in numerical_features if name in clay_df.columns]

if len(existing_features) < 2:
    # A correlation matrix needs at least two columns
    print("⚠ Not enough numerical features for correlation matrix")
else:
    corr_matrix = clay_df[existing_features].corr()
    corr_values = corr_matrix.values

    # Heatmap centred on zero, with each coefficient printed in its cell
    heatmap = go.Heatmap(
        z=corr_values,
        x=corr_matrix.columns,
        y=corr_matrix.columns,
        colorscale='RdBu_r',
        zmid=0,
        text=np.round(corr_values, 2),
        texttemplate='%{text}',
        textfont={"size": 10},
        hoverongaps=False,
        colorbar=dict(title="Correlation Coefficient",
                      tickvals=[-1, -0.5, 0, 0.5, 1]),
    )
    fig9 = go.Figure(data=heatmap)

    fig9.update_layout(
        title=f'<b>9. Clay Soil: Feature Correlation Matrix</b><br><sup>Based on {len(clay_df)} clay soil samples | Red=positive, Blue=negative correlation</sup>',
        height=700,
        width=800,
        xaxis_title="Features",
        yaxis_title="Features",
        xaxis=dict(tickangle=45, tickfont=dict(size=10)),
        yaxis=dict(autorange='reversed', tickfont=dict(size=10)),
        plot_bgcolor='white',
    )

    print("✓ Plot 9 created successfully")
    fig9.show()


Creating Plot 9: Clay Soil Correlation Matrix...
✓ Plot 9 created successfully


## 9. Soil Degradation Risk Assessment

In [17]:
print("\nCreating Plot 10: Soil Degradation Risk...")


def calculate_degradation_risk(row):
    """Score one sample's soil-degradation risk on a 0-100 point scale.

    `row` is any mapping-like object (e.g. a DataFrame row) providing
    'Electrical_Conductivity', 'Soil_pH', 'Organic_Carbon' and 'Soil_Moisture'.
    Points are summed from four independent checks and capped at 100:

    * salinity:       EC > 2 -> 40, EC > 0.8 -> 20
    * pH:             outside 5.5-8.5 -> 30, outside 6.0-8.0 -> 15
    * organic carbon: < 0.8 -> 20, < 1.5 -> 10
    * moisture:       < 20 -> 10
    """
    ec = row['Electrical_Conductivity']
    ph = row['Soil_pH']
    organic_carbon = row['Organic_Carbon']
    moisture = row['Soil_Moisture']

    salinity_points = 40 if ec > 2 else (20 if ec > 0.8 else 0)

    if ph < 5.5 or ph > 8.5:
        ph_points = 30
    elif ph < 6.0 or ph > 8.0:
        ph_points = 15
    else:
        ph_points = 0

    carbon_points = 20 if organic_carbon < 0.8 else (10 if organic_carbon < 1.5 else 0)
    moisture_points = 10 if moisture < 20 else 0

    return min(100, salinity_points + ph_points + carbon_points + moisture_points)

# Score every sample: apply(..., axis=1) hands each row to
# calculate_degradation_risk and stores the result as a new column on df.
df['Degradation_Risk'] = df.apply(calculate_degradation_risk, axis=1)

# Create degradation risk categories
def categorize_risk(risk_score):
    """Map a numeric risk score to its category label.

    Scores below 25 are 'Low Risk', below 50 'Moderate Risk', below 75
    'High Risk'; everything else is 'Very High Risk'.
    """
    # (exclusive upper bound, label), checked in ascending order
    bands = (
        (25, 'Low Risk'),
        (50, 'Moderate Risk'),
        (75, 'High Risk'),
    )
    for upper_bound, label in bands:
        if risk_score < upper_bound:
            return label
    return 'Very High Risk'

# Label every sample with the category that matches its risk score
df['Risk_Category'] = df['Degradation_Risk'].apply(categorize_risk)

# pH vs EC scatter; colour = risk category, marker size = risk score
fig10 = px.scatter(
    df,
    x='Soil_pH',
    y='Electrical_Conductivity',
    color='Risk_Category',
    size='Degradation_Risk',
    size_max=20,
    hover_name='Soil_Type',
    hover_data=['Region', 'Organic_Carbon', 'Soil_Moisture', 'Crop_Type'],
    title='<b>10. Soil Degradation Risk Assessment: pH vs Salinity</b><br><sup>Size indicates risk level | Top-right quadrant indicates high degradation risk</sup>',
    labels={'Electrical_Conductivity': 'EC (dS/m)', 'Soil_pH': 'Soil pH'},
    color_discrete_sequence=['green', 'yellow', 'orange', 'red'],
    category_orders={'Risk_Category': ['Low Risk', 'Moderate Risk', 'High Risk', 'Very High Risk']},
)

fig10.update_layout(dict(
    height=600,
    xaxis_title="Soil pH",
    yaxis_title="Electrical Conductivity (dS/m)",
    legend_title="Risk Category",
    plot_bgcolor='white',
))

# Shaded risk zones between EC 2 and 8, each with a text label at y=7.6:
# (pH from, pH to, colour, label x-position, label text)
risk_zones = [
    (8.5, 14, "red", 11.0, "Critical Risk Zone"),
    (5.5, 8.5, "orange", 7.0, "High Risk Zone"),
]

for zone_x0, zone_x1, zone_color, _, _ in risk_zones:
    fig10.add_shape(
        type="rect",
        x0=zone_x0, x1=zone_x1, y0=2, y1=8,
        fillcolor=zone_color,
        opacity=0.1,
        line_width=0,
    )

for _, _, zone_color, label_x, zone_label in risk_zones:
    fig10.add_annotation(
        x=label_x,
        y=7.6,
        text=zone_label,
        showarrow=False,
        font=dict(size=12, color=zone_color),
    )


print("✓ Plot 10 created successfully")
fig10.show()


Creating Plot 10: Soil Degradation Risk...
✓ Plot 10 created successfully


## 10. Soil Moisture vs Rainfall Efficiency

In [18]:
print("\nCreating Plot 11: Rainfall Efficiency Analysis...")

# Create efficiency metric: soil moisture per mm of rainfall
# (+1 in the denominator avoids division by zero on dry samples)
df['Rainfall_Efficiency'] = df['Soil_Moisture'] / (df['Rainfall_mm'] + 1)

season_order = ['Rabi', 'Kharif', 'Zaid']  # facet order requested from Plotly Express
facet_wrap = 2                             # facets per grid row

# Rainfall vs moisture with a per-soil-type OLS trendline, one facet per season
fig11 = px.scatter(df, x='Rainfall_mm', y='Soil_Moisture',
                   color='Soil_Type',
                   color_discrete_map=soil_color_map,
                   trendline='ols',
                   facet_col='Season',
                   facet_col_wrap=facet_wrap,
                   hover_data=['Crop_Type', 'Region', 'Irrigation_Need'],
                   title='<b>11. Rainfall Utilization Efficiency by Soil Type and Season</b><br><sup>Steeper slopes indicate better water retention efficiency</sup>',
                   labels={'Soil_Moisture': 'Soil Moisture (%)',
                          'Rainfall_mm': 'Rainfall (mm)'},
                   height=600,
                   category_orders={'Season': season_order})

fig11.update_layout(
    legend_title="Soil Type",
    plot_bgcolor='white'
)

# Seasons in the order their facets are drawn: the requested order first, then
# any other season present in the data; seasons without rows get no facet.
plotted_seasons = [s for s in season_order if (df['Season'] == s).any()]
plotted_seasons += [s for s in df['Season'].unique() if s not in season_order]

# Facets fill the grid left-to-right starting from the TOP row, whereas
# subplot rows on the figure are numbered from the BOTTOM.
# NOTE(review): this relies on Plotly Express building its facet grid with a
# bottom-left start cell — verify against the installed plotly version.
n_facet_rows = -(-len(plotted_seasons) // facet_wrap)  # ceiling division
soil_names = list(soil_color_map.keys())

# Calculate and display R-squared values for each season / soil type
for facet_index, season in enumerate(plotted_seasons):
    grid_row = n_facet_rows - facet_index // facet_wrap
    grid_col = facet_index % facet_wrap + 1
    season_df = df[df['Season'] == season]

    for label_slot, soil_type in enumerate(soil_names):
        subset = season_df[season_df['Soil_Type'] == soil_type]
        if len(subset) <= 10:
            continue  # too few points for a meaningful correlation

        # Squared Pearson correlation equals R² of a simple linear fit
        r_squared = subset['Soil_Moisture'].corr(subset['Rainfall_mm']) ** 2
        soil_color = soil_color_map.get(soil_type, '#000000')

        fig11.add_annotation(
            row=grid_row, col=grid_col,
            # "domain" refs make x/y fractions of the facet's plotting area
            # (0-1) instead of data values on the rainfall/moisture axes;
            # row/col pick the facet and keep the " domain" suffix.
            xref="x domain", yref="y domain",
            x=0.05, y=0.95 - label_slot * 0.08,  # stack labels down from top-left
            xanchor="left", yanchor="top",
            text=f"{soil_type}: R²={r_squared:.2f}",
            showarrow=False,
            font=dict(size=9, color=soil_color),
            bgcolor="white",
            bordercolor=soil_color,
            borderwidth=1,
            borderpad=2
        )

print("✓ Plot 11 created successfully")
fig11.show()


Creating Plot 11: Rainfall Efficiency Analysis...
✓ Plot 11 created successfully


## 11. Organic Carbon Trends Analysis

In [19]:
print("\nCreating Plot 12: Organic Carbon Trends...")

# Order the samples by region, mulching flag and soil type, then number them
# consecutively; that running number is what the x-axis shows.
sort_keys = ['Region', 'Mulching_Used', 'Soil_Type']
df_sorted = df.sort_values(sort_keys).reset_index(drop=True)
df_sorted['Sequence_ID'] = range(len(df_sorted))

# Line per (mulching, soil type): colour = mulching, dash style = soil type
fig12 = px.line(
    df_sorted,
    x='Sequence_ID',
    y='Organic_Carbon',
    color='Mulching_Used',
    line_dash='Soil_Type',
    hover_data=['Crop_Type', 'Region', 'Soil_Moisture', 'Season'],
    title='<b>12. Organic Carbon Trends by Mulching Practice and Soil Type</b><br><sup>Mulching shows consistent organic carbon improvement over time</sup>',
    labels={'Organic_Carbon': 'Organic Carbon (% mg/L)',
            'Sequence_ID': 'Sample Sequence (Proxy for Time)'},
    height=500,
    color_discrete_map={'Yes': 'green', 'No': 'red'},
)

# Centred moving average over a tenth of the samples (at most 100);
# skipped when that window would be 10 samples or fewer.
window_size = min(100, len(df_sorted) // 10)
if window_size > 10:
    rolling_window = df_sorted['Organic_Carbon'].rolling(window=window_size, center=True)
    df_sorted['OC_MA'] = rolling_window.mean()
    moving_average = go.Scatter(
        x=df_sorted['Sequence_ID'],
        y=df_sorted['OC_MA'],
        mode='lines',
        name=f'{window_size}-Sample Moving Avg',
        line=dict(color='black', width=3, dash='dot'),
        opacity=0.7,
    )
    fig12.add_trace(moving_average)

fig12.update_layout(dict(
    xaxis_title="Sample Sequence (Proxy for Time Progression)",
    yaxis_title="Organic Carbon (% mg/L)",
    legend_title="Factors",
    hovermode='x unified',
    plot_bgcolor='white',
))

print("✓ Plot 12 created successfully")
fig12.show()


Creating Plot 12: Organic Carbon Trends...
✓ Plot 12 created successfully


## 12. Soil Compaction Indicators - Parallel Coordinates

In [24]:
print("\nCreating Plot 13: Soil Compaction Risk Profile...")


# Create compaction risk indicators
def calculate_compaction_risk(row):
    """Return a compaction-risk percentage (0-100) for one sample.

    `row` is any mapping-like object (e.g. a DataFrame row) providing
    'Soil_Type', 'Irrigation_Type' and 'Soil_Moisture'. Three contributions
    are added, capped at 1.0 and multiplied by 100:

    * soil type        (lookup below; 0.3 for an unlisted type)
    * irrigation type  (lookup below; 0.2 for an unlisted type)
    * moisture         (> 70 -> 0.3, > 50 -> 0.15, otherwise nothing)
    """
    soil_risk = {'Clay': 0.4, 'Silt': 0.3, 'Loamy': 0.2, 'Sandy': 0.1}
    irrigation_risk = {'Canal': 0.3, 'Rainfed': 0.2, 'Sprinkler': 0.1, 'Drip': 0.05}

    soil_component = soil_risk.get(row['Soil_Type'], 0.3)
    irrigation_component = irrigation_risk.get(row['Irrigation_Type'], 0.2)

    moisture = row['Soil_Moisture']
    moisture_component = 0.3 if moisture > 70 else (0.15 if moisture > 50 else 0)

    # Summed left to right, starting from zero
    total = 0 + soil_component + irrigation_component + moisture_component

    return min(1.0, total) * 100  # Convert to percentage

# Score every sample and keep the result as a column on df
df['Compaction_Risk'] = df.apply(calculate_compaction_risk, axis=1)

# Draw at most 300 rows (fixed seed) so the chart stays readable
sample_size = min(300, len(df))
sample_df = df.sample(sample_size, random_state=42)

# Parallel coordinates colour by a numeric column, so give each soil type an
# integer code following the key order of soil_color_map ...
soil_type_codes = dict(zip(soil_color_map.keys(), range(len(soil_color_map))))
sample_df['Soil_Type_Code'] = sample_df['Soil_Type'].map(soil_type_codes)

# ... and build a colour scale with one stop per code, spread evenly over 0-1
last_code = len(soil_color_map) - 1
soil_color_scale = [
    (code / last_code, soil_hex)
    for code, soil_hex in enumerate(soil_color_map.values())
]

fig13 = px.parallel_coordinates(
    sample_df,
    dimensions=[
        'Soil_Moisture',
        'Organic_Carbon',
        'Electrical_Conductivity',
        'Compaction_Risk',
    ],
    color='Soil_Type_Code',
    color_continuous_scale=soil_color_scale,
    labels={
        'Soil_Moisture': 'Moisture (%)',
        'Organic_Carbon': 'OC (% mg/L)',
        'Electrical_Conductivity': 'EC (dS/m)',
        'Compaction_Risk': 'Compaction Risk (%)',
    },
    title=f'<b>13. Soil Compaction Risk Profile</b><br><sup>Parallel coordinates showing {sample_size} samples | Lines connect soil properties for each sample</sup>',
)


fig13.update_layout(dict(height=600, plot_bgcolor='white'))

print("✓ Plot 13 created successfully")
fig13.show()


Creating Plot 13: Soil Compaction Risk Profile...
✓ Plot 13 created successfully


## 13. Soil Type Suitability Treemap

In [25]:

print("\nCreating Plot 14: Soil Type Suitability Analysis...")

# Values to iterate over; soil types follow soil_color_map's key order and are
# limited to those that are categories of the Soil_Type column.
region_names = df['Region'].cat.categories
soil_names = [s for s in soil_color_map.keys() if s in df['Soil_Type'].cat.categories]
crop_names = df['Crop_Type'].cat.categories

# One record per (region, soil type, crop) combination that has data.
# "Success" here means the sample's Irrigation_Need is 'Low'.
success_data = []
for region in region_names:
    region_df = df[df['Region'] == region]
    for soil_type in soil_names:
        soil_df = region_df[region_df['Soil_Type'] == soil_type]
        for crop in crop_names:
            crop_df = soil_df[soil_df['Crop_Type'] == crop]
            if len(crop_df) == 0:
                continue
            success_data.append({
                'Region': region,
                'Soil_Type': soil_type,
                'Crop_Type': crop,
                'Success_Rate': (crop_df['Irrigation_Need'] == 'Low').mean() * 100,
                'Avg_Moisture': crop_df['Soil_Moisture'].mean(),
                'Sample_Count': len(crop_df),
            })

if not success_data:
    print("⚠ No success data available for treemap")
else:
    success_df = pd.DataFrame(success_data)

    # Treemap hierarchy Region > Soil type > Crop; tile area = sample count,
    # tile colour = success rate on a fixed 0-100 scale
    fig14 = px.treemap(
        success_df,
        path=['Region', 'Soil_Type', 'Crop_Type'],
        values='Sample_Count',
        color='Success_Rate',
        color_continuous_scale='RdYlGn',
        range_color=[0, 100],
        hover_data=['Success_Rate', 'Avg_Moisture', 'Sample_Count'],
        title='<b>14. Soil Type Suitability Analysis</b><br><sup>Color = Success Rate (Low Irrigation Need %) | Size = Sample Count</sup>',
        height=700,
    )

    colorbar_style = dict(
        title="Success Rate %",
        tickformat=".0f",
        ticksuffix="%",
        thickness=20,
    )
    fig14.update_layout(coloraxis_colorbar=colorbar_style, plot_bgcolor='white')

    print("✓ Plot 14 created successfully")
    fig14.show()


Creating Plot 14: Soil Type Suitability Analysis...
✓ Plot 14 created successfully


## 14. Soil Amendment Needs - Threshold Analysis

In [26]:
print("\nCreating Plot 15: Soil Amendment Requirements...")


def _percent_flagged_by_soil(frame, flag_rows, value_col):
    """Return, per soil type, the percentage of rows that `flag_rows` marks True.

    frame:     DataFrame with a categorical 'Soil_Type' column.
    flag_rows: callable taking the per-soil sub-frame and returning a boolean Series.
    value_col: name of the percentage column in the result.

    Soil types follow soil_color_map's key order; types that are not categories
    of frame['Soil_Type'] are skipped. The result always has the columns
    ['Soil_Type', value_col], even when no soil type qualifies.
    """
    present = frame['Soil_Type'].cat.categories
    records = []
    for soil_type in soil_color_map.keys():
        if soil_type not in present:
            continue
        subset = frame[frame['Soil_Type'] == soil_type]
        # Mean of a boolean Series is the fraction of True values
        records.append({'Soil_Type': soil_type, value_col: flag_rows(subset).mean() * 100})
    return pd.DataFrame(records, columns=['Soil_Type', value_col])


def _add_percent_bars(fig, summary, value_col, name, row, col):
    """Add one bar trace (a soil-coloured bar per soil type) to subplot (row, col)."""
    fig.add_trace(
        go.Bar(x=summary['Soil_Type'], y=summary[value_col],
               marker_color=[soil_color_map.get(st, '#000000') for st in summary['Soil_Type']],
               text=summary[value_col].round(1),
               texttemplate='%{text}%',
               textposition='auto',
               name=name),
        row=row, col=col
    )


fig15 = make_subplots(
    rows=2, cols=2,
    subplot_titles=('pH Amendment Needs', 'Organic Carbon Needs',
                    'Salinity Issues', 'Moisture Deficits'),
    specs=[[{'type': 'bar'}, {'type': 'bar'}],
           [{'type': 'bar'}, {'type': 'bar'}]],
    vertical_spacing=0.15,
    horizontal_spacing=0.1
)

# 15a. pH Amendment Needs (pH outside 5.5-8.5)
ph_df = _percent_flagged_by_soil(
    df, lambda s: (s['Soil_pH'] < 5.5) | (s['Soil_pH'] > 8.5), 'Percent_Needing')

# 15b. Organic Carbon Needs (OC < 0.8%)
oc_df = _percent_flagged_by_soil(
    df, lambda s: s['Organic_Carbon'] < 0.8, 'Percent_Low')

# 15c. Salinity Issues (EC > 2.0 dS/m)
ec_df = _percent_flagged_by_soil(
    df, lambda s: s['Electrical_Conductivity'] > 2.0, 'Percent_High')

# 15d. Moisture Deficits (Moisture < 25%)
moisture_df = _percent_flagged_by_soil(
    df, lambda s: s['Soil_Moisture'] < 25, 'Percent_Low')

# One entry per panel: (summary, value column, trace name, row, col, y-axis title)
panels = [
    (ph_df, 'Percent_Needing', 'pH Needs', 1, 1, "% Needing Amendment"),
    (oc_df, 'Percent_Low', 'OC Needs', 1, 2, "% Low OC (<0.8%)"),
    (ec_df, 'Percent_High', 'Salinity Issues', 2, 1, "% High Salinity (>2 dS/m)"),
    (moisture_df, 'Percent_Low', 'Moisture Deficits', 2, 2, "% Low Moisture (<25%)"),
]

for summary, value_col, trace_name, panel_row, panel_col, _ in panels:
    _add_percent_bars(fig15, summary, value_col, trace_name, panel_row, panel_col)

fig15.update_layout(
    height=700,
    title_text='<b>15. Soil Amendment Requirements Analysis</b><br><sup>Percentage of fields needing soil improvements by soil type</sup>',
    showlegend=False,
    plot_bgcolor='white'
)

# Update y-axis labels; every panel shares the same fixed 0-100 % range
for _, _, _, panel_row, panel_col, axis_title in panels:
    fig15.update_yaxes(title_text=axis_title, row=panel_row, col=panel_col, range=[0, 100])

print("✓ Plot 15 created successfully")
fig15.show()


Creating Plot 15: Soil Amendment Requirements...
✓ Plot 15 created successfully
