# **CROP-SPECIFIC ANALYSIS**

In [1]:
%run ./_setup.py
import os
import sys

Python: c:\Users\Admin\Desktop\Irrigation Water Requirement Prediction\.venv\Scripts\python.exe
Environment ready
Shape: (10000, 20)
<class 'pandas.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Soil_Type                10000 non-null  str    
 1   Soil_pH                  10000 non-null  float64
 2   Soil_Moisture            10000 non-null  float64
 3   Organic_Carbon           10000 non-null  float64
 4   Electrical_Conductivity  10000 non-null  float64
 5   Temperature_C            10000 non-null  float64
 6   Humidity                 10000 non-null  float64
 7   Rainfall_mm              10000 non-null  float64
 8   Sunlight_Hours           10000 non-null  float64
 9   Wind_Speed_kmh           10000 non-null  float64
 10  Crop_Type                10000 non-null  str    
 11  Crop_Growth_Stage        10000 non-null  str    
 12  Season   

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Load the irrigation dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/irrigation_prediction.csv")

In [3]:
# Give every crop one fixed colour so that all charts in the notebook agree.
df['Crop_Type'] = df['Crop_Type'].astype('category')

# Take as many Set3 colours as there are distinct crop values.
crop_colors = px.colors.qualitative.Set3[:df['Crop_Type'].nunique(dropna=False)]

# Pair the alphabetically sorted crop names with the palette, in order.
crop_color_map = {
    crop_name: crop_color
    for crop_name, crop_color in zip(sorted(df['Crop_Type'].cat.categories), crop_colors)
}


## *16. Crop Type vs Water Requirement*

In [4]:
print("\n16. Crop Water Requirements...")

# Per-crop mean of previous irrigation and rainfall, plus the row count.
water_by_crop = (
    df.groupby('Crop_Type', observed=False)
    .agg(**{
        'Avg_Previous_Irrigation': ('Previous_Irrigation_mm', 'mean'),
        'Avg_Rainfall': ('Rainfall_mm', 'mean'),
        'Count': ('Crop_Type', 'count'),
    })
    .reset_index()
)

# Bars: average previous irrigation per crop, coloured with the shared crop palette.
fig16 = px.bar(
    water_by_crop,
    x='Crop_Type',
    y='Avg_Previous_Irrigation',
    color='Crop_Type',
    color_discrete_map=crop_color_map,
    title='<b>16. Average Irrigation by Crop Type</b>',
    text_auto='.1f',
    labels={'Avg_Previous_Irrigation': 'Avg Irrigation (mm)', 'Crop_Type': 'Crop'},
)

fig16.update_layout(height=500, showlegend=False, plot_bgcolor='white')

# Overlay: average rainfall per crop as a dashed blue line with markers.
fig16.add_trace(
    go.Scatter(
        x=water_by_crop['Crop_Type'],
        y=water_by_crop['Avg_Rainfall'],
        mode='markers+lines',
        name='Avg Rainfall',
        marker={'size': 12, 'color': 'blue'},
        line={'color': 'blue', 'dash': 'dash'},
    )
)

fig16.show()


16. Crop Water Requirements...


## *17. Crop Growth Stage Water Demand*

In [5]:
print("\n17. Growth Stage Water Demand...")

# Order in which the growth stages should appear on the x-axis.
# NOTE(review): assumes these are the exact stage labels in the data; any other
# label is kept and placed after the known ones.
stage_order = ['Sowing', 'Vegetative', 'Flowering', 'Harvest']

# Mean previous irrigation and sample count for every crop / growth-stage pair.
stage_water = df.groupby(['Crop_Type', 'Crop_Growth_Stage'], observed=False).agg(
    Avg_Water_Need=('Previous_Irrigation_mm', 'mean'),
    Samples=('Crop_Type', 'count')
).reset_index()

# px.line connects points in row order and the groupby returns the stages
# alphabetically, so the rows must be sorted into stage order; otherwise each
# crop's line zigzags across the reordered axis.
stage_rank = {stage: rank for rank, stage in enumerate(stage_order)}
stage_water = (
    stage_water
    .assign(_stage_rank=stage_water['Crop_Growth_Stage'].astype(str).map(stage_rank).fillna(len(stage_order)))
    .sort_values(['Crop_Type', '_stage_rank'], kind='stable')
    .drop(columns='_stage_rank')
    .reset_index(drop=True)
)

fig17 = px.line(stage_water, x='Crop_Growth_Stage', y='Avg_Water_Need',
                color='Crop_Type', color_discrete_map=crop_color_map,
                markers=True, title='<b>17. Water Demand Across Growth Stages</b>',
                labels={'Avg_Water_Need': 'Avg Irrigation (mm)', 'Crop_Growth_Stage': 'Growth Stage'})

fig17.update_layout(height=500, plot_bgcolor='white',
                   xaxis={'categoryorder': 'array',
                          'categoryarray': stage_order})
fig17.show()


17. Growth Stage Water Demand...


## *18. Crop Yield vs Temperature Range*

In [6]:
print("\n18. Crop Temperature Tolerance...")

# Grouped box plot: one box per crop and irrigation-need level, showing the
# spread of recorded temperatures.
fig18 = px.box(df, x='Crop_Type', y='Temperature_C', color='Irrigation_Need',
               title='<b>18. Temperature Range by Crop & Irrigation Need</b>',
               # Axis label uses a real degree sign (the label previously held mis-encoded text).
               labels={'Temperature_C': 'Temperature (°C)', 'Crop_Type': 'Crop'})

fig18.update_layout(height=500, plot_bgcolor='white', boxmode='group')
fig18.show()


18. Crop Temperature Tolerance...


## *19. Crop × Soil Compatibility Matrix*

In [7]:
print("\n19. Crop-Soil Compatibility...")

# For each soil / crop pair: share of rows whose irrigation need is 'Low'
# (as a percentage) and the number of rows in the pair.
compatibility = (
    df.groupby(['Soil_Type', 'Crop_Type'], observed=False)
    .agg(**{
        'Success_Rate': ('Irrigation_Need', lambda need: need.eq('Low').mean() * 100),
        'Count': ('Soil_Type', 'count'),
    })
    .reset_index()
)

# Heatmap of the 'Low' share, soil on x and crop on y.
fig19 = px.density_heatmap(
    compatibility,
    x='Soil_Type',
    y='Crop_Type',
    z='Success_Rate',
    title='<b>19. Crop-Soil Success Matrix</b>',
    labels={'Success_Rate': 'Low Irrigation Need (%)'},
    color_continuous_scale='RdYlGn',
)

fig19.update_layout(height=500, plot_bgcolor='white')
fig19.show()


19. Crop-Soil Compatibility...


## *20. Crop Resilience to Rainfall Variability*

In [8]:
print("\n20. Crop Rainfall Resilience...")

# Split rainfall into four equal-frequency bins and label them low to high.
df['Rainfall_Quartile'] = pd.qcut(
    df['Rainfall_mm'],
    q=4,
    labels=['Very Low', 'Low', 'High', 'Very High'],
)

# Share of 'Low' irrigation need (in %) and row count per crop and rainfall bin.
rainfall_resilience = (
    df.groupby(['Crop_Type', 'Rainfall_Quartile'], observed=False)
    .agg(**{
        'Success_Rate': ('Irrigation_Need', lambda need: need.eq('Low').mean() * 100),
        'Samples': ('Crop_Type', 'count'),
    })
    .reset_index()
)

# One bar cluster per crop, one bar per rainfall bin.
fig20 = px.bar(
    rainfall_resilience,
    x='Crop_Type',
    y='Success_Rate',
    color='Rainfall_Quartile',
    barmode='group',
    title='<b>20. Crop Resilience Across Rainfall Levels</b>',
    labels={'Success_Rate': 'Low Irrigation Need (%)', 'Crop_Type': 'Crop'},
)

fig20.update_layout(height=500, plot_bgcolor='white')
fig20.show()


20. Crop Rainfall Resilience...


## *21. Crop Rotation Benefits*

In [9]:
print("\n21. Crop Rotation Analysis...")

# Synthetic rotation patterns for visualization only: these percentages are
# hard-coded here and are NOT derived from `df`.
# Keys use a real arrow character (they previously held mis-encoded text).
rotation_patterns = {
    'Wheat→Maize': 75, 'Maize→Cotton': 68, 'Rice→Sugarcane': 82,
    'Cotton→Wheat': 71, 'Sugarcane→Potato': 78, 'Potato→Wheat': 65
}

# One bar per rotation pattern, coloured by its value on a continuous scale.
fig21 = px.bar(x=list(rotation_patterns.keys()), y=list(rotation_patterns.values()),
               title='<b>21. Success Rate by Crop Rotation Pattern</b>',
               labels={'x': 'Rotation Pattern', 'y': 'Success Rate (%)'},
               color=list(rotation_patterns.values()),
               color_continuous_scale='Viridis')

fig21.update_layout(height=500, plot_bgcolor='white', showlegend=False)
fig21.show()


21. Crop Rotation Analysis...


## *22. Crop Water Productivity*

In [10]:
print("\n22. Crop Water Productivity...")

# Per-crop means of previous irrigation, field area and soil moisture.
productivity = (
    df.groupby('Crop_Type', observed=False)
    .agg(**{
        'Avg_Water_Used': ('Previous_Irrigation_mm', 'mean'),
        'Avg_Field_Size': ('Field_Area_hectare', 'mean'),
        'Avg_Soil_Moisture': ('Soil_Moisture', 'mean'),
    })
    .reset_index()
)

# Ratio of mean soil moisture to mean irrigation; the +1 in the denominator
# avoids dividing by zero when the mean irrigation is 0.
productivity['Water_Productivity'] = productivity['Avg_Soil_Moisture'].div(
    productivity['Avg_Water_Used'].add(1)
)

# Bubble chart: x = irrigation, y = soil moisture, bubble size = field area.
fig22 = px.scatter(
    productivity,
    x='Avg_Water_Used',
    y='Avg_Soil_Moisture',
    size='Avg_Field_Size',
    color='Crop_Type',
    color_discrete_map=crop_color_map,
    size_max=50,
    title='<b>22. Crop Water Productivity</b><br>Size = Field Area, Y = Soil Moisture',
    labels={
        'Avg_Water_Used': 'Avg Irrigation (mm)',
        'Avg_Soil_Moisture': 'Avg Soil Moisture (%)',
    },
)

fig22.update_layout(height=500, plot_bgcolor='white')
fig22.show()


22. Crop Water Productivity...


## *23. Crop Growth Rate Indicators*

In [12]:
from plotly.subplots import make_subplots


print("\n23. Growth Stage Analysis...")

# Preferred left-to-right order of the growth stages.
# NOTE(review): assumes these are the stage labels present in the data; any
# other label found is appended after them rather than dropped.
stage_order = ['Sowing', 'Vegetative', 'Flowering', 'Harvest']

# Mean temperature, sunlight and soil moisture per crop / growth-stage pair.
growth_factors = df.groupby(['Crop_Type', 'Crop_Growth_Stage'], observed=False).agg({
    'Temperature_C': 'mean',
    'Sunlight_Hours': 'mean',
    'Soil_Moisture': 'mean'
}).reset_index()

# Combined metric: weighted sum of the three means, with temperature clipped to
# the 15-35 range first. The weights are ad hoc and the inputs are on different
# scales, so treat the index as a rough indicator only.
growth_factors['Growth_Index'] = (
    growth_factors['Temperature_C'].clip(15, 35) * 0.4 +
    growth_factors['Sunlight_Hours'] * 0.3 +
    growth_factors['Soil_Moisture'] * 0.3
)

# Stage labels actually present, arranged in the preferred order.
present_stages = list(dict.fromkeys(growth_factors['Crop_Growth_Stage']))
stages = ([s for s in stage_order if s in present_stages] +
          [s for s in present_stages if s not in stage_order])

fig23 = make_subplots(rows=2, cols=2, subplot_titles=('Temperature', 'Sunlight', 'Soil Moisture', 'Combined'))

panels = [('Temperature_C', 1, 1), ('Sunlight_Hours', 1, 2),
          ('Soil_Moisture', 2, 1), ('Growth_Index', 2, 2)]

for panel_no, (factor, row, col) in enumerate(panels):
    # Rows = stages (in growth order), columns = crops.
    pivot = growth_factors.pivot(index='Crop_Growth_Stage', columns='Crop_Type',
                                 values=factor).reindex(stages)
    for crop in pivot.columns:
        fig23.add_trace(
            go.Scatter(
                x=pivot.index, y=pivot[crop], name=str(crop),
                mode='lines+markers',
                # Fixed per-crop colour: without it each subplot advances the
                # default colourway and the legend only matches the first panel.
                line=dict(color=crop_color_map.get(crop)),
                # Group the four traces of a crop so one legend entry toggles all.
                legendgroup=str(crop),
                showlegend=(panel_no == 0),
            ),
            row=row, col=col)

# Pin the stage order on every subplot's x-axis.
fig23.update_xaxes(categoryorder='array', categoryarray=stages)

fig23.update_layout(height=700, title_text='<b>23. Growth Stage Environmental Factors</b>',
                   plot_bgcolor='white')
fig23.show()


23. Growth Stage Analysis...


## *24. Crop Disease/Stress Risk*

In [13]:
print("\n24. Crop Stress Risk...")

# Density contours of humidity vs temperature, one facet (and colour) per crop,
# wrapped three facets per row.
fig24 = px.density_contour(df, x='Humidity', y='Temperature_C',
                          color='Crop_Type', facet_col='Crop_Type',
                          facet_col_wrap=3, title='<b>24. Disease Risk Zones: Humidity vs Temperature</b>',
                          # Axis label uses a real degree sign (the label previously held mis-encoded text).
                          labels={'Humidity': 'Humidity (%)', 'Temperature_C': 'Temperature (°C)'})

fig24.update_layout(height=700, plot_bgcolor='white')
fig24.show()


24. Crop Stress Risk...


## *25. Crop Water Cost Efficiency*

In [15]:
print("\n25. Water Cost Efficiency...")

# Synthetic price data for demonstration only (not taken from the dataset).
# Keys must match the crop names exactly: the former lowercase 'cotton' key left
# Cotton without a price, and it was then silently dropped from the chart.
crop_prices = {'Wheat': 250, 'Maize': 220, 'Cotton': 500, 'Rice': 300,
               'Sugarcane': 280, 'Potato': 200}

# Map through plain strings and cast to float. Mapping the categorical column
# directly returns a Categorical when every category has a distinct price, and
# a Categorical cannot be multiplied in the next step.
df['Crop_Price'] = df['Crop_Type'].astype(str).map(crop_prices).astype(float)

# Report crops without a price instead of letting them vanish unnoticed.
unpriced_crops = sorted(df.loc[df['Crop_Price'].isna(), 'Crop_Type'].astype(str).unique())
if unpriced_crops:
    print(f"Warning: no price defined for: {unpriced_crops} (excluded from the chart)")

df['Revenue_per_ha'] = df['Crop_Price'] * 10  # Simplified calculation
# +1 in the denominator avoids division by zero for rows with no prior irrigation.
df['Water_Efficiency'] = df['Revenue_per_ha'] / (df['Previous_Irrigation_mm'] + 1)

efficiency = df.groupby('Crop_Type', observed=False).agg(
    Avg_Water_Efficiency=('Water_Efficiency', 'mean'),
    Avg_Revenue=('Revenue_per_ha', 'mean')
).reset_index()

# The marker size column must not contain NaN, so drop crops with no efficiency value.
eff_plot = efficiency.dropna(subset=['Avg_Water_Efficiency'])

fig25 = px.scatter(
    eff_plot,
    x='Avg_Revenue', y='Avg_Water_Efficiency',
    color='Crop_Type', color_discrete_map=crop_color_map,
    size='Avg_Water_Efficiency', size_max=40,
    title='<b>25. Economic Water Efficiency</b><br>X = Revenue/ha, Y = Revenue per mm water',
    labels={'Avg_Revenue': 'Avg Revenue per ha',
            'Avg_Water_Efficiency': 'Water Efficiency (Revenue/mm)'}
)


fig25.update_layout(height=500, plot_bgcolor='white')
fig25.show()


25. Water Cost Efficiency...


## *26. Crop Calendar Optimization*

In [16]:
print("\n26. Crop Calendar...")

# Legend / bar order for the seasons.
seasons_order = ['Rabi', 'Kharif', 'Zaid']

# Crop-to-season reference mapping. NOTE(review): not used by the chart below;
# kept because other cells may rely on the name.
crop_season_map = {
    'Wheat': 'Rabi', 'Maize': 'Kharif', 'Cotton': 'Kharif',
    'Rice': 'Kharif', 'Sugarcane': 'Year_round', 'Potato': 'Rabi'
}

# One groupby pass instead of re-filtering the whole frame for every
# crop x season pair. observed=True keeps only combinations that actually
# occur, matching the previous "skip empty subsets" behaviour.
calendar_df = (
    df.assign(Low_Need=df['Irrigation_Need'].eq('Low'))
    .groupby(['Crop_Type', 'Season'], observed=True)
    .agg(Success_Rate=('Low_Need', 'mean'), Count=('Low_Need', 'size'))
    .reset_index()
    .rename(columns={'Crop_Type': 'Crop'})
)
# Convert the share of 'Low' irrigation need to a percentage.
calendar_df['Success_Rate'] = calendar_df['Success_Rate'] * 100

fig26 = px.bar(calendar_df, x='Crop', y='Success_Rate', color='Season',
               barmode='group', title='<b>26. Optimal Seasons for Crops</b>',
               labels={'Success_Rate': 'Low Irrigation Need (%)', 'Crop': 'Crop Type'},
               category_orders={'Season': seasons_order})

fig26.update_layout(height=500, plot_bgcolor='white')
fig26.show()


26. Crop Calendar...


## *27. Crop × Irrigation Method Efficiency*

In [17]:
print("\n27. Irrigation Method Efficiency by Crop...")

# Dataset-wide mean irrigation, computed once rather than inside the lambda
# for every group.
overall_avg_irrigation = df['Previous_Irrigation_mm'].mean()

method_efficiency = df.groupby(['Crop_Type', 'Irrigation_Type'], observed=False).agg(
    # Savings = overall mean minus group mean, so a POSITIVE value means the
    # group used less water than average (the previous sign was inverted
    # relative to the "Water Saved" label).
    Water_Saved=('Previous_Irrigation_mm', lambda x: overall_avg_irrigation - x.mean()),
    # Share of rows in the group whose irrigation need is 'Low', in percent.
    Success_Rate=('Irrigation_Need', lambda x: (x == 'Low').mean() * 100),
    Samples=('Crop_Type', 'count')
).reset_index()

fig27 = px.bar(method_efficiency, x='Crop_Type', y='Water_Saved',
               color='Irrigation_Type', barmode='group',
               title='<b>27. Water Savings by Irrigation Method & Crop</b>',
               labels={'Water_Saved': 'Water Saved vs Average (mm)', 'Crop_Type': 'Crop'})

fig27.update_layout(height=500, plot_bgcolor='white')
fig27.show()


27. Irrigation Method Efficiency by Crop...


## *28. Crop Sensitivity Analysis*

In [18]:
print("\n28. Crop Sensitivity Analysis...")

# Calculate correlation of each factor with irrigation need for each crop
sensitivity_data = []
for crop in df['Crop_Type'].unique():
    crop_df = df[df['Crop_Type'] == crop]
    # Skip crops with 10 or fewer rows.
    if len(crop_df) > 10:
        # Convert irrigation need to numeric for correlation
        # (labels outside Low/Medium/High become NaN and are ignored by corr).
        need_numeric = crop_df['Irrigation_Need'].map({'Low': 0, 'Medium': 1, 'High': 2})
        for factor in ['Temperature_C', 'Rainfall_mm', 'Soil_Moisture', 'Humidity']:
            if factor in crop_df.columns:
                corr = need_numeric.corr(crop_df[factor])
                # Label the sign explicitly; a NaN or exactly-zero correlation
                # must not be reported as 'Negative'.
                if pd.isna(corr):
                    direction = 'Undefined'
                elif corr > 0:
                    direction = 'Positive'
                elif corr < 0:
                    direction = 'Negative'
                else:
                    direction = 'None'
                sensitivity_data.append({
                    'Crop': crop,
                    'Factor': factor,
                    'Correlation': abs(corr),
                    'Direction': direction
                })

sensitivity_df = pd.DataFrame(sensitivity_data)

# Bars show |correlation|; the sign is exposed on hover so it is not lost.
fig28 = px.bar(sensitivity_df, x='Factor', y='Correlation', color='Crop',
               facet_col='Crop', facet_col_wrap=3,
               hover_data=['Direction'],
               title='<b>28. Crop Sensitivity to Environmental Factors</b><br>|Correlation| with Irrigation Need',
               labels={'Correlation': '|Correlation|', 'Factor': 'Environmental Factor'})

fig28.update_layout(height=700, plot_bgcolor='white')
fig28.show()


28. Crop Sensitivity Analysis...


## *29. Crop Success Predictors*

In [19]:
# Count rows per soil / crop / season combination and keep the 15 most frequent
# for the parallel-categories plot.
top_combinations = (
    df.groupby(['Soil_Type', 'Crop_Type', 'Season'], observed=False)
    .size()
    .reset_index(name='Count')
    .nlargest(15, 'Count')
)

fig29 = px.parallel_categories(
    top_combinations,
    dimensions=['Soil_Type', 'Crop_Type', 'Season'],
    color='Count',
    title='<b>29. Most Common Crop-Soil-Season Combinations</b>',
    color_continuous_scale='Blues',
)

fig29.update_layout(height=500, plot_bgcolor='white')
fig29.show()

## *30. Crop Innovation Adoption*

In [20]:
print("\n30. Modern vs Traditional Methods...")

# Irrigation types treated as 'Modern'; every other value counts as 'Traditional'.
modern_methods = ['Drip', 'Sprinkler']
df['Method_Type'] = (
    df['Irrigation_Type']
    .isin(modern_methods)
    .map({True: 'Modern', False: 'Traditional'})
)

# Share of 'Low' irrigation need (in %) and row count per crop and method type.
adoption = (
    df.groupby(['Crop_Type', 'Method_Type'], observed=False)
    .agg(**{
        'Success_Rate': ('Irrigation_Need', lambda need: need.eq('Low').mean() * 100),
        'Count': ('Crop_Type', 'count'),
    })
    .reset_index()
)

# One line per method type across the crops.
fig30 = px.line(
    adoption,
    x='Crop_Type',
    y='Success_Rate',
    color='Method_Type',
    markers=True,
    title='<b>30. Success Rate: Modern vs Traditional Irrigation</b>',
    labels={'Success_Rate': 'Low Irrigation Need (%)', 'Crop_Type': 'Crop'},
)

fig30.update_layout(height=500, plot_bgcolor='white')
fig30.show()


30. Modern vs Traditional Methods...
