## **FARM MANAGEMENT ANALYSIS**

In [1]:
%run ./_setup.py
import os
import sys

Python: c:\Users\Admin\Desktop\Irrigation Water Requirement Prediction\.venv\Scripts\python.exe
Environment ready
Shape: (10000, 20)
<class 'pandas.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Soil_Type                10000 non-null  str    
 1   Soil_pH                  10000 non-null  float64
 2   Soil_Moisture            10000 non-null  float64
 3   Organic_Carbon           10000 non-null  float64
 4   Electrical_Conductivity  10000 non-null  float64
 5   Temperature_C            10000 non-null  float64
 6   Humidity                 10000 non-null  float64
 7   Rainfall_mm              10000 non-null  float64
 8   Sunlight_Hours           10000 non-null  float64
 9   Wind_Speed_kmh           10000 non-null  float64
 10  Crop_Type                10000 non-null  str    
 11  Crop_Growth_Stage        10000 non-null  str    
 12  Season   

In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Load the irrigation dataset from the repository's data folder (path is
# relative to the notebook's working directory).
csv_path = "../data/irrigation_prediction.csv"
df = pd.read_csv(csv_path)

In [3]:
# Cast the label-like columns to the pandas category dtype. A column that is
# missing from the frame is reported with a warning rather than raising.
categorical_cols = [
    'Soil_Type', 'Crop_Type', 'Crop_Growth_Stage', 'Season',
    'Irrigation_Type', 'Water_Source', 'Mulching_Used', 'Region',
    'Irrigation_Need',
]

for col in categorical_cols:
    if col not in df.columns:
        print(f"Warning: Column '{col}' not found in dataset")
        continue
    df[col] = df[col].astype('category')

### *61. Field Size vs Efficiency*

In [None]:
print("\n61. Field Size Efficiency...")

# Soil moisture per unit of previous irrigation. The +1 in the denominator
# avoids a zero divisor when Previous_Irrigation_mm is 0.
# The column is stored on df because later cells (74, 75) read it.
irrigation_plus_one = df['Previous_Irrigation_mm'] + 1
df['Water_Efficiency'] = df['Soil_Moisture'] / irrigation_plus_one

fig61_labels = {
    'Field_Area_hectare': 'Field Area (ha)',
    'Water_Efficiency': 'Water Efficiency',
}

# One OLS trendline is fitted per irrigation type (per colour group).
fig61 = px.scatter(
    df,
    x='Field_Area_hectare',
    y='Water_Efficiency',
    color='Irrigation_Type',
    trendline='ols',
    title='<b>61. Field Size vs Water Efficiency</b>',
    labels=fig61_labels,
)

fig61.update_layout(height=500, plot_bgcolor='white')
fig61.show()


61. Field Size Efficiency...


### *62. Mulching Impact Quantified*

In [5]:
print("\n62. Mulching Benefits...")

# For every (soil type, mulching flag) pair: mean soil moisture, mean previous
# irrigation, and the percentage of rows whose Irrigation_Need is 'Low'.
mulching_aggregations = {
    'Soil_Moisture': 'mean',
    'Previous_Irrigation_mm': 'mean',
    'Irrigation_Need': lambda x: (x == 'Low').mean() * 100,
}
by_soil_and_mulch = df.groupby(['Soil_Type', 'Mulching_Used'], observed=False)
mulching_stats = by_soil_and_mulch.agg(mulching_aggregations).reset_index()

# Only the soil-moisture column is charted; bars are grouped by mulching flag.
fig62 = px.bar(
    mulching_stats,
    x='Soil_Type',
    y='Soil_Moisture',
    color='Mulching_Used',
    barmode='group',
    text_auto='.1f',
    title='<b>62. Mulching Impact on Soil Moisture</b>',
    labels={'Soil_Moisture': 'Avg Soil Moisture (%)'},
)

fig62.update_layout(height=500, plot_bgcolor='white')
fig62.show()


62. Mulching Benefits...


### *63. Labor Requirements*

In [7]:
print("\n63. Labor Analysis...")

# Relative labor intensity assigned to each irrigation method (1 = lowest,
# 3 = highest). These are hand-assigned scores, not measured values.
labor_scores = {'Drip': 1, 'Sprinkler': 2, 'Canal': 3, 'Rainfed': 1}

# Irrigation_Type is cast to category earlier in the notebook. Mapping a
# categorical can return another categorical (it does whenever the mapping is
# one-to-one), and a categorical cannot be averaged. Casting to float keeps
# the mean below well-defined even if the scores are later edited to be
# unique, mirroring how Energy_Cost is built in cell 65.
df['Labor_Score'] = df['Irrigation_Type'].map(labor_scores).astype(float)

# Per crop: average labor score and the share of rows (in %) whose
# Irrigation_Need is "Low", which this notebook uses as its success measure.
labor_by_crop = df.groupby("Crop_Type", observed=False).agg(
    Labor_Score=("Labor_Score", "mean"),
    Success_Rate=("Irrigation_Need", lambda x: (x == "Low").mean() * 100),
).reset_index()

# Axis label states the actual range of labor_scores (1-3).
fig63 = px.scatter(labor_by_crop, x='Labor_Score', y='Success_Rate',
                   color='Crop_Type', size='Success_Rate',
                   title='<b>63. Labor Intensity vs Success Rate</b>',
                   labels={'Labor_Score': 'Labor Intensity (1-3)',
                          'Success_Rate': 'Success Rate (%)'})

fig63.update_layout(height=500, plot_bgcolor='white')
fig63.show()


63. Labor Analysis...


### *64. Equipment Utilization*

In [8]:
print("\n64. Equipment Analysis...")

# Bin the field area BEFORE counting. Grouping on the raw continuous area
# (with observed=False on the categorical key) produced one row per irrigation
# type x distinct area value, most of them with Count 0.
# The top bin is open-ended so fields above 50 ha are kept as 'Very Large'
# instead of falling outside the bins, and include_lowest keeps an area of
# exactly 0 in 'Small'.
field_size_category = pd.cut(
    df['Field_Area_hectare'],
    bins=[0, 1, 5, 10, float('inf')],
    labels=['Small', 'Medium', 'Large', 'Very Large'],
    include_lowest=True,
).rename('Field_Size_Category')

# Number of fields per (irrigation type, size class); only combinations that
# actually occur are emitted.
equipment_use = (
    df.groupby(['Irrigation_Type', field_size_category], observed=True)
      .size()
      .reset_index(name='Count')
)

fig64 = px.sunburst(equipment_use, path=['Irrigation_Type', 'Field_Size_Category'],
                    values='Count', title='<b>64. Equipment Use by Field Size</b>')

fig64.update_layout(height=500)
fig64.show()


64. Equipment Analysis...


### *65. Input Cost Breakdown*

In [11]:
print("\n65. Cost Analysis...")

# Simplified cost model: a flat 0.1 per mm of previous irrigation plus a fixed
# energy charge that depends on the irrigation method.
energy_cost_by_method = {"Drip": 20, "Sprinkler": 15, "Canal": 5, "Rainfed": 0}

previous_irrigation = df["Previous_Irrigation_mm"].astype(float)
df["Water_Cost"] = previous_irrigation * 0.1
# The mapped values are cast to float so they can be added and averaged.
df["Energy_Cost"] = df["Irrigation_Type"].map(energy_cost_by_method).astype(float)
df["Total_Cost"] = df["Water_Cost"] + df["Energy_Cost"]

# Per crop: mean total cost and percentage of rows with a "Low" irrigation need.
crop_groups = df.groupby("Crop_Type", observed=False)
cost_by_crop = crop_groups.agg(
    Total_Cost=("Total_Cost", "mean"),
    Success_Rate=("Irrigation_Need", lambda x: (x == "Low").mean() * 100),
).reset_index()

fig65 = px.scatter(
    cost_by_crop,
    x='Total_Cost',
    y='Success_Rate',
    color='Crop_Type',
    size='Total_Cost',
    title='<b>65. Cost-Effectiveness by Crop</b>',
    labels={
        'Total_Cost': 'Avg Cost ($/ha)',
        'Success_Rate': 'Success Rate (%)',
    },
)

fig65.update_layout(height=500, plot_bgcolor='white')
fig65.show()


65. Cost Analysis...


### *66. Technology Adoption Curve*

In [17]:
print("\n66. Technology Adoption...")

# Build a pseudo "adoption sequence" by ordering rows on soil moisture and
# numbering them; there is no time column, so the rank is used as the x-axis.
df_sorted = df.sort_values("Soil_Moisture").reset_index(drop=True)
df_sorted["Sequence"] = range(len(df_sorted))
# True where the irrigation method is one of the two treated as "modern".
df_sorted["Modern_Irrigation"] = df_sorted["Irrigation_Type"].isin(["Drip", "Sprinkler"])

# Split the sequence into 10 equal-width bins and summarise each one.
# Both metrics are expressed in percent (0-100) so they are comparable on the
# shared "Percentage" axis; the modern-irrigation share was previously left as
# a 0-1 fraction and rendered as a flat line near zero.
# observed=False is passed explicitly, matching the other groupbys here.
adoption_rate = df_sorted.groupby(
    pd.cut(df_sorted["Sequence"], bins=10), observed=False
).agg(
    Modern_Irrigation=("Modern_Irrigation", lambda x: x.mean() * 100),
    Success_Rate=("Irrigation_Need", lambda x: (x == "Low").mean() * 100),
).reset_index()

# Interval objects are not JSON-serialisable for plotly; use their string form.
adoption_rate["Sequence"] = adoption_rate["Sequence"].astype(str)

fig66 = px.line(
    adoption_rate,
    x="Sequence",
    y=["Modern_Irrigation", "Success_Rate"],
    title="<b>66. Technology Adoption & Success Rate</b>",
    labels={"value": "Percentage", "variable": "Metric"},
)

fig66.update_layout(height=500, plot_bgcolor="white")
fig66.show()



66. Technology Adoption...


### *67. Training Impact Assessment*

In [18]:
print("\n67. Training Impact...")

# No training data exists in the frame; the share of modern irrigation methods
# per region is used as a stand-in. All three metrics are percentages.
region_groups = df.groupby("Region", observed=False)
training_impact = region_groups.agg(
    Modern_Irrigation=(
        "Irrigation_Type",
        lambda x: x.isin(["Drip", "Sprinkler"]).mean() * 100,
    ),
    Mulching_Used=("Mulching_Used", lambda x: (x == "Yes").mean() * 100),
    Success_Rate=("Irrigation_Need", lambda x: (x == "Low").mean() * 100),
).reset_index()

# Marker size encodes the mulching percentage of each region.
fig67 = px.scatter(
    training_impact,
    x='Modern_Irrigation',
    y='Success_Rate',
    color='Region',
    size='Mulching_Used',
    title='<b>67. Technology Adoption vs Success Rate</b><br>Size = Mulching Adoption',
    labels={
        'Modern_Irrigation': 'Modern Methods (%)',
        'Success_Rate': 'Success Rate (%)',
    },
)

fig67.update_layout(height=500, plot_bgcolor='white')
fig67.show()


67. Training Impact...


### *68. Credit Access Impact*

In [19]:
print("\n68. Credit Access Analysis...")

# Field area stands in for credit access: the area is cut into four
# right-closed bands and each band is given an access label.
credit_bin_edges = [0, 2, 5, 10, 100]
credit_bin_labels = ['Very Low', 'Low', 'Medium', 'High']
df['Credit_Access'] = pd.cut(
    df['Field_Area_hectare'],
    bins=credit_bin_edges,
    labels=credit_bin_labels,
)

# Per (access band, crop): percentage using drip/sprinkler and percentage of
# rows with a "Low" irrigation need.
credit_groups = df.groupby(["Credit_Access", "Crop_Type"], observed=False)
credit_impact = credit_groups.agg(
    Modern_Irrigation=(
        "Irrigation_Type",
        lambda x: x.isin(["Drip", "Sprinkler"]).mean() * 100,
    ),
    Success_Rate=("Irrigation_Need", lambda x: (x == "Low").mean() * 100),
).reset_index()

fig68 = px.bar(
    credit_impact,
    x='Credit_Access',
    y='Modern_Irrigation',
    color='Crop_Type',
    barmode='group',
    title='<b>68. Technology Adoption by Credit Access</b>',
    labels={'Modern_Irrigation': 'Modern Methods (%)'},
)

fig68.update_layout(height=500, plot_bgcolor='white')
fig68.show()


68. Credit Access Analysis...


### *69. Market Access Influence*

In [20]:
print("\n69. Market Access...")

# Region stands in for market access. Per region: percentage of modern
# irrigation, number of distinct crops, and percentage of "Low" need rows.
market_access = (
    df.groupby("Region", observed=False)
    .agg(
        Modern_Irrigation=(
            "Irrigation_Type",
            lambda x: x.isin(["Drip", "Sprinkler"]).mean() * 100,
        ),
        Crop_Diversity=("Crop_Type", "nunique"),
        Success_Rate=("Irrigation_Need", lambda x: (x == "Low").mean() * 100),
    )
    .reset_index()
)

fig69_labels = {
    'Crop_Diversity': 'Number of Crops Grown',
    'Modern_Irrigation': 'Modern Methods (%)',
}

# Marker size encodes the regional success rate.
fig69 = px.scatter(
    market_access,
    x='Crop_Diversity',
    y='Modern_Irrigation',
    color='Region',
    size='Success_Rate',
    title='<b>69. Market Access Indicators</b><br>Size = Success Rate',
    labels=fig69_labels,
)

fig69.update_layout(height=500, plot_bgcolor='white')
fig69.show()


69. Market Access...


### *70. Cooperative Membership Benefits*

In [21]:
print("\n70. Cooperative Benefits...")

# Three regional percentages: mulching use, modern irrigation use, and rows
# with a "Low" irrigation need.
regional = df.groupby("Region", observed=False)
coop_benefits = regional.agg(
    Mulching_Rate=("Mulching_Used", lambda x: (x == "Yes").mean() * 100),
    Modern_Rate=(
        "Irrigation_Type",
        lambda x: x.isin(["Drip", "Sprinkler"]).mean() * 100,
    ),
    Success_Rate=("Irrigation_Need", lambda x: (x == "Low").mean() * 100),
).reset_index()

# One line per region across the three axes, coloured by success rate.
coop_dimensions = ['Mulching_Rate', 'Modern_Rate', 'Success_Rate']
fig70 = px.parallel_coordinates(
    coop_benefits,
    dimensions=coop_dimensions,
    color='Success_Rate',
    color_continuous_scale='RdYlGn',
    title='<b>70. Cooperative Benefits Indicators</b>',
)

fig70.update_layout(height=500)
fig70.show()


70. Cooperative Benefits...


### *71. Succession Planning Status*

In [22]:
print("\n71. Farm Continuity...")

# No succession data exists in the frame; mean field size, crop variety and
# modern-method share per region are used as stand-ins.
continuity = (
    df.groupby("Region", observed=False)
    .agg(
        Avg_Field_Size=("Field_Area_hectare", "mean"),
        Crop_Diversity=("Crop_Type", "nunique"),
        Modern_Rate=(
            "Irrigation_Type",
            lambda x: x.isin(["Drip", "Sprinkler"]).mean() * 100,
        ),
    )
    .reset_index()
)

fig71_labels = {
    'Avg_Field_Size': 'Avg Field Size (ha)',
    'Modern_Rate': 'Modern Methods (%)',
}

# Marker size encodes the number of distinct crops in the region.
fig71 = px.scatter(
    continuity,
    x='Avg_Field_Size',
    y='Modern_Rate',
    color='Region',
    size='Crop_Diversity',
    title='<b>71. Farm Continuity Indicators</b><br>Size = Crop Diversity',
    labels=fig71_labels,
)

fig71.update_layout(height=500, plot_bgcolor='white')
fig71.show()


71. Farm Continuity...


### *72. Risk Management Practices*

In [24]:
print("\n72. Risk Management...")

# Regional percentages for mulching, modern irrigation and "Low" need rows.
by_region = df.groupby("Region", observed=False)
risk_practices = by_region.agg(
    Mulching_Rate=("Mulching_Used", lambda x: (x == "Yes").mean() * 100),
    Modern_Rate=(
        "Irrigation_Type",
        lambda x: x.isin(["Drip", "Sprinkler"]).mean() * 100,
    ),
    Success_Rate=("Irrigation_Need", lambda x: (x == "Low").mean() * 100),
).reset_index()

# Correlate the three rate columns across regions (one observation per
# region), then render the matrix as an annotated heatmap.
practice_correlations = risk_practices.set_index('Region').corr()
fig72 = px.imshow(
    practice_correlations,
    text_auto='.2f',
    color_continuous_scale='RdBu_r',
    title='<b>72. Risk Management Practice Correlations</b>',
)

fig72.update_layout(height=500)
fig72.show()


72. Risk Management...


### *73. Digital Tool Usage*

In [26]:
print("\n73. Digital Adoption...")

# The frame has no farmer-age column; field area is cut into five
# right-closed bands that are labelled as career stages instead.
age_bin_edges = [0, 1, 3, 5, 10, 100]
age_bin_labels = ['Young', 'Early Career', 'Mid Career', 'Established', 'Senior']
df['Age_Proxy'] = pd.cut(
    df['Field_Area_hectare'],
    bins=age_bin_edges,
    labels=age_bin_labels,
)

# Per (age band, region): percentage using drip/sprinkler and percentage
# using mulching. Only Modern_Rate is charted below.
age_region_groups = df.groupby(["Age_Proxy", "Region"], observed=False)
digital_adoption = age_region_groups.agg(
    Modern_Rate=(
        "Irrigation_Type",
        lambda x: x.isin(["Drip", "Sprinkler"]).mean() * 100,
    ),
    Mulching_Rate=("Mulching_Used", lambda x: (x == "Yes").mean() * 100),
).reset_index()

fig73 = px.bar(
    digital_adoption,
    x='Age_Proxy',
    y='Modern_Rate',
    color='Region',
    barmode='group',
    title='<b>73. Technology Adoption by Farmer Age Group</b>',
    labels={
        'Modern_Rate': 'Modern Methods (%)',
        'Age_Proxy': 'Farm Experience Level',
    },
)

fig73.update_layout(height=500, plot_bgcolor='white')
fig73.show()


73. Digital Adoption...


### *74. Women's Participation Impact*

In [28]:
print("\n74. Gender Impact...")

# The frame has no gender column; membership of the crop in `women_crops` is
# used as a stand-in. NOTE(review): this proxy is an assumption of the
# analysis, not something the data establishes.
women_crops = ['Potato', 'Rice']  # Crops often managed by women
df['Women_Participation'] = df['Crop_Type'].isin(women_crops)

# Per proxy group: mulching percentage, percentage of "Low" need rows, and
# mean Water_Efficiency (the column created in cell 61, which must have run).
gender_impact = df.groupby("Women_Participation", observed=False).agg(
    Mulching_Rate=("Mulching_Used", lambda x: (x == "Yes").mean() * 100),
    Success_Rate=("Irrigation_Need", lambda x: (x == "Low").mean() * 100),
    Water_Efficiency=("Water_Efficiency", "mean"),
).reset_index()

# Long format: one row per (group, metric), one facet per metric.
fig74 = px.bar(gender_impact.melt(id_vars='Women_Participation'),
               x='Women_Participation', y='value', color='variable',
               barmode='group', facet_col='variable', facet_col_wrap=2,
               title='<b>74. Management Practices by Crop Type</b>',
               labels={'value': 'Value', 'Women_Participation': 'Crop Type Group'})

# The two rates are percentages while Water_Efficiency is a small ratio; with
# the default shared y-axis the ratio bars are squashed flat. Give each facet
# its own y-range and tick labels so all three metrics are readable.
fig74.update_yaxes(matches=None, showticklabels=True)

# White background, consistent with the other figures in this notebook.
fig74.update_layout(height=600, plot_bgcolor='white')
fig74.show()


74. Gender Impact...


### *75. Youth Engagement Levels*

In [33]:
print("\n75. Youth Engagement...")

# The frame has no youth-engagement column; regional use of modern irrigation
# is used as a stand-in, alongside mean Water_Efficiency (created in cell 61)
# and the number of distinct crops grown.
youth_engagement = df.groupby("Region", observed=False).agg(
    Modern_Rate=("Irrigation_Type", lambda x: x.isin(["Drip", "Sprinkler"]).mean() * 100),
    Innovation_Score=("Water_Efficiency", "mean"),
    Crop_Diversity=("Crop_Type", "nunique"),
).reset_index()

# Marker size encodes crop diversity.
fig75 = px.scatter(
    youth_engagement,
    x="Modern_Rate",
    y="Innovation_Score",
    color="Region",
    size="Crop_Diversity",
    title="<b>75. Innovation Indicators by Region</b><br>Size = Crop Diversity",
    labels={
        "Modern_Rate": "Modern Methods (%)",
        "Innovation_Score": "Water Efficiency",
    },
)

# Same height and background as the other scatter plots in this notebook;
# this was the only figure left on plotly's default layout.
fig75.update_layout(height=500, plot_bgcolor="white")
fig75.show()


75. Youth Engagement...
