In [1]:
import pandas as pd

In [7]:
import os

# Directory holding the cleaned per-crop CSV files
input_dir = "../clean_crop_contribution_data/"

# Collect every CSV in `input_dir`, skipping files whose name starts with '_'.
# The listing is sorted because os.listdir() returns entries in arbitrary
# order, and the processing order should not depend on the filesystem.
csv_files = sorted(
    f for f in os.listdir(input_dir)
    if f.endswith('.csv') and not f.startswith('_')
)

print(csv_files)

['arecanut.csv', 'arhar_tur.csv', 'bajra.csv', 'banana.csv', 'barley.csv', 'black_pepper.csv', 'cardamom.csv', 'cashewnut.csv', 'castorseed.csv', 'coconut.csv', 'coriander.csv', 'cotton.csv', 'cowpea_lobia.csv', 'dry_chillies.csv', 'garlic.csv', 'ginger.csv', 'gram.csv', 'groundnut.csv', 'guar_seed.csv', 'horse_gram.csv', 'jowar.csv', 'jute.csv', 'khesari.csv', 'linseed.csv', 'maize.csv', 'masoor.csv', 'mesta.csv', 'moong.csv', 'moth.csv', 'niger_seed.csv', 'onion.csv', 'other_cereals.csv', 'other_kharif_pulses.csv', 'other_oilseeds.csv', 'other_rabi_pulses.csv', 'other_summer_pulses.csv', 'peas_and_beans.csv', 'potato.csv', 'ragi.csv', 'rapeseed_and_mustard.csv', 'rice.csv', 'safflower.csv', 'sannhamp.csv', 'sesamum.csv', 'small_millets.csv', 'soyabean.csv', 'sugarcane.csv', 'sunflower.csv', 'sweet_potato.csv', 'tapioca.csv', 'tobacco.csv', 'turmeric.csv', 'urad.csv', 'wheat.csv']


In [12]:
def aggregate_csv(input_file, source_dir=None, output_dir=None):
    """Collapse one crop CSV into per-row Mean/Median/Max and save the result.

    Every column other than 'State' and 'District' is treated as a value
    column. Values that cannot be parsed as numbers become NaN and are
    ignored by the aggregates.

    Parameters
    ----------
    input_file : str
        File name of the CSV to read, relative to ``source_dir``.
    source_dir : str, optional
        Directory containing ``input_file``. When omitted, the notebook-level
        ``input_dir`` variable is used.
    output_dir : str, optional
        Directory for the output file (created if missing). When omitted, the
        file is written to the current working directory.

    Returns
    -------
    pandas.DataFrame
        Columns 'State', 'District', 'Mean', 'Median', 'Max'. The same frame
        is written to ``<input stem>_agg_yield_data.csv``.
    """
    if source_dir is None:
        # Fall back to the directory configured at notebook level.
        source_dir = input_dir

    df = pd.read_csv(os.path.join(source_dir, input_file))

    # Everything that is not an identifier column is a value column.
    year_columns = [col for col in df.columns if col not in ('State', 'District')]

    # Coerce to numeric in a separate frame so the loaded data is not mutated.
    yearly = df[year_columns].apply(pd.to_numeric, errors='coerce')

    df_agg = df[['State', 'District']].copy()
    df_agg['Mean'] = yearly.mean(axis=1)
    df_agg['Median'] = yearly.median(axis=1)
    df_agg['Max'] = yearly.max(axis=1)

    output_file = f"{os.path.splitext(input_file)[0]}_agg_yield_data.csv"
    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)
        output_file = os.path.join(output_dir, output_file)
    df_agg.to_csv(output_file, index=False)

    return df_agg

In [13]:
# Write one "<crop>_agg_yield_data.csv" summary per input file.
for source_name in csv_files:
    aggregate_csv(source_name)

In [18]:
import geopandas as gpd
import matplotlib.pyplot as plt

# Optional: prettier plots
plt.style.use('seaborn-v0_8-muted')

# District boundaries; the file is expected to carry "state_name" and
# "district_name" attributes for every polygon.
geojson_url = "https://bharatviz.saketlab.in/India_LGD_Districts_simplified.geojson"
gdf = gpd.read_file(geojson_url)

# Normalize the join keys on the map side. The state key goes into a new
# column so the original "state_name" values stay available.
gdf['district_name'] = gdf['district_name'].str.strip().str.lower()
gdf['state_key'] = gdf['state_name'].str.strip().str.lower()

# Aggregated yield CSVs in the working directory, in a deterministic order
# (os.listdir() makes no ordering guarantee).
agg_csvs = sorted(f for f in os.listdir() if f.endswith('_agg_yield_data.csv'))

for csv_file in agg_csvs:
    # Load the aggregated data
    df = pd.read_csv(csv_file)
    df.columns = [c.strip() for c in df.columns]

    # Normalize the join keys on the data side the same way
    df['State'] = df['State'].str.strip().str.lower()
    df['District'] = df['District'].str.strip().str.lower()

    # Join on (state, district): district names repeat across states, so a
    # district-only join would duplicate polygons and mix up their values.
    # NOTE(review): this assumes state spellings agree between the CSVs and
    # the GeoJSON after strip/lower - confirm, otherwise districts show as missing.
    merged = gdf.merge(
        df,
        left_on=['state_key', 'district_name'],
        right_on=['State', 'District'],
        how='left',
    )

    # One output folder per crop, named after the CSV
    crop_name = csv_file.replace('_agg_yield_data.csv', '')
    crop_dir = os.path.join(os.getcwd(), crop_name)
    os.makedirs(crop_dir, exist_ok=True)

    for feature in ['Mean', 'Median', 'Max']:
        fig, ax = plt.subplots(figsize=(12, 14))
        merged.plot(
            column=feature,
            cmap='YlGnBu',
            linewidth=0.3,
            edgecolor='white',
            legend=True,
            ax=ax,
            # Districts without a matching row are drawn hatched in grey
            missing_kwds={
                "color": "lightgrey",
                "edgecolor": "lightgrey",
                "hatch": "///",
                "label": "Missing values",
            },
        )

        ax.set_title(f"{crop_name.title()} - {feature} Yield", fontsize=16)
        ax.axis('off')
        fig.tight_layout()

        # Save through the explicit figure handle, then release it so the
        # loop does not accumulate open figures.
        output_path = os.path.join(crop_dir, f"{feature}_choropleth.png")
        fig.savefig(output_path, dpi=300)
        plt.close(fig)

    print(f"✅ Plots created for: {crop_name}")

✅ Plots created for: arecanut
✅ Plots created for: arhar_tur
✅ Plots created for: bajra
✅ Plots created for: banana
✅ Plots created for: barley
✅ Plots created for: black_pepper
✅ Plots created for: cardamom
✅ Plots created for: cashewnut
✅ Plots created for: castorseed
✅ Plots created for: coconut
✅ Plots created for: coriander
✅ Plots created for: cotton
✅ Plots created for: cowpea_lobia
✅ Plots created for: dry_chillies
✅ Plots created for: garlic
✅ Plots created for: ginger
✅ Plots created for: gram
✅ Plots created for: groundnut
✅ Plots created for: guar_seed
✅ Plots created for: horse_gram
✅ Plots created for: jowar
✅ Plots created for: jute
✅ Plots created for: khesari
✅ Plots created for: linseed
✅ Plots created for: maize
✅ Plots created for: masoor
✅ Plots created for: mesta
✅ Plots created for: moong
✅ Plots created for: moth
✅ Plots created for: niger_seed
✅ Plots created for: onion
✅ Plots created for: other_cereals
✅ Plots created for: other_kharif_pulses
✅ Plots created 