In [1]:
# Crop Market Analysis for Kaggle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Location of the input CSV that load_data() reads, relative to the current
# working directory.
# NOTE(review): presumably the ICRISAT district-level dataset attached to the
# Kaggle notebook - confirm the path matches where the file is actually mounted.
FILE_PATH = "./ICRISAT-District Level Data (1).csv"

def load_data():
    """
    Read the CSV located at FILE_PATH into a pandas DataFrame.

    On success a one-line summary with the row and column counts is printed
    and the DataFrame is returned. If anything goes wrong while loading, the
    error is printed and None is returned instead, so callers must check
    for None before using the result.
    """
    try:
        frame = pd.read_csv(FILE_PATH)
        n_rows, n_cols = frame.shape
        print(f"Successfully loaded data with {n_rows} rows and {n_cols} columns.")
    except Exception as err:
        print(f"Error loading data: {err}")
        return None
    return frame

def analyze_district_prices(df, district_name):
    """
    Build a price report for one district and suggest its top-priced crops.

    Rows are selected by exact match on the 'Dist Name' column, and only the
    first matching row is consulted. Every column whose name contains
    "HARVEST PRICE" is treated as a crop price column; the crop name is the
    text in front of that marker. Missing (NaN) prices are skipped.

    Returns a dict with the keys "district", "state", "analysis_date",
    "top_crops", "all_crop_prices" and "recommendations". When no row
    matches the district, a plain message string is returned instead.
    """
    rows = df[df['Dist Name'] == district_name]
    if rows.empty:
        return f"No data found for district: {district_name}"

    # Map crop name -> price taken from the first matching row
    prices_by_crop = {}
    for column in df.columns:
        if "HARVEST PRICE" not in column:
            continue
        first_value = rows[column].values[0]
        if pd.isna(first_value):
            continue
        prices_by_crop[column.split("HARVEST PRICE")[0].strip()] = first_value

    # Most expensive crop first
    ranked = list(prices_by_crop.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    best_three = ranked[:3]

    if 'State Name' in rows.columns:
        state = rows['State Name'].values[0]
    else:
        state = "Unknown"

    return {
        "district": district_name,
        "state": state,
        "analysis_date": datetime.now().strftime("%Y-%m-%d"),
        "top_crops": best_three,
        "all_crop_prices": ranked,
        "recommendations": [
            f"{crop} can be cropped now for potential high returns (current price: Rs {price} per Quintal)"
            for crop, price in best_three
        ],
    }

def generate_price_chart(crop_prices, district_name):
    """
    Draw and display a bar chart of crop prices for one district.

    crop_prices is a sequence of (crop name, price) pairs; the bars appear in
    the order given. district_name is only used in the chart title. The
    figure is shown with plt.show() and nothing is returned.
    """
    # Spaces become newlines so multi-word crop names wrap under their bar
    tick_labels = [name.replace(" ", "\n") for name, _ in crop_prices]
    bar_values = [value for _, value in crop_prices]

    plt.figure(figsize=(14, 8))
    sns.set_style("whitegrid")
    axes = sns.barplot(x=tick_labels, y=bar_values, palette="viridis")

    # Axis labels and title
    plt.xlabel('Crops', fontsize=12, fontweight='bold')
    plt.ylabel('Price (Rs per Quintal)', fontsize=12, fontweight='bold')
    plt.title(f'Crop Prices in {district_name}', fontsize=16, fontweight='bold')

    # Annotate each bar with its (truncated) price slightly above the top
    for patch in axes.patches:
        bar_top = patch.get_height()
        centre_x = patch.get_x() + patch.get_width()/2.
        plt.text(centre_x, bar_top + 5,
                 f'₹{int(bar_top)}', ha='center', fontsize=9)

    plt.tight_layout()
    plt.show()

def _percent_difference(value, baseline):
    """
    Return how far value lies above (+) or below (-) baseline, in percent.

    Returns None when baseline is None or zero, because the ratio is
    undefined there (numpy would otherwise silently produce inf or nan).
    """
    if baseline is None or baseline == 0:
        return None
    return ((value / baseline) - 1) * 100


def comparative_analysis(df, district_name):
    """
    Compare a district's crop prices with state and overall averages.

    The district is selected by exact match on 'Dist Name' and only its first
    matching row is used. The "state" average is the mean over all rows that
    share that row's 'State Name'; the "national" average is the mean over
    every row of df. Columns whose name contains "HARVEST PRICE" are treated
    as price columns, and crops with no district price (NaN) are omitted.

    Returns None when the district is not found. Otherwise returns a dict
    mapping crop name to a dict with the keys "district_price", "state_avg",
    "national_avg", "vs_state" and "vs_national". An average is None when
    the column holds no data for that scope (or, for the state, when df has
    no 'State Name' column). A percentage is None whenever its average is
    None or zero.
    """
    district_data = df[df['Dist Name'] == district_name]
    if district_data.empty:
        return None

    # Tolerate a missing 'State Name' column, as analyze_district_prices does
    if 'State Name' in df.columns:
        state_name = district_data['State Name'].values[0]
        state_data = df[df['State Name'] == state_name]
    else:
        state_data = None

    comparison = {}

    for col in df.columns:
        if "HARVEST PRICE" not in col:
            continue

        district_price = district_data[col].values[0]
        if pd.isna(district_price):
            # Nothing to compare for this crop
            continue

        state_avg = None
        if state_data is not None and not state_data[col].isna().all():
            state_avg = state_data[col].mean()

        national_avg = df[col].mean() if not df[col].isna().all() else None

        crop_name = col.split("HARVEST PRICE")[0].strip()
        comparison[crop_name] = {
            "district_price": district_price,
            "state_avg": state_avg,
            "national_avg": national_avg,
            "vs_state": _percent_difference(district_price, state_avg),
            "vs_national": _percent_difference(district_price, national_avg),
        }

    return comparison

def run_analysis():
    """
    Interactive entry point: load the data, ask for a district, print a report.

    Steps: load the CSV via load_data(), list the available districts, read a
    district name from stdin, then print the top-crop recommendations, all
    prices, a bar chart, the comparison against state/national averages and a
    few summary figures. Returns None; all output goes to stdout and the
    matplotlib display.
    """
    df = load_data()
    if df is None:
        print("Failed to load data. Please check the file path.")
        return

    # Display available districts to help the user
    print("\nAvailable districts in the dataset:")
    for district in sorted(df['Dist Name'].unique()):
        print(f"- {district}")

    # Strip stray whitespace so it does not defeat the exact-match lookup
    district_name = input("\nEnter district name from the list above: ").strip()

    analysis = analyze_district_prices(df, district_name)
    if not isinstance(analysis, dict):
        print(analysis)  # Print error message
        return

    print(f"\n🌾 Crop Market Analysis for {analysis['district']}, {analysis['state']}")
    print(f"📅 Analysis Date: {analysis['analysis_date']}\n")

    crop_prices = analysis['all_crop_prices']
    if not crop_prices:
        # Without this guard the summary below would index an empty list
        # (IndexError) and average nothing (nan).
        print(f"No harvest price data available for district: {district_name}")
        return

    print("🔝 Top Recommended Crops:")
    for recommendation in analysis['recommendations']:
        print(f"✅ {recommendation}")

    print("\n💰 All Crop Prices (Highest to Lowest):")
    for crop, price in crop_prices:
        print(f"{crop}: Rs {price} per Quintal")

    # Generate price chart
    generate_price_chart(crop_prices, district_name)

    # Comparative analysis
    comparison = comparative_analysis(df, district_name)
    if comparison:
        print("\n🔍 Comparative Price Analysis:")
        for crop, data in comparison.items():
            # Only crops with both comparisons available are reported
            if data["vs_state"] is None or data["vs_national"] is None:
                continue
            state_diff = "higher" if data["vs_state"] > 0 else "lower"
            national_diff = "higher" if data["vs_national"] > 0 else "lower"

            print(f"{crop}:")
            print(f"  - Price: Rs {data['district_price']:.2f} per Quintal")
            print(f"  - {abs(data['vs_state']):.1f}% {state_diff} than state average")
            print(f"  - {abs(data['vs_national']):.1f}% {national_diff} than national average")

    # Additional market insights; crop_prices is sorted highest to lowest,
    # so the first and last entries give the price spread.
    avg_price = np.mean([price for _, price in crop_prices])
    price_spread = crop_prices[0][1] - crop_prices[-1][1]
    print(f"\n📈 Market Insights:")
    print(f"- Average crop price in {district_name}: Rs {avg_price:.2f} per Quintal")
    print(f"- Price differential between highest and lowest crop: Rs {price_spread:.2f}")
    print(f"- Number of crops with available price data: {len(crop_prices)}")

    # Make specific planting suggestions
    print("\n🌱 Planting Suggestions:")
    for crop, _ in crop_prices[:3]:
        print(f"- {crop} is recommended for planting now based on current market prices")

# Run the analysis only when executed directly (script or notebook cell), so
# importing this module does not trigger file loading and an input() prompt.
if __name__ == "__main__":
    run_analysis()

Successfully loaded data with 612 rows and 22 columns.

Available districts in the dataset:
- Chengalpattu MGR / Kanchipuram
- Coimbatore
- Kanyakumari
- Madurai
- North Arcot / Vellore
- Ramananthapuram
- Salem
- South Arcot / Cuddalore
- Thanjavur
- The Nilgiris
- Thirunelveli
- Tiruchirapalli / Trichy
No data found for district: 
