In [9]:
import pandas as pd

# Enhanced dataset for agricultural requirements in Rwanda
rwanda_agricultural_data = {
    'crops': [
        'Maize', 'Rice', 'Sorghum', 'Wheat', 'Millet', 
        'Cassava', 'Sweet Potatoes', 'Irish Potatoes', 'Yams', 'Taro',
        'Beans', 'Soybeans', 'Groundnuts', 'Peas', 'Green Grams',
        'Coffee', 'Tea', 'Pyrethrum', 'Sugarcane', 'Cotton',
        'Banana', 'Avocado', 'Mango', 'Pineapple', 'Passion Fruit', 'Tree Tomato',
        'Tomatoes', 'Cabbage', 'Carrots', 'Onions', 'Green Peppers', 'Eggplant',
        'Sunflower', 'Palm Oil', 'Macadamia', 'Ginger', 'Chili Peppers', 'Vanilla'
    ],
    
    # Updated water requirements data with Rwanda's four seasons
    'base_water_requirements': {
        'low_altitude': {  # Eastern Province (1000-1500m)
            'short_dry_season': [650, 1200, 450, 450, 400, 900, 550, 600, 750, 800, 400, 450, 500, 350, 380, 1600, 2000, 700, 1800, 900, 1800, 800, 1000, 1200, 900, 850, 400, 380, 400, 350, 600, 450, 600, 1200, 900, 800, 500, 900],
            'long_rainy_season': [455, 840, 315, 315, 280, 630, 385, 420, 525, 560, 280, 315, 350, 245, 266, 1120, 1400, 490, 1260, 630, 1260, 560, 700, 840, 630, 595, 280, 266, 280, 245, 420, 315, 420, 840, 630, 560, 350, 630],
            'long_dry_season': [700, 1300, 480, 480, 420, 950, 580, 630, 780, 830, 420, 480, 530, 370, 400, 1700, 2100, 730, 1900, 950, 1900, 830, 1050, 1250, 950, 900, 420, 400, 420, 370, 630, 480, 630, 1250, 950, 830, 530, 950],
            'short_rainy_season': [480, 860, 330, 330, 290, 650, 400, 440, 540, 580, 290, 330, 360, 255, 276, 1150, 1450, 510, 1300, 650, 1300, 580, 720, 860, 650, 615, 290, 276, 290, 255, 440, 330, 440, 860, 650, 580, 360, 650]
        },
        'mid_altitude': {  # Central and Southern Provinces (1500-2000m)
            'short_dry_season': [600, 1100, 400, 425, 375, 850, 500, 550, 700, 750, 375, 425, 475, 325, 355, 1500, 1900, 650, 1700, 850, 1700, 750, 950, 1150, 850, 800, 375, 355, 375, 325, 550, 425, 550, 1150, 850, 750, 475, 850],
            'long_rainy_season': [420, 770, 280, 298, 263, 595, 350, 385, 490, 525, 263, 298, 333, 228, 249, 1050, 1330, 455, 1190, 595, 1190, 525, 665, 805, 595, 560, 263, 249, 263, 228, 385, 298, 385, 805, 595, 525, 333, 595],
            'long_dry_season': [650, 1200, 430, 445, 395, 900, 530, 580, 730, 780, 395, 445, 495, 345, 375, 1600, 2000, 680, 1800, 900, 1800, 780, 980, 1200, 900, 850, 395, 375, 395, 345, 580, 445, 580, 1200, 900, 780, 495, 900],
            'short_rainy_season': [440, 810, 295, 310, 275, 620, 365, 405, 510, 545, 275, 310, 345, 240, 260, 1100, 1380, 470, 1230, 620, 1230, 545, 685, 835, 620, 580, 275, 260, 275, 240, 405, 310, 405, 835, 620, 545, 345, 620]
        },
        'high_altitude': {  # Northern and Western Provinces (>2000m)
            'short_dry_season': [550, 1000, 350, 400, 350, 800, 450, 500, 650, 700, 350, 400, 450, 300, 330, 1400, 1800, 600, 1600, 800, 1600, 700, 900, 1100, 800, 750, 350, 330, 350, 300, 500, 400, 500, 1100, 800, 700, 450, 800],
            'long_rainy_season': [385, 700, 245, 280, 245, 560, 315, 350, 455, 490, 245, 280, 315, 210, 231, 980, 1260, 420, 1120, 560, 1120, 490, 630, 770, 560, 525, 245, 231, 245, 210, 350, 280, 350, 770, 560, 490, 315, 560],
            'long_dry_season': [600, 1100, 380, 430, 380, 850, 480, 530, 680, 730, 380, 430, 480, 320, 350, 1500, 1900, 630, 1700, 850, 1700, 730, 930, 1150, 850, 800, 380, 350, 380, 320, 530, 430, 530, 1150, 850, 730, 480, 850],
            'short_rainy_season': [400, 730, 260, 290, 260, 580, 330, 360, 470, 510, 260, 290, 330, 220, 240, 1000, 1300, 440, 1160, 580, 1160, 510, 650, 800, 580, 540, 260, 240, 260, 220, 360, 290, 360, 800, 580, 510, 330, 580]
        }
    },
    
       # Updated soil type adjustments to include all Rwanda soil types
    'soil_type_adjustments': {
        # General soil classifications
        'sandy': 1.3,
        'loamy': 1.0,
        'clay': 0.8,
        'silty': 0.9,
        'peaty': 0.7,
        
        # Rwanda-specific soil types
        'volcanic': 0.85,
        'andosols': 0.85,
        'nitisols': 0.9,
        'ferralsols': 1.2,
        'acrisol': 1.15,
        'vertisols': 0.75,
        'fluvisols': 0.95,
        'gleysols': 0.7,
        'histosols': 0.65,
        'leptosols': 1.4,
        'clay_loam': 0.85,
        'sandy_loam': 1.15,
        'alluvial': 0.9
    },
    
    'slope_adjustments': {
        '0-5': 1.0, '5-10': 1.2, '10-15': 1.4, '>15': 1.6
    },
    
    'critical_growth_stages': {
        'germination': 1.2, 'vegetative': 1.0, 'flowering': 1.5, 'fruit_development': 1.3, 'maturity': 0.7
    },
    
    'environmental_factors': {
        'temperature_adjustment': {'<15': 0.8, '15-25': 1.0, '25-30': 1.2, '>30': 1.4},
        'humidity_adjustment': {'<40': 1.3, '40-60': 1.1, '60-80': 1.0, '>80': 0.9},
        'wind_speed_adjustment': {'<2': 1.0, '2-5': 1.1, '5-8': 1.2, '>8': 1.3}
    },
    
    # NEW: Nutrient requirements (N-P-K) in kg/ha
    'nutrient_requirements': {
        'nitrogen': [120, 100, 80, 100, 60, 60, 80, 110, 70, 60, 40, 60, 30, 30, 25, 150, 120, 60, 180, 120, 250, 80, 100, 140, 90, 80, 120, 150, 100, 80, 100, 90, 60, 120, 70, 50, 80, 60],
        'phosphorus': [60, 50, 40, 50, 30, 20, 40, 80, 30, 20, 60, 40, 50, 40, 30, 50, 45, 30, 100, 60, 75, 40, 50, 60, 45, 40, 90, 100, 80, 60, 70, 60, 40, 60, 30, 30, 50, 30],
        'potassium': [40, 80, 30, 40, 20, 80, 100, 120, 90, 80, 60, 30, 30, 20, 15, 180, 90, 40, 150, 60, 300, 120, 150, 180, 110, 90, 150, 120, 90, 70, 110, 100, 50, 140, 60, 70, 80, 50]
    },
    
    # NEW: pH preferences
    'ph_preferences': {
        'min_ph': [5.5, 5.5, 5.5, 6.0, 5.5, 5.0, 5.5, 5.5, 5.5, 5.5, 6.0, 6.0, 5.8, 6.0, 6.0, 5.0, 4.5, 5.5, 6.0, 5.8, 5.5, 6.0, 5.5, 5.0, 6.0, 5.5, 5.5, 6.0, 6.0, 6.0, 5.5, 5.5, 6.0, 5.0, 5.5, 5.5, 5.5, 6.0],
        'max_ph': [7.5, 7.0, 7.5, 7.5, 7.5, 6.5, 6.5, 6.5, 6.5, 6.5, 7.5, 7.0, 7.0, 7.5, 7.5, 6.0, 5.5, 7.0, 7.5, 7.0, 7.0, 7.0, 7.0, 6.5, 7.0, 6.5, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.5, 6.5, 6.5, 6.5, 7.0, 7.0],
        'optimal_ph': [6.0, 6.0, 6.0, 6.5, 6.0, 5.8, 6.0, 6.0, 6.0, 6.0, 6.5, 6.5, 6.5, 6.5, 6.5, 5.5, 5.0, 6.0, 6.5, 6.5, 6.0, 6.5, 6.0, 5.5, 6.5, 6.0, 6.0, 6.5, 6.5, 6.5, 6.0, 6.0, 6.5, 5.5, 6.0, 6.0, 6.0, 6.5]
    },
    
    # NEW: Planting seasons (month numbers 1-12, multiple months separated by commas)
    'planting_seasons': {
        'primary_season': ['2-3', '1-2,9-10', '9-10', '2-3', '9-10', '9-11', '9-10', '1-2,6-7', '2-3', '2-3', '2-3,9-10', '2-3', '2-3', '2-3', '2-3', '3-4,9-10', '3-4', '9-10', '2-3', '2-3', '2-3,9-10', '3-4', '3-4', '3-4', '2-3', '2-3', '1-3,9-10', '1-3,9-10', '2-3,9-10', '2-3,9-10', '2-3,9-10', '2-3,9-10', '2-3', '3-4', '3-4', '3-4', '2-3,9-10', '3-4'],
        'harvest_time_days': [90, 120, 100, 110, 90, 300, 150, 100, 270, 300, 90, 110, 130, 70, 70, 300, 1095, 150, 365, 180, 365, 1095, 1825, 540, 365, 365, 75, 90, 75, 90, 80, 90, 100, 1095, 540, 240, 90, 270]
    },
    
    # NEW: Pest and disease vulnerability (scale 1-10, 10 being most vulnerable)
    'vulnerability': {
        'pest_vulnerability': [7, 6, 5, 8, 5, 4, 6, 7, 5, 5, 7, 5, 6, 7, 6, 8, 6, 5, 7, 8, 8, 6, 7, 7, 8, 7, 9, 8, 6, 7, 8, 8, 6, 7, 5, 6, 7, 8],
        'disease_vulnerability': [8, 7, 6, 7, 6, 5, 7, 8, 6, 6, 8, 6, 7, 8, 7, 9, 7, 6, 7, 7, 9, 7, 8, 8, 8, 8, 9, 9, 7, 8, 8, 9, 7, 8, 6, 7, 8, 9],
        'common_pests': ['Fall armyworm,Stalk borers', 'Rice weevil,Stem borers', 'Sorghum midge,Birds', 'Aphids,Hessian fly', 'Birds,Shoot fly', 'Whiteflies,Cassava mealybug', 'Weevils,Vine borers', 'Colorado beetle,Aphids', 'Tuber beetles,Scale insects', 'Aphids,Taro beetles', 'Bean fly,Aphids', 'Leaf beetles,Pod borers', 'Aphids,Thrips', 'Pea weevil,Aphids', 'Pod borers,Bean flies', 'Coffee berry borer,Leaf miners', 'Tea mosquito,Thrips', 'Aphids,Thrips', 'Stem borers,Aphids', 'Bollworms,Aphids', 'Banana weevil,Thrips', 'Fruit flies,Mites', 'Fruit flies,Mango hoppers', 'Fruit flies,Mealybugs', 'Fruit flies,Mites', 'Fruit flies,Aphids', 'Whiteflies,Hornworms', 'Cabbage loopers,Aphids', 'Carrot fly,Aphids', 'Onion thrips,Onion maggot', 'Aphids,Thrips', 'Fruit flies,Flea beetles', 'Head moth,Stem weevil', 'Red palm weevil,Rhinoceros beetle', 'Nut borers,Aphids', 'Root grubs,Thrips', 'Aphids,Thrips', 'Mealybugs,Scales'],
        'common_diseases': ['Gray leaf spot,Common rust', 'Rice blast,Bacterial blight', 'Anthracnose,Downy mildew', 'Powdery mildew,Rust', 'Downy mildew,Smut', 'Cassava mosaic,Bacterial blight', 'Sweet potato virus,Black rot', 'Late blight,Early blight', 'Anthracnose,Yam mosaic virus', 'Phytophthora,Pythium rot', 'Bean common mosaic,Anthracnose', 'Bacterial blight,Mosaic virus', 'Leaf spot,Rust', 'Powdery mildew,Fusarium wilt', 'Powdery mildew,Root rot', 'Coffee leaf rust,Coffee berry disease', 'Blister blight,Root rot', 'Leaf spot,Root rot', 'Red rot,Smut', 'Bacterial blight,Fusarium wilt', 'Panama disease,Black Sigatoka', 'Anthracnose,Root rot', 'Anthracnose,Powdery mildew', 'Heart rot,Fusariosis', 'Fusarium wilt,Brown spot', 'Fusarium wilt,Anthracnose', 'Early blight,Late blight', 'Black rot,Clubroot', 'Leaf blight,Root rot', 'Purple blotch,Downy mildew', 'Bacterial spot,Powdery mildew', 'Verticillium wilt,Phomopsis blight', 'Downy mildew,White rust', 'Ganoderma butt rot,Fusarium wilt', 'Powdery mildew,Phytophthora', 'Bacterial wilt,Fusarium yellows', 'Bacterial spot,Anthracnose', 'Black rot,Fusarium']
    },
    
    # NEW: Intercropping compatibility (crops that work well together)
    'intercropping_compatibility': [
        'Beans,Peas,Cabbage', 'None', 'Groundnuts,Cowpeas', 'Beans,Peas', 'Groundnuts,Cowpeas', 
        'Maize,Beans', 'Maize,Beans', 'Maize,Beans,Peas', 'Beans,Sweet Potatoes', 'Beans,Cabbage',
        'Maize,Irish Potatoes', 'Maize,Groundnuts', 'Maize,Sorghum', 'Carrots,Radishes', 'Maize,Sorghum',
        'Banana,Beans', 'None', 'None', 'None', 'None',
        'Beans,Coffee', 'None', 'None', 'None', 'None', 'None',
        'Onions,Carrots,Basil', 'Onions,Beans', 'Onions,Lettuce', 'Carrots,Tomatoes', 'Onions,Carrots', 'Beans,Maize',
        'None', 'None', 'None', 'Beans,Maize', 'Onions,Carrots', 'None'
    ],
    
    # NEW: Drought resistance ratings (scale 1-10, 10 being most resistant)
    'drought_resistance': [5, 3, 8, 6, 8, 7, 7, 5, 6, 6, 5, 6, 8, 4, 7, 6, 5, 7, 4, 6, 4, 7, 7, 5, 5, 5, 4, 4, 5, 6, 5, 5, 7, 6, 7, 6, 6, 4],
    
    # NEW: Expected yield data (tons per hectare)
    'expected_yield': {
        'optimal_conditions': [6.0, 5.0, 3.0, 4.0, 2.5, 25.0, 18.0, 20.0, 15.0, 8.0, 2.5, 2.2, 2.0, 2.0, 1.5, 1.0, 2.5, 0.8, 110.0, 2.0, 30.0, 10.0, 20.0, 60.0, 15.0, 25.0, 25.0, 50.0, 30.0, 25.0, 15.0, 35.0, 2.0, 4.0, 1.5, 25.0, 10.0, 0.2],
        'average_conditions': [3.0, 3.5, 1.5, 2.5, 1.2, 12.0, 10.0, 12.0, 8.0, 4.0, 1.2, 1.0, 1.0, 1.0, 0.8, 0.6, 1.5, 0.5, 70.0, 1.2, 15.0, 6.0, 12.0, 40.0, 10.0, 15.0, 15.0, 30.0, 18.0, 15.0, 10.0, 20.0, 1.2, 2.5, 0.8, 15.0, 6.0, 0.1]
    },
    
    # NEW: Sunlight requirements (hours per day)
    'sunlight_requirements': {
        'minimum': [6, 6, 6, 6, 6, 5, 6, 6, 6, 5, 6, 6, 6, 6, 6, 4, 5, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 5, 5, 6, 5],
        'optimal': [8, 8, 8, 8, 8, 7, 8, 7, 8, 7, 8, 8, 8, 7, 8, 6, 6, 8, 8, 10, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 7, 7, 8, 6]
    },
    
    # NEW: Seed spacing recommendations (cm)
    'planting_spacing': {
        'row_spacing': [75, 30, 75, 20, 60, 100, 75, 75, 100, 75, 40, 45, 45, 30, 30, 250, 120, 60, 150, 90, 300, 600, 800, 120, 300, 300, 60, 45, 30, 30, 60, 60, 75, 900, 800, 30, 45, 250],
        'plant_spacing': [25, 15, 15, 5, 15, 100, 30, 30, 30, 60, 10, 5, 15, 5, 5, 250, 75, 45, 30, 30, 300, 600, 800, 60, 300, 300, 45, 30, 8, 10, 45, 60, 30, 900, 800, 15, 45, 250],
        'planting_depth': [5, 2, 3, 3, 2, 10, 10, 8, 10, 8, 4, 4, 5, 3, 3, 15, 10, 2, 5, 3, 30, 15, 10, 10, 10, 10, 1, 1, 1, 1, 1, 1, 3, 20, 3, 2, 1, 1]
    },
    
    # NEW: Crop rotation suggestions
    'crop_rotation': {
        'good_previous_crops': [
            'Beans,Soybeans,Groundnuts', 'None', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts',
            'Maize,Sorghum,Millet', 'Maize,Sorghum,Millet', 'Maize,Sorghum,Millet', 'Maize,Sorghum,Millet', 'Maize,Sorghum,Millet',
            'Maize,Sorghum,Millet', 'Maize,Sorghum,Millet', 'Maize,Sorghum,Millet', 'Maize,Sorghum,Millet', 'Maize,Sorghum,Millet',
            'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts',
            'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts',
            'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts',
            'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts'
        ],
        'bad_previous_crops': [
            'Maize,Sorghum,Millet', 'Rice', 'Maize,Sorghum,Millet', 'Wheat', 'Maize,Sorghum,Millet',
            'Cassava,Sweet Potatoes,Irish Potatoes', 'Cassava,Sweet Potatoes,Irish Potatoes', 'Cassava,Sweet Potatoes,Irish Potatoes', 'Cassava,Sweet Potatoes,Irish Potatoes', 'Cassava,Sweet Potatoes,Irish Potatoes',
            'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts', 'Beans,Soybeans,Groundnuts',
            'Coffee', 'Tea', 'Pyrethrum', 'Sugarcane', 'Cotton',
            'Banana', 'Avocado', 'Mango', 'Pineapple', 'Passion Fruit', 'Tree Tomato',
            'Tomatoes,Cabbage,Carrots', 'Tomatoes,Cabbage,Carrots', 'Tomatoes,Cabbage,Carrots', 'Tomatoes,Cabbage,Carrots', 'Tomatoes,Cabbage,Carrots', 'Tomatoes,Cabbage,Carrots',
            'Sunflower', 'Palm Oil', 'Macadamia', 'Ginger', 'Chili Peppers', 'Vanilla'
        ],
        'fallow_period_recommended': [0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    }
}

# Convert to DataFrame for easier manipulation
df = pd.DataFrame({
    'crop': rwanda_agricultural_data['crops'],
    
    # Water requirements
    'low_altitude_short_dry': rwanda_agricultural_data['base_water_requirements']['low_altitude']['short_dry_season'],
    'low_altitude_long_rainy': rwanda_agricultural_data['base_water_requirements']['low_altitude']['long_rainy_season'],
    'low_altitude_long_dry': rwanda_agricultural_data['base_water_requirements']['low_altitude']['long_dry_season'],
    'low_altitude_short_rainy': rwanda_agricultural_data['base_water_requirements']['low_altitude']['short_rainy_season'],
    
    'mid_altitude_short_dry': rwanda_agricultural_data['base_water_requirements']['mid_altitude']['short_dry_season'],
    'mid_altitude_long_rainy': rwanda_agricultural_data['base_water_requirements']['mid_altitude']['long_rainy_season'],
    'mid_altitude_long_dry': rwanda_agricultural_data['base_water_requirements']['mid_altitude']['long_dry_season'],
    'mid_altitude_short_rainy': rwanda_agricultural_data['base_water_requirements']['mid_altitude']['short_rainy_season'],
    
    'high_altitude_short_dry': rwanda_agricultural_data['base_water_requirements']['high_altitude']['short_dry_season'],
    'high_altitude_long_rainy': rwanda_agricultural_data['base_water_requirements']['high_altitude']['long_rainy_season'],
    'high_altitude_long_dry': rwanda_agricultural_data['base_water_requirements']['high_altitude']['long_dry_season'],
    'high_altitude_short_rainy': rwanda_agricultural_data['base_water_requirements']['high_altitude']['short_rainy_season'],
    
        # Nutrient requirements
    'nitrogen_req': rwanda_agricultural_data['nutrient_requirements']['nitrogen'],
    'phosphorus_req': rwanda_agricultural_data['nutrient_requirements']['phosphorus'],
    'potassium_req': rwanda_agricultural_data['nutrient_requirements']['potassium'],
    
    # pH preferences
    'min_ph': rwanda_agricultural_data['ph_preferences']['min_ph'],
    'max_ph': rwanda_agricultural_data['ph_preferences']['max_ph'],
    'optimal_ph': rwanda_agricultural_data['ph_preferences']['optimal_ph'],
    
    # Planting and harvesting
    'planting_season': rwanda_agricultural_data['planting_seasons']['primary_season'],
    'days_to_harvest': rwanda_agricultural_data['planting_seasons']['harvest_time_days'],
    
    # Vulnerabilities
    'pest_vulnerability': rwanda_agricultural_data['vulnerability']['pest_vulnerability'],
    'disease_vulnerability': rwanda_agricultural_data['vulnerability']['disease_vulnerability'],
    'common_pests': rwanda_agricultural_data['vulnerability']['common_pests'],
    'common_diseases': rwanda_agricultural_data['vulnerability']['common_diseases'],
    
    # Intercropping
    'intercropping_compatibility': rwanda_agricultural_data['intercropping_compatibility'],
    
    # Drought resistance
    'drought_resistance': rwanda_agricultural_data['drought_resistance'],
    
    # Yield data
    'optimal_yield': rwanda_agricultural_data['expected_yield']['optimal_conditions'],
    'average_yield': rwanda_agricultural_data['expected_yield']['average_conditions'],
    
    # Sunlight requirements
    'min_sunlight_hours': rwanda_agricultural_data['sunlight_requirements']['minimum'],
    'optimal_sunlight_hours': rwanda_agricultural_data['sunlight_requirements']['optimal'],
    
    # Spacing recommendations
    'row_spacing_cm': rwanda_agricultural_data['planting_spacing']['row_spacing'],
    'plant_spacing_cm': rwanda_agricultural_data['planting_spacing']['plant_spacing'],
    'planting_depth_cm': rwanda_agricultural_data['planting_spacing']['planting_depth'],
    
    # Crop rotation
    'good_previous_crops': rwanda_agricultural_data['crop_rotation']['good_previous_crops'],
    'bad_previous_crops': rwanda_agricultural_data['crop_rotation']['bad_previous_crops'],
    'fallow_recommended': rwanda_agricultural_data['crop_rotation']['fallow_period_recommended']
})

# Function to apply water requirement adjustments based on soil type, slope, environmental factors, and growth stage
def calculate_adjusted_water_requirements(row, soil_type, slope, temp, humidity, wind_speed, growth_stage):
    # Get adjustment factors
    soil_factor = rwanda_agricultural_data['soil_type_adjustments'].get(soil_type, 1.0)
    slope_factor = rwanda_agricultural_data['slope_adjustments'].get(slope, 1.0)
    
    # Handle temperature ranges
    if temp == '15-25':
        temp_factor = rwanda_agricultural_data['environmental_factors']['temperature_adjustment'].get('15-25', 1.0)
    elif temp == '<15':
        temp_factor = rwanda_agricultural_data['environmental_factors']['temperature_adjustment'].get('<15', 0.8)
    elif temp == '25-30':
        temp_factor = rwanda_agricultural_data['environmental_factors']['temperature_adjustment'].get('25-30', 1.2)
    elif temp == '>30':
        temp_factor = rwanda_agricultural_data['environmental_factors']['temperature_adjustment'].get('>30', 1.4)
    else:
        temp_factor = 1.0
    
    # Handle humidity ranges
    if humidity == '60-80':
        humidity_factor = rwanda_agricultural_data['environmental_factors']['humidity_adjustment'].get('60-80', 1.0)
    elif humidity == '<40':
        humidity_factor = rwanda_agricultural_data['environmental_factors']['humidity_adjustment'].get('<40', 1.3)
    elif humidity == '40-60':
        humidity_factor = rwanda_agricultural_data['environmental_factors']['humidity_adjustment'].get('40-60', 1.1)
    elif humidity == '>80':
        humidity_factor = rwanda_agricultural_data['environmental_factors']['humidity_adjustment'].get('>80', 0.9)
    else:
        humidity_factor = 1.0
    
    # Handle wind speed ranges
    if wind_speed == '2-5':
        wind_factor = rwanda_agricultural_data['environmental_factors']['wind_speed_adjustment'].get('2-5', 1.1)
    elif wind_speed == '<2':
        wind_factor = rwanda_agricultural_data['environmental_factors']['wind_speed_adjustment'].get('<2', 1.0)
    elif wind_speed == '5-8':
        wind_factor = rwanda_agricultural_data['environmental_factors']['wind_speed_adjustment'].get('5-8', 1.2)
    elif wind_speed == '>8':
        wind_factor = rwanda_agricultural_data['environmental_factors']['wind_speed_adjustment'].get('>8', 1.3)
    else:
        wind_factor = 1.0
    
    # Growth stage factor
    growth_factor = rwanda_agricultural_data['critical_growth_stages'].get(growth_stage, 1.0)
    
    # Create adjusted water requirement columns
    result = row.copy()
    
    # Apply adjustments to all altitude and season combinations
    for altitude in ['low', 'mid', 'high']:
        for season in ['short_dry', 'long_dry', 'short_rainy', 'long_rainy']:
            base_column = f'{altitude}_altitude_{season}'
            adjusted_column = f'{altitude}_altitude_{season}_adjusted'
            result[adjusted_column] = row[base_column] * soil_factor * slope_factor * temp_factor * humidity_factor * wind_factor * growth_factor
    
    return result

# Calculate water requirements for a specific scenario
def get_crop_water_requirements(crop_name, altitude, season, soil_type, slope, temp, humidity, wind_speed, growth_stage):
    """
    Calculate water requirements for a specific crop and conditions.
    
    Parameters:
    - crop_name: Name of the crop
    - altitude: 'low', 'mid', or 'high'
    - season: 'short_dry' or 'long_dry' or 'short_rainy' or 'long_rainy'
    - soil_type: 'sandy', 'loamy', 'clay', 'silty', or 'peaty'
    - slope: '0-5', '5-10', '10-15', or '>15'
    - temp: '<15', '15-25', '25-30', or '>30'
    - humidity: '<40', '40-60', '60-80', or '>80'
    - wind_speed: '<2', '2-5', '5-8', or '>8'
    - growth_stage: 'germination', 'vegetative', 'flowering', 'fruit_development', or 'maturity'
    
    Returns:
    - Water requirement in mm
    """
    # Find the crop index
    try:
        crop_idx = rwanda_agricultural_data['crops'].index(crop_name)
    except ValueError:
        return f"Crop '{crop_name}' not found in database."
    
    # Get base water requirement
    base_water = rwanda_agricultural_data['base_water_requirements'][f'{altitude}_altitude'][f'{season}_season'][crop_idx]
    
    # Get adjustment factors
    soil_factor = rwanda_agricultural_data['soil_type_adjustments'].get(soil_type, 1.0)
    slope_factor = rwanda_agricultural_data['slope_adjustments'].get(slope, 1.0)
    temp_factor = rwanda_agricultural_data['environmental_factors']['temperature_adjustment'].get(temp, 1.0)
    humidity_factor = rwanda_agricultural_data['environmental_factors']['humidity_adjustment'].get(humidity, 1.0)
    wind_factor = rwanda_agricultural_data['environmental_factors']['wind_speed_adjustment'].get(wind_speed, 1.0)
    growth_factor = rwanda_agricultural_data['critical_growth_stages'].get(growth_stage, 1.0)
    
    # Calculate adjusted water requirement
    adjusted_water = base_water * soil_factor * slope_factor * temp_factor * humidity_factor * wind_factor * growth_factor
    
    return adjusted_water

# Function to get complete crop requirements for planning
def get_crop_requirements(crop_name):
    """
    Get comprehensive requirements for a given crop.
    
    Parameters:
    - crop_name: Name of the crop
    
    Returns:
    - Dictionary of crop requirements
    """
    try:
        crop_idx = rwanda_agricultural_data['crops'].index(crop_name)
    except ValueError:
        return f"Crop '{crop_name}' not found in database."
    
    # Construct requirement dictionary
    requirements = {
        'crop': crop_name,
        'water_requirements': {
        'low_altitude': {
            'short_dry_season': rwanda_agricultural_data['base_water_requirements']['low_altitude']['short_dry_season'][crop_idx],
            'long_rainy_season': rwanda_agricultural_data['base_water_requirements']['low_altitude']['long_rainy_season'][crop_idx],
            'long_dry_season': rwanda_agricultural_data['base_water_requirements']['low_altitude']['long_dry_season'][crop_idx],
            'short_rainy_season': rwanda_agricultural_data['base_water_requirements']['low_altitude']['short_rainy_season'][crop_idx]
        },
        'mid_altitude': {
            'short_dry_season': rwanda_agricultural_data['base_water_requirements']['mid_altitude']['short_dry_season'][crop_idx],
            'long_rainy_season': rwanda_agricultural_data['base_water_requirements']['mid_altitude']['long_rainy_season'][crop_idx],
            'long_dry_season': rwanda_agricultural_data['base_water_requirements']['mid_altitude']['long_dry_season'][crop_idx],
            'short_rainy_season': rwanda_agricultural_data['base_water_requirements']['mid_altitude']['short_rainy_season'][crop_idx]
        },
        'high_altitude': {
            'short_dry_season': rwanda_agricultural_data['base_water_requirements']['high_altitude']['short_dry_season'][crop_idx],
            'long_rainy_season': rwanda_agricultural_data['base_water_requirements']['high_altitude']['long_rainy_season'][crop_idx],
            'long_dry_season': rwanda_agricultural_data['base_water_requirements']['high_altitude']['long_dry_season'][crop_idx],
            'short_rainy_season': rwanda_agricultural_data['base_water_requirements']['high_altitude']['short_rainy_season'][crop_idx]
        }
    },
        'nutrient_requirements': {
            'nitrogen_kg_per_ha': rwanda_agricultural_data['nutrient_requirements']['nitrogen'][crop_idx],
            'phosphorus_kg_per_ha': rwanda_agricultural_data['nutrient_requirements']['phosphorus'][crop_idx],
            'potassium_kg_per_ha': rwanda_agricultural_data['nutrient_requirements']['potassium'][crop_idx]
        },
        'ph_preferences': {
            'minimum': rwanda_agricultural_data['ph_preferences']['min_ph'][crop_idx],
            'maximum': rwanda_agricultural_data['ph_preferences']['max_ph'][crop_idx],
            'optimal': rwanda_agricultural_data['ph_preferences']['optimal_ph'][crop_idx]
        },
        'planting_info': {
            'planting_season': rwanda_agricultural_data['planting_seasons']['primary_season'][crop_idx],
            'days_to_harvest': rwanda_agricultural_data['planting_seasons']['harvest_time_days'][crop_idx],
            'row_spacing_cm': rwanda_agricultural_data['planting_spacing']['row_spacing'][crop_idx],
            'plant_spacing_cm': rwanda_agricultural_data['planting_spacing']['plant_spacing'][crop_idx],
            'planting_depth_cm': rwanda_agricultural_data['planting_spacing']['planting_depth'][crop_idx]
        },
        'vulnerability': {
            'pest_vulnerability': rwanda_agricultural_data['vulnerability']['pest_vulnerability'][crop_idx],
            'disease_vulnerability': rwanda_agricultural_data['vulnerability']['disease_vulnerability'][crop_idx],
            'common_pests': rwanda_agricultural_data['vulnerability']['common_pests'][crop_idx].split(','),
            'common_diseases': rwanda_agricultural_data['vulnerability']['common_diseases'][crop_idx].split(',')
        },
        'climate_needs': {
            'drought_resistance': rwanda_agricultural_data['drought_resistance'][crop_idx],
            'min_sunlight_hours': rwanda_agricultural_data['sunlight_requirements']['minimum'][crop_idx],
            'optimal_sunlight_hours': rwanda_agricultural_data['sunlight_requirements']['optimal'][crop_idx]
        },
        'farming_practices': {
            'intercropping_compatibility': rwanda_agricultural_data['intercropping_compatibility'][crop_idx].split(','),
            'good_previous_crops': rwanda_agricultural_data['crop_rotation']['good_previous_crops'][crop_idx].split(','),
            'bad_previous_crops': rwanda_agricultural_data['crop_rotation']['bad_previous_crops'][crop_idx].split(','),
            'fallow_recommended': bool(rwanda_agricultural_data['crop_rotation']['fallow_period_recommended'][crop_idx])
        },
        'yield_data': {
            'optimal_yield_tons_per_ha': rwanda_agricultural_data['expected_yield']['optimal_conditions'][crop_idx],
            'average_yield_tons_per_ha': rwanda_agricultural_data['expected_yield']['average_conditions'][crop_idx]
        }
    }
    
    return requirements

# Create a complete dataset with all adjusted water requirements
def create_comprehensive_dataset(soil_type='loamy', slope='0-5', temp='15-25', humidity='60-80', wind_speed='2-5', growth_stage='vegetative'):
    """
    Create a comprehensive dataset with all crop requirements including adjusted water requirements.
    
    Returns:
    - Pandas DataFrame with all crop requirements
    """
    # Apply water requirement adjustments
    df_adjusted = df.apply(
        lambda row: calculate_adjusted_water_requirements(
            row, soil_type, slope, temp, humidity, wind_speed, growth_stage
        ), 
        axis=1
    )
    
    return df_adjusted

# Function to recommend crops based on specific Rwandan conditions
def recommend_crops(altitude, season, soil_type, slope, ph_level, drought_risk=False, pest_pressure=False, min_yield=None):
    """
    Recommend suitable crops based on specific environmental conditions in Rwanda.
    
    Parameters:
    - altitude: 'low' (0-900m), 'mid' (900-1500m), or 'high' (>1500m)
    - season: 'short_dry' (mid-Dec to mid-Mar), 'long_rainy' (mid-Mar to mid-May), 
              'long_dry' (mid-May to mid-Sep), or 'short_rainy' (mid-Sep to mid-Dec)
    - soil_type: 'andosols', 'ferralsols', 'nitisols', 'vertisols', 'histosols', 
                 'acrisol', 'gleysols', 'regosols', or 'luvisols'
    - slope: '0-8' (flat to gentle), '8-15' (moderate), '15-30' (steep), or '>30' (very steep)
    - ph_level: Numeric pH value
    - drought_risk: Boolean indicating if drought is likely
    - pest_pressure: Boolean indicating if pest pressure is high
    - min_yield: Minimum acceptable yield in tons per hectare
    
    Returns:
    - List of recommended crops with suitability scores and additional information
    """
    # Common crops in Rwanda
    rwandan_crops = [
        'Maize', 'Beans', 'Sorghum', 'Rice', 'Wheat', 'Soybean', 'Groundnuts',
        'Irish Potatoes', 'Sweet Potatoes', 'Cassava', 'Yams', 'Bananas', 'Plantain',
        'Coffee', 'Tea', 'Pyrethrum', 'Sugarcane', 'Vegetables', 'Fruits',
        'Millet', 'Peas', 'Taro (Colocasia)', 'Passion Fruit', 'Pineapple', 'Avocado'
    ]
    
    # Water requirements by altitude, season, and crop (mm per growing season)
    water_requirements = {
        'low_altitude': {
            'short_dry_season': [600, 400, 500, 1200, 550, 450, 500, 700, 600, 700, 800, 1200, 1100, 1400, 1500, 900, 1800, 500, 800, 450, 400, 900, 900, 1000, 800],
            'long_rainy_season': [500, 350, 400, 1000, 450, 400, 450, 600, 500, 600, 700, 1000, 900, 1200, 1300, 800, 1600, 400, 700, 400, 350, 800, 800, 900, 700],
            'long_dry_season': [650, 450, 550, 1300, 600, 500, 550, 750, 650, 750, 850, 1300, 1200, 1500, 1600, 950, 1900, 550, 850, 500, 450, 950, 950, 1050, 850],
            'short_rainy_season': [550, 375, 425, 1050, 475, 425, 475, 625, 525, 625, 725, 1050, 950, 1250, 1350, 825, 1650, 425, 725, 425, 375, 825, 825, 925, 725]
        },
        'mid_altitude': {
            'short_dry_season': [550, 380, 480, 1150, 525, 430, 480, 675, 575, 675, 775, 1150, 1050, 1350, 1450, 875, 1750, 480, 775, 430, 380, 875, 875, 975, 775],
            'long_rainy_season': [475, 335, 380, 975, 425, 380, 425, 575, 475, 575, 675, 975, 875, 1150, 1250, 775, 1550, 380, 675, 380, 335, 775, 775, 875, 675],
            'long_dry_season': [600, 425, 525, 1250, 575, 475, 525, 725, 625, 725, 825, 1250, 1150, 1450, 1550, 925, 1850, 525, 825, 475, 425, 925, 925, 1025, 825],
            'short_rainy_season': [525, 350, 400, 1025, 450, 400, 450, 600, 500, 600, 700, 1025, 925, 1200, 1300, 800, 1625, 400, 700, 400, 350, 800, 800, 900, 700]
        },
        'high_altitude': {
            'short_dry_season': [525, 360, 460, 1100, 500, 410, 460, 650, 550, 650, 750, 1100, 1000, 1300, 1400, 850, 1700, 460, 750, 410, 360, 850, 850, 950, 750],
            'long_rainy_season': [450, 320, 360, 950, 400, 360, 400, 550, 450, 550, 650, 950, 850, 1100, 1200, 750, 1500, 360, 650, 360, 320, 750, 750, 850, 650],
            'long_dry_season': [575, 400, 500, 1200, 550, 450, 500, 700, 600, 700, 800, 1200, 1100, 1400, 1500, 900, 1800, 500, 800, 450, 400, 900, 900, 1000, 800],
            'short_rainy_season': [500, 335, 385, 1000, 425, 385, 425, 575, 475, 575, 675, 1000, 900, 1175, 1275, 775, 1600, 385, 675, 385, 335, 775, 775, 875, 675]
        }
    }
    
    # pH preferences by crop
    ph_preferences = {
        'min_ph': [5.5, 5.5, 5.8, 5.5, 5.8, 5.8, 5.3, 5.0, 5.0, 5.5, 5.5, 5.5, 5.5, 4.5, 4.5, 5.0, 6.0, 5.5, 5.5, 5.5, 5.8, 5.5, 5.5, 5.0, 5.5],
        'max_ph': [7.5, 7.5, 7.5, 7.0, 7.5, 7.5, 7.0, 6.5, 6.5, 7.0, 7.0, 7.0, 7.0, 6.0, 6.0, 6.5, 7.5, 7.0, 7.0, 7.0, 7.0, 6.5, 7.0, 6.5, 7.0]
    }
    
    # Drought resistance (1-10 scale, 10 being most resistant)
    drought_resistance = [5, 6, 8, 3, 5, 6, 7, 4, 6, 8, 7, 5, 5, 6, 5, 4, 4, 4, 5, 8, 6, 5, 6, 6, 7]
    
    # Pest vulnerability (1-10 scale, 10 being most vulnerable)
    pest_vulnerability = [7, 6, 5, 7, 6, 5, 6, 8, 7, 5, 6, 8, 8, 7, 6, 5, 6, 8, 7, 5, 6, 7, 7, 7, 6]
    
    # Expected yield under average conditions (tons/hectare)
    expected_yield = [3.5, 1.5, 2.0, 4.0, 2.5, 1.8, 1.5, 20.0, 15.0, 25.0, 18.0, 22.0, 15.0, 1.0, 2.5, 1.0, 80.0, 12.0, 15.0, 1.5, 1.5, 10.0, 15.0, 40.0, 12.0]
    
    # Soil preferences (suitability 1-10, 10 being most suitable)
    soil_preferences = {
        'andosols': [9, 7, 6, 5, 8, 7, 6, 9, 8, 7, 7, 8, 8, 9, 9, 9, 7, 8, 8, 6, 7, 8, 8, 8, 8],
        'ferralsols': [7, 8, 7, 5, 6, 8, 8, 7, 8, 8, 8, 7, 7, 8, 7, 6, 6, 7, 7, 7, 8, 7, 7, 7, 7],
        'nitisols': [8, 9, 8, 6, 8, 9, 8, 8, 8, 8, 8, 9, 9, 9, 8, 7, 8, 8, 8, 8, 9, 8, 8, 8, 8],
        'vertisols': [6, 7, 7, 9, 7, 7, 5, 5, 6, 6, 6, 6, 6, 5, 5, 5, 8, 7, 6, 7, 7, 6, 6, 5, 6],
        'histosols': [5, 6, 6, 8, 5, 6, 5, 7, 7, 7, 7, 7, 7, 6, 5, 5, 7, 8, 7, 6, 6, 8, 7, 7, 7],
        'acrisol': [7, 8, 7, 5, 6, 8, 7, 6, 7, 7, 7, 7, 7, 8, 7, 6, 5, 6, 7, 7, 8, 7, 7, 7, 7],
        'gleysols': [4, 5, 5, 9, 4, 5, 4, 5, 6, 6, 6, 6, 6, 5, 4, 4, 7, 6, 5, 5, 5, 7, 5, 6, 5],
        'regosols': [6, 7, 7, 4, 5, 7, 7, 6, 7, 7, 7, 6, 6, 6, 5, 6, 5, 6, 6, 7, 7, 6, 6, 6, 6],
        'luvisols': [8, 8, 7, 6, 7, 8, 7, 7, 7, 7, 7, 8, 8, 7, 6, 6, 7, 7, 7, 7, 8, 7, 7, 7, 7]
    }
    
    # Slope preferences (maximum suitable slope in degrees)
    slope_tolerance = [15, 20, 15, 5, 12, 15, 15, 15, 20, 20, 15, 25, 25, 30, 35, 25, 8, 15, 20, 15, 18, 15, 25, 15, 25]
    
    # Altitude suitability (1-10 scale, 10 being most suitable)
    altitude_suitability = {
        'low': [9, 8, 9, 10, 5, 8, 9, 6, 7, 9, 9, 10, 10, 7, 5, 3, 10, 8, 9, 8, 7, 9, 9, 9, 8],
        'mid': [10, 10, 8, 7, 9, 9, 8, 10, 10, 8, 8, 9, 9, 10, 9, 8, 8, 9, 9, 7, 9, 8, 10, 10, 10],
        'high': [6, 7, 6, 3, 10, 7, 6, 9, 8, 5, 5, 6, 6, 5, 10, 10, 4, 7, 7, 6, 8, 5, 7, 6, 7]
    }
    
    # Season suitability (1-10 scale, 10 being most suitable)
    season_suitability = {
        'short_dry_season': [6, 5, 7, 4, 7, 6, 8, 7, 6, 5, 5, 6, 6, 7, 8, 7, 5, 5, 6, 7, 5, 5, 6, 6, 7],
        'long_rainy_season': [9, 10, 7, 10, 8, 9, 7, 8, 9, 8, 8, 9, 9, 8, 9, 9, 9, 9, 8, 7, 10, 9, 9, 8, 8],
        'long_dry_season': [5, 4, 8, 3, 6, 5, 9, 6, 5, 4, 4, 5, 5, 6, 7, 6, 4, 4, 5, 8, 4, 4, 5, 5, 6],
        'short_rainy_season': [8, 9, 6, 9, 7, 8, 6, 7, 8, 7, 7, 8, 8, 7, 8, 8, 8, 8, 7, 6, 9, 8, 8, 7, 7]
    }
    
    # Parse slope values
    slope_ranges = {
        '0-8': 8,
        '8-15': 15,
        '15-30': 30,
        '>30': 40
    }
    max_slope_value = slope_ranges[slope]
    
    recommended = []
    
    for i, crop in enumerate(rwandan_crops):
        score = 100  # Start with perfect score
        reasons = []  # Track reasons for score adjustments
        
        # Check altitude suitability
        alt_score = altitude_suitability[altitude][i]
        if alt_score < 7:
            adjustment = (7 - alt_score) * 3
            score -= adjustment
            reasons.append(f"Sub-optimal altitude (-{adjustment})")
        
        # Check season suitability
        season_score = season_suitability[season][i]
        if season_score < 7:
            adjustment = (7 - season_score) * 3
            score -= adjustment
            reasons.append(f"Sub-optimal planting season (-{adjustment})")
        
        # Check water requirement compatibility
        water_req = water_requirements[f'{altitude}_altitude'][f'{season}'][i]
        seasonal_rainfall = {
            'short_dry_season': 200,
            'long_rainy_season': 600,
            'long_dry_season': 150,
            'short_rainy_season': 450
        }
        rainfall_deficit = water_req - seasonal_rainfall[season]
        
        if rainfall_deficit > 200:
            adjustment = min(30, (rainfall_deficit - 200) // 50 * 5)
            score -= adjustment
            reasons.append(f"High water requirement for season (-{adjustment})")
        
        # Check soil compatibility
        soil_score = soil_preferences[soil_type][i]
        if soil_score < 7:
            adjustment = (7 - soil_score) * 3
            score -= adjustment
            reasons.append(f"Sub-optimal soil type (-{adjustment})")
        
        # Check slope compatibility
        if slope_tolerance[i] < max_slope_value:
            adjustment = min(30, (max_slope_value - slope_tolerance[i]) * 2)
            score -= adjustment
            reasons.append(f"Slope too steep (-{adjustment})")
        
        # Check pH compatibility
        min_ph = ph_preferences['min_ph'][i]
        max_ph = ph_preferences['max_ph'][i]
        if ph_level < min_ph:
            adjustment = min(25, int((min_ph - ph_level) * 10))
            score -= adjustment
            reasons.append(f"pH too low (-{adjustment})")
        elif ph_level > max_ph:
            adjustment = min(25, int((ph_level - max_ph) * 10))
            score -= adjustment
            reasons.append(f"pH too high (-{adjustment})")
        
        # Check drought resistance if drought risk
        if drought_risk:
            drought_res = drought_resistance[i]
            if drought_res < 6:
                adjustment = (6 - drought_res) * 5
                score -= adjustment
                reasons.append(f"Low drought resistance (-{adjustment})")
        
        # Check pest vulnerability if pest pressure is high
        if pest_pressure:
            pest_vuln = pest_vulnerability[i]
            if pest_vuln > 6:
                adjustment = (pest_vuln - 6) * 5
                score -= adjustment
                reasons.append(f"High pest vulnerability (-{adjustment})")
        
        # Check yield if minimum yield is specified
        if min_yield is not None:
            avg_yield = expected_yield[i]
            if avg_yield < min_yield:
                adjustment = min(25, int((min_yield - avg_yield) / min_yield * 100))
                score -= adjustment
                reasons.append(f"Below minimum yield requirement (-{adjustment})")
        
        # Add to recommended list if score is above threshold
        if score > 40:
            recommended.append({
                'crop': crop,
                'suitability_score': max(0, score),
                'water_requirement': water_req,
                'expected_yield': expected_yield[i],
                'drought_resistance': drought_resistance[i],
                'pest_vulnerability': pest_vulnerability[i],
                'optimal_ph': f"{min_ph} - {max_ph}",
                'soil_suitability': soil_preferences[soil_type][i],
                'altitude_suitability': altitude_suitability[altitude][i],
                'season_suitability': season_suitability[season][i],
                'adjustment_reasons': reasons
            })
    
    # Sort by suitability score
    recommended.sort(key=lambda x: x['suitability_score'], reverse=True)
    
    return recommended






# Function to suggest intercropping combinations
def suggest_intercropping(main_crop):
    """
    Suggest intercropping combinations based on a main crop.
    
    Parameters:
    - main_crop: The primary crop to be planted
    
    Returns:
    - List of compatible companion crops
    """
    try:
        crop_idx = rwanda_agricultural_data['crops'].index(main_crop)
    except ValueError:
        return f"Crop '{main_crop}' not found in database."
    
    intercrop_str = rwanda_agricultural_data['intercropping_compatibility'][crop_idx]
    if intercrop_str == 'None':
        return f"{main_crop} is not generally recommended for intercropping."
    
    return intercrop_str.split(',')

# Function to create crop rotation plans
def create_crop_rotation_plan(current_crop, years=3):
    """
    Create a crop rotation plan based on current crop.
    
    Parameters:
    - current_crop: The crop currently planted
    - years: Number of years to plan for rotation
    
    Returns:
    - List of crop rotation suggestions by year
    """
    try:
        crop_idx = rwanda_agricultural_data['crops'].index(current_crop)
    except ValueError:
        return f"Crop '{current_crop}' not found in database."
    
    # Get good rotation crops
    good_rotations = rwanda_agricultural_data['crop_rotation']['good_previous_crops'][crop_idx].split(',')
    bad_rotations = rwanda_agricultural_data['crop_rotation']['bad_previous_crops'][crop_idx].split(',')
    fallow_recommended = rwanda_agricultural_data['crop_rotation']['fallow_period_recommended'][crop_idx]
    
    # Filter out 'None' values
    if good_rotations == ['None']:
        good_rotations = []
    
    # Generate rotation plan
    rotation_plan = [{'year': 1, 'crop': current_crop}]
    
    # Add subsequent years
    for year in range(2, years + 1):
        if year == 2:
            if fallow_recommended:
                rotation_plan.append({'year': year, 'crop': 'Fallow'})
            elif good_rotations:
                rotation_plan.append({'year': year, 'crop': good_rotations[0]})
            else:
                # Find a non-bad rotation crop
                alt_crop = next((crop for crop in rwanda_agricultural_data['crops'] 
                                if crop != current_crop and crop not in bad_rotations), 'Fallow')
                rotation_plan.append({'year': year, 'crop': alt_crop})
        else:
            # For later years, try to vary the crops
            previous_crops = [item['crop'] for item in rotation_plan]
            candidates = [crop for crop in good_rotations if crop not in previous_crops]
            
            if candidates:
                rotation_plan.append({'year': year, 'crop': candidates[0]})
            elif year % 3 == 0:
                # Every third year, return to the original crop
                rotation_plan.append({'year': year, 'crop': current_crop})
            else:
                # Find another suitable crop
                alt_crop = next((crop for crop in rwanda_agricultural_data['crops'] 
                                if crop not in previous_crops and crop not in bad_rotations), 'Fallow')
                rotation_plan.append({'year': year, 'crop': alt_crop})
    
    return rotation_plan

# Execute all the calculations and save comprehensive dataset
def main():
    # Create comprehensive dataset with all crop requirements
    comprehensive_df = create_comprehensive_dataset()
    
    # Print column names to verify what's available
    # print("Available columns:", comprehensive_df.columns.tolist())
    
    # Save the dataset to CSV
    comprehensive_df.to_csv('data/comprehensive_crop_requirements.csv', index=False)
    
    # Find the columns that contain "adjusted" 
    adjusted_cols = [col for col in comprehensive_df.columns if 'adjusted' in col]
    # print("Adjusted columns:", adjusted_cols)
    
    # Select columns that exist in the dataframe
    columns_to_include = ['crop']
    for col in ['nitrogen_req', 'phosphorus_req', 'potassium_req', 'optimal_ph', 'planting_season', 
               'days_to_harvest', 'drought_resistance', 'optimal_yield', 'average_yield']:
        if col in comprehensive_df.columns:
            columns_to_include.append(col)
    
    # Add some adjusted columns (choose ones that exist)
    for altitude in ['low', 'mid', 'high']:
        col_name = f'{altitude}_altitude_short_dry_adjusted'
        if col_name in comprehensive_df.columns:
            columns_to_include.append(col_name)
    
    key_info_df = comprehensive_df[columns_to_include]
    key_info_df.to_csv('data/crop_key_info.csv', index=False)
    
    # print("Crop requirement datasets created successfully.")



In [10]:
test_data = main()
print(test_data)

None


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib
import os

# Function to clean and prepare the dataset
def clean_dataset(df):
    """
    Clean and prepare the crop requirements dataset for model training.
    
    Parameters:
    - df: DataFrame containing the comprehensive crop requirements
    
    Returns:
    - Cleaned DataFrame ready for model training
    """
    # Create a copy to avoid modifying the original
    cleaned_df = df.copy()
    
    # Handle missing values
    numeric_columns = cleaned_df.select_dtypes(include=[np.number]).columns
    for col in numeric_columns:
        cleaned_df[col].fillna(cleaned_df[col].median(), inplace=True)
    
    # Handle categorical columns
    categorical_columns = cleaned_df.select_dtypes(include=['object']).columns
    for col in categorical_columns:
        cleaned_df[col].fillna(cleaned_df[col].mode()[0], inplace=True)
    
    # Remove duplicate entries
    cleaned_df.drop_duplicates(inplace=True)
    
    # Convert string columns that represent lists back to proper format
    list_columns = ['intercropping_compatibility', 'common_pests', 'common_diseases', 
                    'good_previous_crops', 'bad_previous_crops']
    
    for col in list_columns:
        if col in cleaned_df.columns:
            # If strings contain commas (assuming CSV format lists)
            cleaned_df[col] = cleaned_df[col].apply(lambda x: x.split(',') if isinstance(x, str) else x)
    
    # Create binary features for fallow recommendation
    if 'fallow_recommended' in cleaned_df.columns:
        cleaned_df['fallow_recommended'] = cleaned_df['fallow_recommended'].astype(int)
    
    return cleaned_df

# Function to create feature matrices for different soil types
def create_soil_specific_features(df):
    """
    Create soil-specific feature matrices from the cleaned dataset.
    
    Parameters:
    - df: Cleaned DataFrame
    
    Returns:
    - Dictionary of soil-type specific DataFrames with relevant features
    """
    soil_types = [
        # Rwanda-specific soil types
        'nitisol',    # Red, deep, well-drained soils with good structure
        'ferralsol',  # Highly weathered, red or yellow soils common in Rwanda's highlands
        'acrisol',    # Acidic soils found in higher rainfall areas
        'andosol',    # Volcanic soils found near Rwanda's volcanic mountains
        'vertisol',   # Clay-rich soils that crack when dry
        'histosol',   # Organic-rich soils found in wetland areas
        # Original general types (kept for backward compatibility)
        'sandy', 
        'loamy', 
        'clay', 
        'silty', 
        'peaty'
    ]
    soil_dfs = {}
    
    # Select relevant features for prediction
    base_features = ['crop', 'nitrogen_req', 'phosphorus_req', 'potassium_req', 
                     'optimal_ph', 'min_sunlight_hours', 'optimal_sunlight_hours',
                     'drought_resistance', 'pest_vulnerability', 'disease_vulnerability']
    
    # Add water requirement columns
    water_features = [col for col in df.columns if 'adjusted' in col]
    
    for soil_type in soil_types:
        # Create a copy for this soil type
        soil_df = df[base_features + water_features].copy()
        
        # Apply soil-specific adjustments (example: adjust nutrient needs based on soil type)
        soil_adjustments = {
            # Common soil types in Rwanda
            'nitisol': {'nitrogen_factor': 1.0, 'phosphorus_factor': 1.0, 'potassium_factor': 0.9},
            'ferralsol': {'nitrogen_factor': 0.8, 'phosphorus_factor': 1.3, 'potassium_factor': 0.8},
            'acrisol': {'nitrogen_factor': 0.9, 'phosphorus_factor': 1.2, 'potassium_factor': 0.7},
            'andosol': {'nitrogen_factor': 1.1, 'phosphorus_factor': 0.9, 'potassium_factor': 1.0},
            'vertisol': {'nitrogen_factor': 1.0, 'phosphorus_factor': 1.1, 'potassium_factor': 1.2},
            'histosol': {'nitrogen_factor': 0.7, 'phosphorus_factor': 1.4, 'potassium_factor': 0.8},
            # Keep original general types for backward compatibility
            'sandy': {'nitrogen_factor': 1.2, 'phosphorus_factor': 1.1, 'potassium_factor': 1.3},
            'loamy': {'nitrogen_factor': 1.0, 'phosphorus_factor': 1.0, 'potassium_factor': 1.0},
            'clay': {'nitrogen_factor': 0.9, 'phosphorus_factor': 1.2, 'potassium_factor': 0.8},
            'silty': {'nitrogen_factor': 1.1, 'phosphorus_factor': 1.0, 'potassium_factor': 0.9},
            'peaty': {'nitrogen_factor': 0.8, 'phosphorus_factor': 1.3, 'potassium_factor': 0.7}
        }
        
        # Apply the adjustments
        soil_df['adjusted_nitrogen'] = soil_df['nitrogen_req'] * soil_adjustments[soil_type]['nitrogen_factor']
        soil_df['adjusted_phosphorus'] = soil_df['phosphorus_req'] * soil_adjustments[soil_type]['phosphorus_factor']
        soil_df['adjusted_potassium'] = soil_df['potassium_req'] * soil_adjustments[soil_type]['potassium_factor']
        
        # Store the dataframe
        soil_dfs[soil_type] = soil_df
    
    return soil_dfs


def train_crop_requirement_models(soil_dfs):
    """
    Train machine learning models for predicting optimal crop requirements.
    
    Parameters:
    - soil_dfs: Dictionary of soil-specific DataFrames
    
    Returns:
    - Dictionary of trained models for each soil type
    """
    models = {}
    
    # Define all possible altitude-season combinations we want to support
    altitude_types = ['low', 'mid', 'high']
    season_types = ['dry', 'short_dry', 'long_rainy', 'long_dry', 'short_rainy']
    
    for soil_type, df in soil_dfs.items():
        print(f"Training model for {soil_type} soil...")
        
        # Define base target columns
        base_targets = ['adjusted_nitrogen', 'adjusted_phosphorus', 'adjusted_potassium']
        
        # Generate all altitude-season combinations
        altitude_season_targets = []
        for altitude in altitude_types:
            # Basic altitude adjustment
            altitude_season_targets.append(f"{altitude}_altitude_adjusted")
            
            # Season-specific altitude adjustments
            for season in season_types:
                altitude_season_targets.append(f"{altitude}_altitude_{season}_adjusted")
        
        # Combine all potential target columns
        potential_targets = base_targets + altitude_season_targets
        
        # Filter to only include columns that actually exist in the dataframe
        existing_targets = [col for col in potential_targets if col in df.columns]
        
        # Create any missing altitude-season columns in the dataframe
        for target in potential_targets:
            if target not in df.columns:
                # Check if this is an altitude-season column we need to create
                if any(f"{alt}_altitude" in target for alt in altitude_types) and any(season in target for season in season_types):
                    # Extract altitude and season from the target name
                    parts = target.split('_')
                    altitude = parts[0]
                    
                    # Find the season part
                    season = None
                    for s in season_types:
                        if s in target:
                            season = s
                            break
                    
                    # Create the column if we can extract altitude and season
                    if altitude and season:
                        # Use base water requirement column if it exists
                        if 'water_requirement' in df.columns:
                            base_water = df['water_requirement']
                        else:
                            # Create a default water requirement based on crop type
                            base_water = df.apply(lambda row: get_default_water(row['crop']), axis=1)
                        
                        # Apply altitude and season factors
                        altitude_factor = get_altitude_factor(altitude)
                        season_factor = get_season_factor(season)
                        
                        # Create the new column
                        df[target] = base_water * altitude_factor * season_factor
                        
                        # Add this new column to our targets
                        existing_targets.append(target)
                        print(f"Created missing column: {target}")
        
        if not existing_targets:
            print(f"Warning: No target columns found for {soil_type}. Skipping this soil type.")
            continue
            
        print(f"Using target columns: {existing_targets}")
        
        # Features (all columns except targets and any other non-feature columns)
        non_feature_cols = existing_targets + ['id'] if 'id' in df.columns else existing_targets
        feature_df = df.drop(non_feature_cols, axis=1, errors='ignore')
        
        # Convert crop names to categorical using one-hot encoding
        categorical_features = ['crop'] if 'crop' in feature_df.columns else []
        numeric_features = list(set(feature_df.columns) - set(categorical_features))
        
        # Create preprocessing pipeline
        transformers = []
        if numeric_features:
            transformers.append(('num', StandardScaler(), numeric_features))
        if categorical_features:
            transformers.append(('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features))
            
        preprocessor = ColumnTransformer(transformers=transformers)
        
        # Create separate models for each target variable
        soil_models = {}
        
        for target in existing_targets:
            # Extract the target values
            y = df[target]
            
            # Create the pipeline with preprocessor and model
            model_pipeline = Pipeline(steps=[
                ('preprocessor', preprocessor),
                ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
            ])
            
            # Train the model
            model_pipeline.fit(feature_df, y)
            
            # Store the model
            soil_models[target] = model_pipeline
        
        # Store all models for this soil type
        models[soil_type] = soil_models
    
    return models

# Helper functions for generating water requirements
def get_default_water(crop_name):
    """Return a default water requirement based on crop type"""
    # Map common crops to typical water requirements (in mm)
    water_map = {
        'rice': 900,
        'maize': 500, 
        'wheat': 450,
        'beans': 300,
        'cassava': 400,
        'potato': 500,
        'tomato': 400,
        'cabbage': 380,
        'carrot': 350,
        'onion': 350
    }
    
    # Convert crop name to lowercase for case-insensitive matching
    crop_lower = crop_name.lower()
    
    # Return the mapped value or a default of 450mm
    return water_map.get(crop_lower, 450)

def get_altitude_factor(altitude):
    """Return adjustment factor based on altitude"""
    factors = {
        'low': 1.2,   # Higher water requirement in low altitude
        'mid': 1.0,   # Base reference
        'high': 0.8   # Lower water requirement in high altitude
    }
    return factors.get(altitude, 1.0)

def get_season_factor(season):
    """Return adjustment factor based on season"""
    factors = {
        'dry': 1.2,          # General dry season
        'short_dry': 1.1,    # Higher requirement in dry season
        'long_rainy': 0.5,   # Lower requirement in rainy season
        'long_dry': 1.4,     # Highest requirement in long dry season
        'short_rainy': 0.8   # Moderate requirement in short rainy season
    }
    return factors.get(season, 1.0)

# Function to save the trained models
def save_models(models, model_dir='models'):
    """
    Save trained models to disk.
    
    Parameters:
    - models: Dictionary of trained models
    - model_dir: Directory to save models
    """
    # Create the model directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    
    # Save each soil type model
    for soil_type, soil_models in models.items():
        soil_dir = os.path.join(model_dir, soil_type)
        os.makedirs(soil_dir, exist_ok=True)
        
        # Save each target model
        for target, model in soil_models.items():
            model_path = os.path.join(soil_dir, f"{target}_model.joblib")
            joblib.dump(model, model_path)
            # print(f"Saved model: {model_path}")

# Function to predict crop requirements
def predict_crop_requirements(crop_name, soil_type, model_dir='models', altitude='mid', season='dry', duration='short'):
    """
    Predict crop requirements for best yield based on crop name and soil type.
    
    Parameters:
    - crop_name: Name of the crop
    - soil_type: Type of soil ('sandy', 'loamy', 'clay', 'silty', 'peaty', 'andosol', 'vertisol', 'histosol')
    - model_dir: Directory where models are stored
    - altitude: Altitude level ('low', 'mid', 'high')
    - season: Growing season ('dry', 'rainy')
    - duration: Season duration ('short', 'long')
    
    Returns:
    - Dictionary of predicted requirements for best yield
    """
    # Load the base crop data to use as input features
    try:
        # Load the comprehensive dataset
        df = pd.read_csv('data/comprehensive_crop_requirements.csv')
        
        # Get the row for this crop
        crop_data = df[df['crop'] == crop_name]
        
        if len(crop_data) == 0:
            return {"error": f"Crop '{crop_name}' not found in the database."}
        
        # Create input features
        input_features = crop_data.copy()
        
        # Load the models for this soil type
        soil_models = {}
        soil_dir = os.path.join(model_dir, soil_type)
        
        target_variables = ['adjusted_nitrogen', 'adjusted_phosphorus', 'adjusted_potassium',
                         f'low_altitude_{duration}_{season}_adjusted', 
                         f'mid_altitude_{duration}_{season}_adjusted', 
                         f'high_altitude_{duration}_{season}_adjusted']
        
        for target in target_variables:
            model_path = os.path.join(soil_dir, f"{target}_model.joblib")
            if os.path.exists(model_path):
                soil_models[target] = joblib.load(model_path)
            else:
                return {"error": f"Model for {soil_type} soil and {target} not found."}
        
        # Make predictions for each target variable
        predictions = {}
        for target, model in soil_models.items():
            predictions[target] = model.predict(input_features)[0]
        
        # Select the water requirement based on altitude and season
        water_req_key = f"{altitude}_altitude_{duration}_{season}_adjusted"
        water_requirement = predictions.get(water_req_key, None)
        
        # Format the results
        requirements = {
            "crop": crop_name,
            "soil_type": soil_type,
            "requirements": {
                "nitrogen_kg_per_ha": round(predictions['adjusted_nitrogen'], 2),
                "phosphorus_kg_per_ha": round(predictions['adjusted_phosphorus'], 2),
                "potassium_kg_per_ha": round(predictions['adjusted_potassium'], 2),
                "water_requirement_mm": round(water_requirement, 2) if water_requirement else None,
                "optimal_ph": float(crop_data['optimal_ph'].values[0]),
                "min_sunlight_hours": int(crop_data['min_sunlight_hours'].values[0]),
                "planting_info": {
                    "row_spacing_cm": int(crop_data['row_spacing_cm'].values[0]) if 'row_spacing_cm' in crop_data.columns else None,
                    "plant_spacing_cm": int(crop_data['plant_spacing_cm'].values[0]) if 'plant_spacing_cm' in crop_data.columns else None,
                    "planting_depth_cm": int(crop_data['planting_depth_cm'].values[0]) if 'planting_depth_cm' in crop_data.columns else None,
                }
            },
            "expected_yield_tons_per_ha": float(crop_data['optimal_yield'].values[0]) if 'optimal_yield' in crop_data.columns else None
        }
        
        # Add intercropping recommendation if available
        if 'intercropping_compatibility' in crop_data.columns:
            intercrop_value = crop_data['intercropping_compatibility'].values[0]
            if isinstance(intercrop_value, str) and intercrop_value != 'None':
                requirements["intercropping_recommendation"] = intercrop_value.split(',')
        
        return requirements
        
    except Exception as e:
        return {"error": f"Error predicting requirements: {str(e)}"}
    
    
def example_usage():
    """
    Example usage of the crop requirement prediction function.
    """
    crop_name = "Maize"
    soil_type = "loamy"
    
    print(f"Predicting requirements for {crop_name} in {soil_type} soil...")
    requirements = predict_crop_requirements(crop_name, soil_type)
    
    # Check if there's an error in the result
    if "error" in requirements:
        print(f"Error: {requirements['error']}")
        return
    
    print(f"\nCrop Requirements for {crop_name} in {soil_type} soil:")
    print(f"Nitrogen: {requirements['requirements']['nitrogen_kg_per_ha']} kg/ha")
    print(f"Phosphorus: {requirements['requirements']['phosphorus_kg_per_ha']} kg/ha")
    print(f"Potassium: {requirements['requirements']['potassium_kg_per_ha']} kg/ha")
    
    if requirements['requirements']['water_requirement_mm']:
        print(f"Water requirement: {requirements['requirements']['water_requirement_mm']} mm")
    
    print(f"Optimal pH: {requirements['requirements']['optimal_ph']}")
    print(f"Minimum sunlight: {requirements['requirements']['min_sunlight_hours']} hours")
    print(f"\nPlanting information:")
    print(f"Row spacing: {requirements['requirements']['planting_info']['row_spacing_cm']} cm")
    print(f"Plant spacing: {requirements['requirements']['planting_info']['plant_spacing_cm']} cm")
    print(f"Planting depth: {requirements['requirements']['planting_info']['planting_depth_cm']} cm")
    
    print(f"\nExpected yield: {requirements['expected_yield_tons_per_ha']} tons/ha")
    
    if "intercropping_recommendation" in requirements:
        print(f"\nIntercropping recommendation: {', '.join(requirements['intercropping_recommendation'])}")
# Main function to run the entire process
def main():
    """
    Main function to process the dataset, train models, and save them.
    """
    print("Loading comprehensive crop requirements dataset...")
    try:
        df = pd.read_csv('data/comprehensive_crop_requirements.csv')
    except FileNotFoundError:
        print("Error: Dataset file not found. Please make sure the file exists.")
        return
    
    print("Cleaning dataset...")
    cleaned_df = clean_dataset(df)
    
    print("Creating soil-specific feature matrices...")
    soil_dfs = create_soil_specific_features(cleaned_df)
    
    print("Training crop requirement prediction models...")
    models = train_crop_requirement_models(soil_dfs)
    
    print("Saving trained models...")
    save_models(models)
    
    print("Testing prediction function...")
    # Test the prediction function with a sample crop and soil type
    test_result = predict_crop_requirements('Maize', 'loamy')
    print(f"Sample prediction for Maize in loamy soil:")
    for key, value in test_result.items():
        if key != 'requirements':
            print(f"{key}: {value}")
        else:
            print("Requirements:")
            for req_key, req_value in value.items():
                print(f"  {req_key}: {req_value}")
    
    print("\nProcess completed successfully. Models are ready for crop requirement predictions.")


if __name__ == "__main__":
    main()
    example_usage()

Loading comprehensive crop requirements dataset...
Cleaning dataset...
Creating soil-specific feature matrices...
Training crop requirement prediction models...
Training model for nitisol soil...
Created missing column: low_altitude_dry_adjusted
Created missing column: mid_altitude_dry_adjusted
Created missing column: high_altitude_dry_adjusted
Using target columns: ['adjusted_nitrogen', 'adjusted_phosphorus', 'adjusted_potassium', 'low_altitude_short_dry_adjusted', 'low_altitude_long_rainy_adjusted', 'low_altitude_long_dry_adjusted', 'low_altitude_short_rainy_adjusted', 'mid_altitude_short_dry_adjusted', 'mid_altitude_long_rainy_adjusted', 'mid_altitude_long_dry_adjusted', 'mid_altitude_short_rainy_adjusted', 'high_altitude_short_dry_adjusted', 'high_altitude_long_rainy_adjusted', 'high_altitude_long_dry_adjusted', 'high_altitude_short_rainy_adjusted', 'low_altitude_dry_adjusted', 'mid_altitude_dry_adjusted', 'high_altitude_dry_adjusted']


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cleaned_df[col].fillna(cleaned_df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cleaned_df[col].fillna(cleaned_df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate objec

Training model for ferralsol soil...
Created missing column: low_altitude_dry_adjusted
Created missing column: mid_altitude_dry_adjusted
Created missing column: high_altitude_dry_adjusted
Using target columns: ['adjusted_nitrogen', 'adjusted_phosphorus', 'adjusted_potassium', 'low_altitude_short_dry_adjusted', 'low_altitude_long_rainy_adjusted', 'low_altitude_long_dry_adjusted', 'low_altitude_short_rainy_adjusted', 'mid_altitude_short_dry_adjusted', 'mid_altitude_long_rainy_adjusted', 'mid_altitude_long_dry_adjusted', 'mid_altitude_short_rainy_adjusted', 'high_altitude_short_dry_adjusted', 'high_altitude_long_rainy_adjusted', 'high_altitude_long_dry_adjusted', 'high_altitude_short_rainy_adjusted', 'low_altitude_dry_adjusted', 'mid_altitude_dry_adjusted', 'high_altitude_dry_adjusted']
Training model for acrisol soil...
Created missing column: low_altitude_dry_adjusted
Created missing column: mid_altitude_dry_adjusted
Created missing column: high_altitude_dry_adjusted
Using target column

In [8]:

# Modified function to predict crop requirements with better error handling and fallbacks
def predict_crop_requirements(crop_name, soil_type, model_dir='models', altitude='mid', season='short_dry'):
    """
    Predict crop requirements for best yield based on crop name, soil type and season.
    
    Parameters:
    - crop_name: Name of the crop
    - soil_type: Type of soil ('sandy', 'loamy', 'clay', 'silty', 'peaty', etc.)
    - model_dir: Directory where models are stored
    - altitude: Altitude level ('low', 'mid', 'high')
    - season: Growing season in Rwanda ('short_dry', 'long_rainy', 'long_dry', 'short_rainy')
    
    Returns:
    - Dictionary of predicted requirements for best yield or error message
    """
    try:
        # Check if model directory exists
        if not os.path.exists(model_dir):
            return {"error": f"Model directory '{model_dir}' not found."}
            
        # Check if soil type subdirectory exists
        soil_dir = os.path.join(model_dir, soil_type)
        if not os.path.exists(soil_dir):
            # Try to find an alternative soil type
            available_soils = [d for d in os.listdir(model_dir) 
                              if os.path.isdir(os.path.join(model_dir, d))]
            if not available_soils:
                return {"error": f"No soil type models found in '{model_dir}'."}
            
            # Use the first available soil type as fallback
            soil_type = available_soils[0]
            soil_dir = os.path.join(model_dir, soil_type)
            print(f"Warning: Soil type '{soil_type}' not found. Using '{soil_type}' instead.")
        
        # Load the comprehensive dataset
        try:
            df = pd.read_csv('data/comprehensive_crop_requirements.csv')
        except FileNotFoundError:
            return {"error": "Dataset file not found. Please make sure 'data/comprehensive_crop_requirements.csv' exists."}
        
        # Get the row for this crop
        crop_data = df[df['crop'].str.lower() == crop_name.lower()]
        
        if len(crop_data) == 0:
            # Try to find a close match using fuzzy matching
            from difflib import get_close_matches
            all_crops = df['crop'].unique()
            close_matches = get_close_matches(crop_name, all_crops)
            
            if close_matches:
                crop_name = close_matches[0]
                crop_data = df[df['crop'] == crop_name]
                print(f"Warning: Crop '{crop_name}' not found. Using closest match '{crop_name}' instead.")
            else:
                return {"error": f"Crop '{crop_name}' not found in the database and no close matches were found."}
        
        # Create input features with a copy of crop data
        input_features = crop_data.copy()
        
        # Map Rwanda's seasons to the format in the dataset
        season_mapping = {
            'short_dry': 'short_dry',      # Dec-Feb
            'long_rainy': 'long_rainy',    # Mar-May
            'long_dry': 'long_dry',        # Jun-Sep
            'short_rainy': 'short_rainy'   # Sep-Dec
        }
        
        detailed_season = season_mapping.get(season, 'short_dry')  # Default to short_dry if unknown
        
        # Add missing altitude and season columns that models might expect
        altitude_types = ['low', 'mid', 'high']
        season_types = ['dry', 'wet', 'short_dry', 'long_rainy', 'long_dry', 'short_rainy']
        
        # Generate all possible altitude-season combinations and ensure they exist in input_features
        for alt in altitude_types:
            for seas in season_types:
                col_name = f"{alt}_altitude_{seas}_adjusted"
                if col_name not in input_features.columns:
                    input_features[col_name] = 0.0  # Add with default value
                
        # Also ensure base altitude columns exist
        for alt in altitude_types:
            col_name = f"{alt}_altitude_adjusted"
            if col_name not in input_features.columns:
                input_features[col_name] = 0.0
        
        # Load the models for this soil type
        soil_models = {}
        
        # Define target variables - both general and season-specific
        base_targets = ['adjusted_nitrogen', 'adjusted_phosphorus', 'adjusted_potassium']
        
        # Check available model files directly
        available_models = []
        for filename in os.listdir(soil_dir):
            if filename.endswith('_model.joblib'):
                model_name = filename.replace('_model.joblib', '')
                available_models.append(model_name)
        
        # Find the best matching water requirement column/model
        water_req_key = None
        
        # First try: exact match for altitude and season
        exact_match = f"{altitude}_altitude_{detailed_season}_adjusted"
        if exact_match in available_models:
            water_req_key = exact_match
        
        # Second try: match with any season at this altitude
        if not water_req_key:
            altitude_models = [m for m in available_models if f"{altitude}_altitude" in m and "_adjusted" in m]
            if altitude_models:
                water_req_key = altitude_models[0]
                print(f"Warning: No exact match for {altitude} altitude {detailed_season}. Using {water_req_key} instead.")
        
        # Third try: match with any altitude for this season
        if not water_req_key:
            season_models = [m for m in available_models if f"{detailed_season}_adjusted" in m]
            if season_models:
                water_req_key = season_models[0]
                print(f"Warning: No model for {altitude} altitude. Using {water_req_key} instead.")
        
        # Fourth try: use any available water requirement model
        if not water_req_key:
            water_models = [m for m in available_models if "_altitude" in m and "_adjusted" in m]
            if water_models:
                water_req_key = water_models[0]
                print(f"Warning: No specific model found for {altitude} altitude {detailed_season}. Using {water_req_key} as fallback.")
            else:
                # If no water requirement models are found, we'll proceed without water predictions
                print(f"Warning: No water requirement models found. Proceeding with nutrient predictions only.")
                water_req_key = None
        
        # Try to load all required models
        target_variables = base_targets.copy()
        if water_req_key:
            target_variables.append(water_req_key)
            
        missing_models = []
        
        for target in target_variables:
            model_path = os.path.join(soil_dir, f"{target}_model.joblib")
            if os.path.exists(model_path):
                try:
                    soil_models[target] = joblib.load(model_path)
                except Exception as e:
                    print(f"Error loading model {target}: {str(e)}")
                    missing_models.append(f"{target} (Error: {str(e)})")
            else:
                missing_models.append(target)
        
        # If we're missing nutrient models, try to use base models directly from dataset
        if any(target in missing_models for target in base_targets):
            print("Warning: Some nutrient models are missing. Using dataset values directly.")
            for nutrient in base_targets:
                if nutrient in missing_models and nutrient.replace('adjusted_', '') in crop_data.columns:
                    base_nutrient = nutrient.replace('adjusted_', '')
                    soil_models[nutrient] = {
                        'direct_value': float(crop_data[base_nutrient].values[0])
                    }
                    missing_models.remove(nutrient)
        
        # If we're still missing crucial models after fallbacks, return error
        if missing_models and all(target in missing_models for target in base_targets):
            return {"error": f"Critical models not found: {', '.join(missing_models)}"}
        
        # Make predictions for each target variable or use direct values
        predictions = {}
        for target, model in soil_models.items():
            if isinstance(model, dict) and 'direct_value' in model:
                # Use direct value from the dataset
                predictions[target] = model['direct_value']
            else:
                # Use the model to predict
                try:
                    # Check if input_features has all columns the model expects
                    if hasattr(model, 'feature_names_in_'):
                        missing_cols = set(model.feature_names_in_) - set(input_features.columns)
                        for col in missing_cols:
                            input_features[col] = 0.0  # Add missing columns with default values
                    
                    predictions[target] = model.predict(input_features)[0]
                except Exception as e:
                    print(f"Error predicting with {target} model: {str(e)}")
                    # Use a default value based on dataset if prediction fails
                    if target.replace('adjusted_', '') in crop_data.columns:
                        base_value = crop_data[target.replace('adjusted_', '')].values[0]
                        predictions[target] = float(base_value)
                        print(f"Using default value for {target}: {predictions[target]}")
                    else:
                        # Use reasonable defaults if all else fails
                        defaults = {
                            'adjusted_nitrogen': 50.0,
                            'adjusted_phosphorus': 25.0,
                            'adjusted_potassium': 30.0
                        }
                        if target in defaults:
                            predictions[target] = defaults[target]
                            print(f"Using standard default for {target}: {predictions[target]}")
                        else:
                            # For water requirements, use a reasonable default based on season
                            water_defaults = {
                                'short_dry': 450,
                                'long_rainy': 200,
                                'long_dry': 550,
                                'short_rainy': 350
                            }
                            predictions[target] = water_defaults.get(season, 400)
                            print(f"Using seasonal default water value: {predictions[target]}")
        
        # Apply seasonal adjustments for nutrient requirements
        seasonal_factors = {
            'short_dry': {
                'nitrogen_factor': 1.0,
                'phosphorus_factor': 1.0,
                'potassium_factor': 1.0,
                'yield_factor': 1.0
            },
            'long_rainy': {
                'nitrogen_factor': 1.25,
                'phosphorus_factor': 0.9,
                'potassium_factor': 1.15,
                'yield_factor': 1.1
            },
            'long_dry': {
                'nitrogen_factor': 0.9,
                'phosphorus_factor': 1.1,
                'potassium_factor': 0.95,
                'yield_factor': 0.9
            },
            'short_rainy': {
                'nitrogen_factor': 1.15,
                'phosphorus_factor': 0.95,
                'potassium_factor': 1.05,
                'yield_factor': 1.05
            }
        }
        
        # Apply seasonal adjustment factors
        nitrogen = predictions.get('adjusted_nitrogen', 50.0) * seasonal_factors[season]['nitrogen_factor']
        phosphorus = predictions.get('adjusted_phosphorus', 25.0) * seasonal_factors[season]['phosphorus_factor']
        potassium = predictions.get('adjusted_potassium', 30.0) * seasonal_factors[season]['potassium_factor']
        
        # Select the water requirement based on altitude and season
        water_requirement = predictions.get(water_req_key, None)
        
        # If water_requirement is still None, provide a default based on season and altitude
        if water_requirement is None:
            # Base defaults by season (in mm)
            water_defaults = {
                'short_dry': 450,
                'long_rainy': 200,
                'long_dry': 550,
                'short_rainy': 350
            }
            
            # Altitude adjustment factors
            altitude_factors = {
                'low': 1.2,  # Higher water requirement in low altitude
                'mid': 1.0,  # Base reference
                'high': 0.8   # Lower water requirement in high altitude (cooler, less evaporation)
            }
            
            # Use the default for the season, adjusted by altitude
            base_water = water_defaults.get(season, 400)
            altitude_factor = altitude_factors.get(altitude, 1.0)
            water_requirement = base_water * altitude_factor
            print(f"Using calculated default water requirement: {water_requirement} mm")
        
        # Format the results
        requirements = {
            "crop": crop_name,
            "soil_type": soil_type,
            "season": season,
            "altitude": altitude,
            "requirements": {
                "nitrogen_kg_per_ha": round(nitrogen, 2),
                "phosphorus_kg_per_ha": round(phosphorus, 2),
                "potassium_kg_per_ha": round(potassium, 2),
                "water_requirement_mm": round(water_requirement, 2) if water_requirement else None,
            }
        }
        
        # Add optional fields if they exist in the dataset
        if 'optimal_ph' in crop_data:
            requirements["requirements"]["optimal_ph"] = float(crop_data['optimal_ph'].values[0])
        
        if 'min_sunlight_hours' in crop_data:
            requirements["requirements"]["min_sunlight_hours"] = int(crop_data['min_sunlight_hours'].values[0])
        
        # Add planting information if available
        planting_fields = ['row_spacing_cm', 'plant_spacing_cm', 'planting_depth_cm']
        if all(field in crop_data.columns for field in planting_fields):
            planting_info = {
                "row_spacing_cm": int(crop_data['row_spacing_cm'].values[0]),
                "plant_spacing_cm": int(crop_data['plant_spacing_cm'].values[0]),
                "planting_depth_cm": int(crop_data['planting_depth_cm'].values[0]),
            }
            requirements["requirements"]["planting_info"] = planting_info
        
        # Add expected yield if available
        if 'optimal_yield' in crop_data.columns:
            base_yield = float(crop_data['optimal_yield'].values[0])
            # Apply yield factor based on season
            adjusted_yield = base_yield * seasonal_factors[season]['yield_factor']
            requirements["expected_yield_tons_per_ha"] = round(adjusted_yield, 2)
        
        # Add intercropping recommendation if available
        if 'intercropping_compatibility' in crop_data.columns:
            intercrop_value = crop_data['intercropping_compatibility'].values[0]
            if isinstance(intercrop_value, str) and intercrop_value != 'None':
                requirements["intercropping_recommendation"] = intercrop_value.split(',')
        
        # Add season-specific recommendations for Rwanda's four seasons
        if season == 'short_dry':
            requirements["seasonal_recommendations"] = [
                "Implement water conservation techniques",
                "Consider drought-resistant varieties",
                "Apply mulch to reduce evaporation",
                "Use drip irrigation if available"
            ]
        elif season == 'long_rainy':
            requirements["seasonal_recommendations"] = [
                "Ensure proper drainage systems to prevent waterlogging",
                "Monitor closely for fungal diseases",
                "Consider raised beds in low-lying areas",
                "Implement erosion control measures on slopes"
            ]
        elif season == 'long_dry':
            requirements["seasonal_recommendations"] = [
                "Increase irrigation frequency and volume",
                "Use deep mulching to preserve soil moisture",
                "Consider shade structures for sensitive crops",
                "Implement windbreaks to reduce evapotranspiration"
            ]
        elif season == 'short_rainy':
            requirements["seasonal_recommendations"] = [
                "Monitor drainage but prepare for dry spells",
                "Implement integrated pest management for seasonal pests",
                "Consider cover crops to prevent soil erosion",
                "Time planting to maximize use of rainfall patterns"
            ]
            
        # Add note about model limitations if fallbacks were used
        if missing_models:
            requirements["model_notes"] = f"Some models were unavailable ({', '.join(missing_models)}). Results may be less accurate."
        
        return requirements
        
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Detailed error: {error_details}")
        return {"error": f"Error predicting requirements: {str(e)}"}


def make_crop_requirement_prediction():
    try:
        df = pd.read_csv('data/comprehensive_crop_requirements.csv')
    except FileNotFoundError:
        print("Error: Dataset file not found. Please make sure the file exists.")
        return
    
    cleaned_df = clean_dataset(df)
    soil_dfs = create_soil_specific_features(cleaned_df)
    models = train_crop_requirement_models(soil_dfs)
    save_models(models)
    
    print("\nTesting seasonal predictions...")
    
    crop = input("Enter crop name (e.g., Rice, Maize): ")
    soil = input("Enter soil type (sandy, loamy, clay, silty, peaty): ")
    altitude = input("Enter altitude level (low/mid/high): ").lower()
    
    # Modified to handle Rwanda's four seasons
    print("\nRwanda's seasons:")
    print("1. Short dry season (mid-December to mid-March)")
    print("2. Long rainy season (mid-March to mid-May)")
    print("3. Long dry season (mid-May to mid-September)")
    print("4. Short rainy season (mid-September to mid-December)")
    
    season_choice = input("Choose season (1-4): ")
    
    # Map season choice to appropriate format for the prediction function
    season_mapping = {
        '1': 'short_dry',
        '2': 'long_rainy',
        '3': 'long_dry',
        '4': 'short_rainy'
    }
    
    # Full season names for display
    season_names = {
        '1': 'Short dry season',
        '2': 'Long rainy season',
        '3': 'Long dry season',
        '4': 'Short rainy season'
    }
    
    if season_choice not in season_mapping:
        print(f"Invalid season choice '{season_choice}'. Using Short dry season as default.")
        season_choice = '1'
    
    season = season_mapping[season_choice]
    season_name = season_names[season_choice]
        
    if altitude not in ['low', 'mid', 'high']:
        print(f"Invalid altitude '{altitude}'. Using 'mid' as default.")
        altitude = 'mid'
    
    # Call predict_crop_requirements with the appropriate parameters
    try:
        prediction = predict_crop_requirements(crop, soil, altitude=altitude, season=season)
        
        # Check if there's an error in the response
        if 'error' in prediction:
            print(f"\nError in prediction: {prediction['error']}")
        else:
            print("\n" + "="*50)
            print(f"PREDICTION RESULTS:")
            print("="*50)
            print(f"Crop: {prediction['crop']}")
            print(f"Soil Type: {prediction['soil_type']}")
            print(f"Season: {season_name}")  # Use the full season name
            print(f"Altitude: {prediction['altitude']}")
            print("\nCROP REQUIREMENTS:")
            print("-"*50)
            print(f"Nitrogen: {prediction['requirements']['nitrogen_kg_per_ha']} kg/ha")
            print(f"Phosphorus: {prediction['requirements']['phosphorus_kg_per_ha']} kg/ha")
            print(f"Potassium: {prediction['requirements']['potassium_kg_per_ha']} kg/ha")
            print(f"Water: {prediction['requirements']['water_requirement_mm']} mm")
            
            # Rest of your printing code remains the same
            if 'optimal_ph' in prediction['requirements']:
                print(f"Optimal pH: {prediction['requirements']['optimal_ph']}")
            
            if 'planting_info' in prediction['requirements']:
                print("\nPLANTING INFORMATION:")
                print("-"*50)
                planting = prediction['requirements']['planting_info']
                print(f"Row Spacing: {planting['row_spacing_cm']} cm")
                print(f"Plant Spacing: {planting['plant_spacing_cm']} cm")
                print(f"Planting Depth: {planting['planting_depth_cm']} cm")
            
            if 'expected_yield_tons_per_ha' in prediction:
                print("\nYIELD INFORMATION:")
                print("-"*50)
                print(f"Expected Yield: {prediction['expected_yield_tons_per_ha']} tons/ha")
            
            if 'intercropping_recommendation' in prediction:
                print("\nINTERCROPPING RECOMMENDATIONS:")
                print("-"*50)
                print(f"Compatible crops: {', '.join(prediction['intercropping_recommendation'])}")
            
            if 'seasonal_recommendations' in prediction:
                print("\nSEASONAL RECOMMENDATIONS:")
                print("-"*50)
                for i, rec in enumerate(prediction['seasonal_recommendations'], 1):
                    print(f"{i}. {rec}")
    except Exception as e:
        print(f"Error processing prediction: {str(e)}")

predicted_data = make_crop_requirement_prediction()
print(predicted_data)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cleaned_df[col].fillna(cleaned_df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cleaned_df[col].fillna(cleaned_df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate objec

Training model for nitisol soil...
Created missing column: low_altitude_dry_adjusted
Created missing column: mid_altitude_dry_adjusted
Created missing column: high_altitude_dry_adjusted
Using target columns: ['adjusted_nitrogen', 'adjusted_phosphorus', 'adjusted_potassium', 'low_altitude_short_dry_adjusted', 'low_altitude_long_rainy_adjusted', 'low_altitude_long_dry_adjusted', 'low_altitude_short_rainy_adjusted', 'mid_altitude_short_dry_adjusted', 'mid_altitude_long_rainy_adjusted', 'mid_altitude_long_dry_adjusted', 'mid_altitude_short_rainy_adjusted', 'high_altitude_short_dry_adjusted', 'high_altitude_long_rainy_adjusted', 'high_altitude_long_dry_adjusted', 'high_altitude_short_rainy_adjusted', 'low_altitude_dry_adjusted', 'mid_altitude_dry_adjusted', 'high_altitude_dry_adjusted']
Training model for ferralsol soil...
Created missing column: low_altitude_dry_adjusted
Created missing column: mid_altitude_dry_adjusted
Created missing column: high_altitude_dry_adjusted
Using target column

In [5]:
import pandas as pd
from joblib import load
import os


def predict_soil_texture(district, sector):

    try:
        # Load the model and preprocessor
        model_path = 'models/best_soil_texture_model.joblib'
        preprocessor_path = 'models/soil_preprocessor.joblib'
        
        if not os.path.exists(model_path) or not os.path.exists(preprocessor_path):
            return "Error: Model or preprocessor file not found."
        
        model = load(model_path)
        preprocessor = load(preprocessor_path)
        
        # Load the original dataset to get coordinates
        try:
            dataset = pd.read_csv('./Rwanda soil data/rwanda_soilTypes.csv')
            
            # Find average coordinates for the specific sector
            sector_data = dataset[(dataset['District'] == district) & (dataset['Sector'] == sector)]
            
            if len(sector_data) > 0:
                # Use average coordinates for the sector
                latitude = sector_data['Latitude'].mean()
                longitude = sector_data['Longitude'].mean()
            else:
                return f"Error: The combination of District '{district}' and Sector '{sector}' was not found in the dataset."
                
        except FileNotFoundError:
            return "Error: Dataset file not found."
        
        # Create a dataframe with the input data
        input_data = pd.DataFrame({
            'District': [district],
            'Latitude': [latitude],
            'Longitude': [longitude]
        })

        # Get a sample of the training data to fit the preprocessor
        training_data = dataset[['District', 'Latitude', 'Longitude']]
        
        # Fit the preprocessor on the training data
        preprocessor.fit(training_data)
        
        # Now transform the input data
        X_processed = preprocessor.transform(input_data)
        
        # Make the prediction
        prediction = model.predict(X_processed)[0]
        
        return prediction
    
    except Exception as e:
        return f"Error during prediction: {str(e)}"


def predict_soil(district, sector):
    
    soil_prediction = predict_soil_texture(district_name, sector_name)
    if isinstance(soil_prediction, str):
        print(f"Predicted soil texture for {district_name}, {sector_name}: {soil_prediction}")
    else:
        # If the prediction is successful, it will be a string with the predicted soil texture
        print(f"Predicted soil texture for {district_name}, {sector_name}: {soil_prediction}")
    
    return soil_prediction



# Example: Using only district and sector names
district_name = input("Enter district name: ")
sector_name = input("Enter sector name: ")

predicted_soil = predict_soil(district_name, sector_name)
print(predicted_soil)

# Modified function to predict crop requirements with better error handling and fallbacks
def predict_crop_requirements(crop_name, soil_type, model_dir='models', altitude='mid', season='short_dry'):
    """
    Predict crop requirements for best yield based on crop name, soil type and season.
    
    Parameters:
    - crop_name: Name of the crop
    - soil_type: Type of soil ('sandy', 'loamy', 'clay', 'silty', 'peaty', etc.)
    - model_dir: Directory where models are stored
    - altitude: Altitude level ('low', 'mid', 'high')
    - season: Growing season in Rwanda ('short_dry', 'long_rainy', 'long_dry', 'short_rainy')
    
    Returns:
    - Dictionary of predicted requirements for best yield or error message
    """
    try:
        # Check if model directory exists
        if not os.path.exists(model_dir):
            return {"error": f"Model directory '{model_dir}' not found."}
            
        # Check if soil type subdirectory exists
        soil_dir = os.path.join(model_dir, soil_type)
        if not os.path.exists(soil_dir):
            # Try to find an alternative soil type
            available_soils = [d for d in os.listdir(model_dir) 
                              if os.path.isdir(os.path.join(model_dir, d))]
            if not available_soils:
                return {"error": f"No soil type models found in '{model_dir}'."}
            
            # Use the first available soil type as fallback
            soil_type = available_soils[0]
            soil_dir = os.path.join(model_dir, soil_type)
            print(f"Warning: Soil type '{soil_type}' not found. Using '{soil_type}' instead.")
        
        # Load the comprehensive dataset
        try:
            df = pd.read_csv('data/comprehensive_crop_requirements.csv')
        except FileNotFoundError:
            return {"error": "Dataset file not found. Please make sure 'data/comprehensive_crop_requirements.csv' exists."}
        
        # Get the row for this crop
        crop_data = df[df['crop'].str.lower() == crop_name.lower()]
        
        if len(crop_data) == 0:
            # Try to find a close match using fuzzy matching
            from difflib import get_close_matches
            all_crops = df['crop'].unique()
            close_matches = get_close_matches(crop_name, all_crops)
            
            if close_matches:
                crop_name = close_matches[0]
                crop_data = df[df['crop'] == crop_name]
                print(f"Warning: Crop '{crop_name}' not found. Using closest match '{crop_name}' instead.")
            else:
                return {"error": f"Crop '{crop_name}' not found in the database and no close matches were found."}
        
        # Create input features with a copy of crop data
        input_features = crop_data.copy()
        
        # Map Rwanda's seasons to the format in the dataset
        season_mapping = {
            'short_dry': 'short_dry',      # Dec-Feb
            'long_rainy': 'long_rainy',    # Mar-May
            'long_dry': 'long_dry',        # Jun-Sep
            'short_rainy': 'short_rainy'   # Sep-Dec
        }
        
        detailed_season = season_mapping.get(season, 'short_dry')  # Default to short_dry if unknown
        
        # Add missing altitude and season columns that models might expect
        altitude_types = ['low', 'mid', 'high']
        season_types = ['dry', 'wet', 'short_dry', 'long_rainy', 'long_dry', 'short_rainy']
        
        # Generate all possible altitude-season combinations and ensure they exist in input_features
        for alt in altitude_types:
            for seas in season_types:
                col_name = f"{alt}_altitude_{seas}_adjusted"
                if col_name not in input_features.columns:
                    input_features[col_name] = 0.0  # Add with default value
                
        # Also ensure base altitude columns exist
        for alt in altitude_types:
            col_name = f"{alt}_altitude_adjusted"
            if col_name not in input_features.columns:
                input_features[col_name] = 0.0
        
        # Load the models for this soil type
        soil_models = {}
        
        # Define target variables - both general and season-specific
        base_targets = ['adjusted_nitrogen', 'adjusted_phosphorus', 'adjusted_potassium']
        
        # Check available model files directly
        available_models = []
        for filename in os.listdir(soil_dir):
            if filename.endswith('_model.joblib'):
                model_name = filename.replace('_model.joblib', '')
                available_models.append(model_name)
        
        # Find the best matching water requirement column/model
        water_req_key = None
        
        # First try: exact match for altitude and season
        exact_match = f"{altitude}_altitude_{detailed_season}_adjusted"
        if exact_match in available_models:
            water_req_key = exact_match
        
        # Second try: match with any season at this altitude
        if not water_req_key:
            altitude_models = [m for m in available_models if f"{altitude}_altitude" in m and "_adjusted" in m]
            if altitude_models:
                water_req_key = altitude_models[0]
                print(f"Warning: No exact match for {altitude} altitude {detailed_season}. Using {water_req_key} instead.")
        
        # Third try: match with any altitude for this season
        if not water_req_key:
            season_models = [m for m in available_models if f"{detailed_season}_adjusted" in m]
            if season_models:
                water_req_key = season_models[0]
                print(f"Warning: No model for {altitude} altitude. Using {water_req_key} instead.")
        
        # Fourth try: use any available water requirement model
        if not water_req_key:
            water_models = [m for m in available_models if "_altitude" in m and "_adjusted" in m]
            if water_models:
                water_req_key = water_models[0]
                print(f"Warning: No specific model found for {altitude} altitude {detailed_season}. Using {water_req_key} as fallback.")
            else:
                # If no water requirement models are found, we'll proceed without water predictions
                print(f"Warning: No water requirement models found. Proceeding with nutrient predictions only.")
                water_req_key = None
        
        # Try to load all required models
        target_variables = base_targets.copy()
        if water_req_key:
            target_variables.append(water_req_key)
            
        missing_models = []
        
        for target in target_variables:
            model_path = os.path.join(soil_dir, f"{target}_model.joblib")
            if os.path.exists(model_path):
                try:
                    soil_models[target] = joblib.load(model_path)
                except Exception as e:
                    print(f"Error loading model {target}: {str(e)}")
                    missing_models.append(f"{target} (Error: {str(e)})")
            else:
                missing_models.append(target)
        
        # If we're missing nutrient models, try to use base models directly from dataset
        if any(target in missing_models for target in base_targets):
            print("Warning: Some nutrient models are missing. Using dataset values directly.")
            for nutrient in base_targets:
                if nutrient in missing_models and nutrient.replace('adjusted_', '') in crop_data.columns:
                    base_nutrient = nutrient.replace('adjusted_', '')
                    soil_models[nutrient] = {
                        'direct_value': float(crop_data[base_nutrient].values[0])
                    }
                    missing_models.remove(nutrient)
        
        # If we're still missing crucial models after fallbacks, return error
        if missing_models and all(target in missing_models for target in base_targets):
            return {"error": f"Critical models not found: {', '.join(missing_models)}"}
        
        # Make predictions for each target variable or use direct values
        predictions = {}
        for target, model in soil_models.items():
            if isinstance(model, dict) and 'direct_value' in model:
                # Use direct value from the dataset
                predictions[target] = model['direct_value']
            else:
                # Use the model to predict
                try:
                    # Check if input_features has all columns the model expects
                    if hasattr(model, 'feature_names_in_'):
                        missing_cols = set(model.feature_names_in_) - set(input_features.columns)
                        for col in missing_cols:
                            input_features[col] = 0.0  # Add missing columns with default values
                    
                    predictions[target] = model.predict(input_features)[0]
                except Exception as e:
                    print(f"Error predicting with {target} model: {str(e)}")
                    # Use a default value based on dataset if prediction fails
                    if target.replace('adjusted_', '') in crop_data.columns:
                        base_value = crop_data[target.replace('adjusted_', '')].values[0]
                        predictions[target] = float(base_value)
                        print(f"Using default value for {target}: {predictions[target]}")
                    else:
                        # Use reasonable defaults if all else fails
                        defaults = {
                            'adjusted_nitrogen': 50.0,
                            'adjusted_phosphorus': 25.0,
                            'adjusted_potassium': 30.0
                        }
                        if target in defaults:
                            predictions[target] = defaults[target]
                            print(f"Using standard default for {target}: {predictions[target]}")
                        else:
                            # For water requirements, use a reasonable default based on season
                            water_defaults = {
                                'short_dry': 450,
                                'long_rainy': 200,
                                'long_dry': 550,
                                'short_rainy': 350
                            }
                            predictions[target] = water_defaults.get(season, 400)
                            print(f"Using seasonal default water value: {predictions[target]}")
        
        # Apply seasonal adjustments for nutrient requirements
        seasonal_factors = {
            'short_dry': {
                'nitrogen_factor': 1.0,
                'phosphorus_factor': 1.0,
                'potassium_factor': 1.0,
                'yield_factor': 1.0
            },
            'long_rainy': {
                'nitrogen_factor': 1.25,
                'phosphorus_factor': 0.9,
                'potassium_factor': 1.15,
                'yield_factor': 1.1
            },
            'long_dry': {
                'nitrogen_factor': 0.9,
                'phosphorus_factor': 1.1,
                'potassium_factor': 0.95,
                'yield_factor': 0.9
            },
            'short_rainy': {
                'nitrogen_factor': 1.15,
                'phosphorus_factor': 0.95,
                'potassium_factor': 1.05,
                'yield_factor': 1.05
            }
        }
        
        # Apply seasonal adjustment factors
        nitrogen = predictions.get('adjusted_nitrogen', 50.0) * seasonal_factors[season]['nitrogen_factor']
        phosphorus = predictions.get('adjusted_phosphorus', 25.0) * seasonal_factors[season]['phosphorus_factor']
        potassium = predictions.get('adjusted_potassium', 30.0) * seasonal_factors[season]['potassium_factor']
        
        # Select the water requirement based on altitude and season
        water_requirement = predictions.get(water_req_key, None)
        
        # If water_requirement is still None, provide a default based on season and altitude
        if water_requirement is None:
            # Base defaults by season (in mm)
            water_defaults = {
                'short_dry': 450,
                'long_rainy': 200,
                'long_dry': 550,
                'short_rainy': 350
            }
            
            # Altitude adjustment factors
            altitude_factors = {
                'low': 1.2,  # Higher water requirement in low altitude
                'mid': 1.0,  # Base reference
                'high': 0.8   # Lower water requirement in high altitude (cooler, less evaporation)
            }
            
            # Use the default for the season, adjusted by altitude
            base_water = water_defaults.get(season, 400)
            altitude_factor = altitude_factors.get(altitude, 1.0)
            water_requirement = base_water * altitude_factor
            print(f"Using calculated default water requirement: {water_requirement} mm")
        
        # Format the results
        requirements = {
            "crop": crop_name,
            "soil_type": soil_type,
            "season": season,
            "altitude": altitude,
            "requirements": {
                "nitrogen_kg_per_ha": round(nitrogen, 2),
                "phosphorus_kg_per_ha": round(phosphorus, 2),
                "potassium_kg_per_ha": round(potassium, 2),
                "water_requirement_mm": round(water_requirement, 2) if water_requirement else None,
            }
        }
        
        # Add optional fields if they exist in the dataset
        if 'optimal_ph' in crop_data:
            requirements["requirements"]["optimal_ph"] = float(crop_data['optimal_ph'].values[0])
        
        if 'min_sunlight_hours' in crop_data:
            requirements["requirements"]["min_sunlight_hours"] = int(crop_data['min_sunlight_hours'].values[0])
        
        # Add planting information if available
        planting_fields = ['row_spacing_cm', 'plant_spacing_cm', 'planting_depth_cm']
        if all(field in crop_data.columns for field in planting_fields):
            planting_info = {
                "row_spacing_cm": int(crop_data['row_spacing_cm'].values[0]),
                "plant_spacing_cm": int(crop_data['plant_spacing_cm'].values[0]),
                "planting_depth_cm": int(crop_data['planting_depth_cm'].values[0]),
            }
            requirements["requirements"]["planting_info"] = planting_info
        
        # Add expected yield if available
        if 'optimal_yield' in crop_data.columns:
            base_yield = float(crop_data['optimal_yield'].values[0])
            # Apply yield factor based on season
            adjusted_yield = base_yield * seasonal_factors[season]['yield_factor']
            requirements["expected_yield_tons_per_ha"] = round(adjusted_yield, 2)
        
        # Add intercropping recommendation if available
        if 'intercropping_compatibility' in crop_data.columns:
            intercrop_value = crop_data['intercropping_compatibility'].values[0]
            if isinstance(intercrop_value, str) and intercrop_value != 'None':
                requirements["intercropping_recommendation"] = intercrop_value.split(',')
        
        # Add season-specific recommendations for Rwanda's four seasons
        if season == 'short_dry':
            requirements["seasonal_recommendations"] = [
                "Implement water conservation techniques",
                "Consider drought-resistant varieties",
                "Apply mulch to reduce evaporation",
                "Use drip irrigation if available"
            ]
        elif season == 'long_rainy':
            requirements["seasonal_recommendations"] = [
                "Ensure proper drainage systems to prevent waterlogging",
                "Monitor closely for fungal diseases",
                "Consider raised beds in low-lying areas",
                "Implement erosion control measures on slopes"
            ]
        elif season == 'long_dry':
            requirements["seasonal_recommendations"] = [
                "Increase irrigation frequency and volume",
                "Use deep mulching to preserve soil moisture",
                "Consider shade structures for sensitive crops",
                "Implement windbreaks to reduce evapotranspiration"
            ]
        elif season == 'short_rainy':
            requirements["seasonal_recommendations"] = [
                "Monitor drainage but prepare for dry spells",
                "Implement integrated pest management for seasonal pests",
                "Consider cover crops to prevent soil erosion",
                "Time planting to maximize use of rainfall patterns"
            ]
            
        # Add note about model limitations if fallbacks were used
        if missing_models:
            requirements["model_notes"] = f"Some models were unavailable ({', '.join(missing_models)}). Results may be less accurate."
        
        return requirements
        
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Detailed error: {error_details}")
        return {"error": f"Error predicting requirements: {str(e)}"}



def make_crop_requirement_prediction(predicted_soil):
    try:
        df = pd.read_csv('data/comprehensive_crop_requirements.csv')
    except FileNotFoundError:
        print("Error: Dataset file not found. Please make sure the file exists.")
        return
    
    cleaned_df = clean_dataset(df)
    soil_dfs = create_soil_specific_features(cleaned_df)
    models = train_crop_requirement_models(soil_dfs)
    save_models(models)
    
    print("\nTesting seasonal predictions...")
    
    crop = input("Enter crop name (e.g., Rice, Maize): ")
    soil = predicted_soil
    altitude = input("Enter altitude level (low/mid/high): ").lower()
    
    # Modified to handle Rwanda's four seasons
    print("\nRwanda's seasons:")
    print("1. Short dry season (mid-December to mid-March)")
    print("2. Long rainy season (mid-March to mid-May)")
    print("3. Long dry season (mid-May to mid-September)")
    print("4. Short rainy season (mid-September to mid-December)")
    
    season_choice = input("Choose season (1-4): ")
    
    # Map season choice to appropriate format for the prediction function
    season_mapping = {
        '1': 'short_dry',
        '2': 'long_rainy',
        '3': 'long_dry',
        '4': 'short_rainy'
    }
    
    # Full season names for display
    season_names = {
        '1': 'Short dry season',
        '2': 'Long rainy season',
        '3': 'Long dry season',
        '4': 'Short rainy season'
    }
    
    if season_choice not in season_mapping:
        print(f"Invalid season choice '{season_choice}'. Using Short dry season as default.")
        season_choice = '1'
    
    season = season_mapping[season_choice]
    season_name = season_names[season_choice]
        
    if altitude not in ['low', 'mid', 'high']:
        print(f"Invalid altitude '{altitude}'. Using 'mid' as default.")
        altitude = 'mid'
    
    # Call predict_crop_requirements with the appropriate parameters
    try:
        prediction = predict_crop_requirements(crop, soil, altitude=altitude, season=season)
        
        # Check if there's an error in the response
        if 'error' in prediction:
            print(f"\nError in prediction: {prediction['error']}")
        else:
            print("\n" + "="*50)
            print(f"PREDICTION RESULTS:")
            print("="*50)
            print(f"Crop: {prediction['crop']}")
            print(f"Soil Type: {prediction['soil_type']}")
            print(f"Season: {season_name}")  # Use the full season name
            print(f"Altitude: {prediction['altitude']}")
            print("\nCROP REQUIREMENTS:")
            print("-"*50)
            print(f"Nitrogen: {prediction['requirements']['nitrogen_kg_per_ha']} kg/ha")
            print(f"Phosphorus: {prediction['requirements']['phosphorus_kg_per_ha']} kg/ha")
            print(f"Potassium: {prediction['requirements']['potassium_kg_per_ha']} kg/ha")
            print(f"Water: {prediction['requirements']['water_requirement_mm']} mm")
            
            # Rest of your printing code remains the same
            if 'optimal_ph' in prediction['requirements']:
                print(f"Optimal pH: {prediction['requirements']['optimal_ph']}")
            
            if 'planting_info' in prediction['requirements']:
                print("\nPLANTING INFORMATION:")
                print("-"*50)
                planting = prediction['requirements']['planting_info']
                print(f"Row Spacing: {planting['row_spacing_cm']} cm")
                print(f"Plant Spacing: {planting['plant_spacing_cm']} cm")
                print(f"Planting Depth: {planting['planting_depth_cm']} cm")
            
            if 'expected_yield_tons_per_ha' in prediction:
                print("\nYIELD INFORMATION:")
                print("-"*50)
                print(f"Expected Yield: {prediction['expected_yield_tons_per_ha']} tons/ha")
            
            if 'intercropping_recommendation' in prediction:
                print("\nINTERCROPPING RECOMMENDATIONS:")
                print("-"*50)
                print(f"Compatible crops: {', '.join(prediction['intercropping_recommendation'])}")
            
            if 'seasonal_recommendations' in prediction:
                print("\nSEASONAL RECOMMENDATIONS:")
                print("-"*50)
                for i, rec in enumerate(prediction['seasonal_recommendations'], 1):
                    print(f"{i}. {rec}")
    except Exception as e:
        print(f"Error processing prediction: {str(e)}")

predicted_data = make_crop_requirement_prediction(predict_soil(district_name, sector_name))
print(predicted_data)
    
    


Predicted soil texture for , : Error: The combination of District '' and Sector '' was not found in the dataset.
Error: The combination of District '' and Sector '' was not found in the dataset.
Predicted soil texture for , : Error: The combination of District '' and Sector '' was not found in the dataset.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cleaned_df[col].fillna(cleaned_df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cleaned_df[col].fillna(cleaned_df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate objec


Testing seasonal predictions...

Rwanda's seasons:
1. Short dry season (mid-December to mid-March)
2. Long rainy season (mid-March to mid-May)
3. Long dry season (mid-May to mid-September)
4. Short rainy season (mid-September to mid-December)

PREDICTION RESULTS:
Crop: Rice
Soil Type: acrisol
Season: Short dry season
Altitude: low

CROP REQUIREMENTS:
--------------------------------------------------
Nitrogen: 90.0 kg/ha
Phosphorus: 59.34 kg/ha
Potassium: 57.96 kg/ha
Water: 1328.8 mm
Optimal pH: 6.0

PLANTING INFORMATION:
--------------------------------------------------
Row Spacing: 30 cm
Plant Spacing: 15 cm
Planting Depth: 2 cm

YIELD INFORMATION:
--------------------------------------------------
Expected Yield: 5.0 tons/ha

SEASONAL RECOMMENDATIONS:
--------------------------------------------------
1. Implement water conservation techniques
2. Consider drought-resistant varieties
3. Apply mulch to reduce evaporation
4. Use drip irrigation if available
None


In [12]:
import pandas as pd
from joblib import load
import os

def predict_soil_texture(district, sector):
    try:
        # Load the model and preprocessor
        model_path = 'models/best_soil_texture_model.joblib'
        preprocessor_path = 'models/soil_preprocessor.joblib'
        
        if not os.path.exists(model_path) or not os.path.exists(preprocessor_path):
            return "Error: Model or preprocessor file not found."
        
        model = load(model_path)
        preprocessor = load(preprocessor_path)
        
        # Load the original dataset to get coordinates
        try:
            dataset = pd.read_csv('./Rwanda soil data/rwanda_soilTypes.csv')
            
            # Find average coordinates for the specific sector
            sector_data = dataset[(dataset['District'] == district) & (dataset['Sector'] == sector)]
            
            if len(sector_data) > 0:
                # Use average coordinates for the sector
                latitude = sector_data['Latitude'].mean()
                longitude = sector_data['Longitude'].mean()
            else:
                return f"Error: The combination of District '{district}' and Sector '{sector}' was not found in the dataset."
                
        except FileNotFoundError:
            return "Error: Dataset file not found."
        
        # Create a dataframe with the input data
        input_data = pd.DataFrame({
            'District': [district],
            'Latitude': [latitude],
            'Longitude': [longitude]
        })

        # Get a sample of the training data to fit the preprocessor
        training_data = dataset[['District', 'Latitude', 'Longitude']]
        
        # Fit the preprocessor on the training data
        preprocessor.fit(training_data)
        
        # Now transform the input data
        X_processed = preprocessor.transform(input_data)
        
        # Make the prediction
        prediction = model.predict(X_processed)[0]
        
        return prediction
    
    except Exception as e:
        return f"Error during prediction: {str(e)}"

def predict_crop_requirements(crop_name, soil_type, model_dir='models', altitude='mid', season='short_dry'):
    """
    Predict crop requirements for best yield based on crop name, soil type and season.
    
    Parameters:
    - crop_name: Name of the crop
    - soil_type: Type of soil ('sandy', 'loamy', 'clay', 'silty', 'peaty', etc.)
    - model_dir: Directory where models are stored
    - altitude: Altitude level ('low', 'mid', 'high')
    - season: Growing season in Rwanda ('short_dry', 'long_rainy', 'long_dry', 'short_rainy')
    
    Returns:
    - Dictionary of predicted requirements for best yield or error message
    """
    try:
        # Check if model directory exists
        if not os.path.exists(model_dir):
            return {"error": f"Model directory '{model_dir}' not found."}
            
        # Check if soil type subdirectory exists
        soil_dir = os.path.join(model_dir, soil_type)
        if not os.path.exists(soil_dir):
            # Try to find an alternative soil type
            available_soils = [d for d in os.listdir(model_dir) 
                              if os.path.isdir(os.path.join(model_dir, d))]
            if not available_soils:
                return {"error": f"No soil type models found in '{model_dir}'."}
            
            # Use the first available soil type as fallback
            soil_type = available_soils[0]
            soil_dir = os.path.join(model_dir, soil_type)
            print(f"Warning: Soil type '{soil_type}' not found. Using '{soil_type}' instead.")
        
        # Load the comprehensive dataset
        try:
            df = pd.read_csv('data/comprehensive_crop_requirements.csv')
        except FileNotFoundError:
            return {"error": "Dataset file not found. Please make sure 'data/comprehensive_crop_requirements.csv' exists."}
        
        # Get the row for this crop
        crop_data = df[df['crop'].str.lower() == crop_name.lower()]
        
        if len(crop_data) == 0:
            # Try to find a close match using fuzzy matching
            from difflib import get_close_matches
            all_crops = df['crop'].unique()
            close_matches = get_close_matches(crop_name, all_crops)
            
            if close_matches:
                crop_name = close_matches[0]
                crop_data = df[df['crop'] == crop_name]
                print(f"Warning: Crop '{crop_name}' not found. Using closest match '{crop_name}' instead.")
            else:
                return {"error": f"Crop '{crop_name}' not found in the database and no close matches were found."}
        
        # Create input features with a copy of crop data
        input_features = crop_data.copy()
        
        # Map Rwanda's seasons to the format in the dataset
        season_mapping = {
            'short_dry': 'short_dry',      # Dec-Feb
            'long_rainy': 'long_rainy',    # Mar-May
            'long_dry': 'long_dry',        # Jun-Sep
            'short_rainy': 'short_rainy'   # Sep-Dec
        }
        
        detailed_season = season_mapping.get(season, 'short_dry')  # Default to short_dry if unknown
        
        # Add missing altitude and season columns that models might expect
        altitude_types = ['low', 'mid', 'high']
        season_types = ['dry', 'wet', 'short_dry', 'long_rainy', 'long_dry', 'short_rainy']
        
        # Generate all possible altitude-season combinations and ensure they exist in input_features
        for alt in altitude_types:
            for seas in season_types:
                col_name = f"{alt}_altitude_{seas}_adjusted"
                if col_name not in input_features.columns:
                    input_features[col_name] = 0.0  # Add with default value
                
        # Also ensure base altitude columns exist
        for alt in altitude_types:
            col_name = f"{alt}_altitude_adjusted"
            if col_name not in input_features.columns:
                input_features[col_name] = 0.0
        
        # Load the models for this soil type
        soil_models = {}
        
        # Define target variables - both general and season-specific
        base_targets = ['adjusted_nitrogen', 'adjusted_phosphorus', 'adjusted_potassium']
        
        # Check available model files directly
        available_models = []
        for filename in os.listdir(soil_dir):
            if filename.endswith('_model.joblib'):
                model_name = filename.replace('_model.joblib', '')
                available_models.append(model_name)
        
        # Find the best matching water requirement column/model
        water_req_key = None
        
        # First try: exact match for altitude and season
        exact_match = f"{altitude}_altitude_{detailed_season}_adjusted"
        if exact_match in available_models:
            water_req_key = exact_match
        
        # Second try: match with any season at this altitude
        if not water_req_key:
            altitude_models = [m for m in available_models if f"{altitude}_altitude" in m and "_adjusted" in m]
            if altitude_models:
                water_req_key = altitude_models[0]
                print(f"Warning: No exact match for {altitude} altitude {detailed_season}. Using {water_req_key} instead.")
        
        # Third try: match with any altitude for this season
        if not water_req_key:
            season_models = [m for m in available_models if f"{detailed_season}_adjusted" in m]
            if season_models:
                water_req_key = season_models[0]
                print(f"Warning: No model for {altitude} altitude. Using {water_req_key} instead.")
        
        # Fourth try: use any available water requirement model
        if not water_req_key:
            water_models = [m for m in available_models if "_altitude" in m and "_adjusted" in m]
            if water_models:
                water_req_key = water_models[0]
                print(f"Warning: No specific model found for {altitude} altitude {detailed_season}. Using {water_req_key} as fallback.")
            else:
                # If no water requirement models are found, we'll proceed without water predictions
                print(f"Warning: No water requirement models found. Proceeding with nutrient predictions only.")
                water_req_key = None
        
        # Try to load all required models
        target_variables = base_targets.copy()
        if water_req_key:
            target_variables.append(water_req_key)
            
        missing_models = []
        
        for target in target_variables:
            model_path = os.path.join(soil_dir, f"{target}_model.joblib")
            if os.path.exists(model_path):
                try:
                    soil_models[target] = load(model_path)
                except Exception as e:
                    print(f"Error loading model {target}: {str(e)}")
                    missing_models.append(f"{target} (Error: {str(e)})")
            else:
                missing_models.append(target)
        
        # If we're missing nutrient models, try to use base models directly from dataset
        if any(target in missing_models for target in base_targets):
            print("Warning: Some nutrient models are missing. Using dataset values directly.")
            for nutrient in base_targets:
                if nutrient in missing_models and nutrient.replace('adjusted_', '') in crop_data.columns:
                    base_nutrient = nutrient.replace('adjusted_', '')
                    soil_models[nutrient] = {
                        'direct_value': float(crop_data[base_nutrient].values[0])
                    }
                    missing_models.remove(nutrient)
        
        # If we're still missing crucial models after fallbacks, return error
        if missing_models and all(target in missing_models for target in base_targets):
            return {"error": f"Critical models not found: {', '.join(missing_models)}"}
        
        # Make predictions for each target variable or use direct values
        predictions = {}
        for target, model in soil_models.items():
            if isinstance(model, dict) and 'direct_value' in model:
                # Use direct value from the dataset
                predictions[target] = model['direct_value']
            else:
                # Use the model to predict
                try:
                    # Check if input_features has all columns the model expects
                    if hasattr(model, 'feature_names_in_'):
                        missing_cols = set(model.feature_names_in_) - set(input_features.columns)
                        for col in missing_cols:
                            input_features[col] = 0.0  # Add missing columns with default values
                    
                    predictions[target] = model.predict(input_features)[0]
                except Exception as e:
                    print(f"Error predicting with {target} model: {str(e)}")
                    # Use a default value based on dataset if prediction fails
                    if target.replace('adjusted_', '') in crop_data.columns:
                        base_value = crop_data[target.replace('adjusted_', '')].values[0]
                        predictions[target] = float(base_value)
                        print(f"Using default value for {target}: {predictions[target]}")
                    else:
                        # Use reasonable defaults if all else fails
                        defaults = {
                            'adjusted_nitrogen': 50.0,
                            'adjusted_phosphorus': 25.0,
                            'adjusted_potassium': 30.0
                        }
                        if target in defaults:
                            predictions[target] = defaults[target]
                            print(f"Using standard default for {target}: {predictions[target]}")
                        else:
                            # For water requirements, use a reasonable default based on season
                            water_defaults = {
                                'short_dry': 450,
                                'long_rainy': 200,
                                'long_dry': 550,
                                'short_rainy': 350
                            }
                            predictions[target] = water_defaults.get(season, 400)
                            print(f"Using seasonal default water value: {predictions[target]}")
        
        # Apply seasonal adjustments for nutrient requirements
        seasonal_factors = {
            'short_dry': {
                'nitrogen_factor': 1.0,
                'phosphorus_factor': 1.0,
                'potassium_factor': 1.0,
                'yield_factor': 1.0
            },
            'long_rainy': {
                'nitrogen_factor': 1.25,
                'phosphorus_factor': 0.9,
                'potassium_factor': 1.15,
                'yield_factor': 1.1
            },
            'long_dry': {
                'nitrogen_factor': 0.9,
                'phosphorus_factor': 1.1,
                'potassium_factor': 0.95,
                'yield_factor': 0.9
            },
            'short_rainy': {
                'nitrogen_factor': 1.15,
                'phosphorus_factor': 0.95,
                'potassium_factor': 1.05,
                'yield_factor': 1.05
            }
        }
        
        # Apply seasonal adjustment factors
        nitrogen = predictions.get('adjusted_nitrogen', 50.0) * seasonal_factors[season]['nitrogen_factor']
        phosphorus = predictions.get('adjusted_phosphorus', 25.0) * seasonal_factors[season]['phosphorus_factor']
        potassium = predictions.get('adjusted_potassium', 30.0) * seasonal_factors[season]['potassium_factor']
        
        # Select the water requirement based on altitude and season
        water_requirement = predictions.get(water_req_key, None)
        
        # If water_requirement is still None, provide a default based on season and altitude
        if water_requirement is None:
            # Base defaults by season (in mm)
            water_defaults = {
                'short_dry': 450,
                'long_rainy': 200,
                'long_dry': 550,
                'short_rainy': 350
            }
            
            # Altitude adjustment factors
            altitude_factors = {
                'low': 1.2,  # Higher water requirement in low altitude
                'mid': 1.0,  # Base reference
                'high': 0.8   # Lower water requirement in high altitude (cooler, less evaporation)
            }
            
            # Use the default for the season, adjusted by altitude
            base_water = water_defaults.get(season, 400)
            altitude_factor = altitude_factors.get(altitude, 1.0)
            water_requirement = base_water * altitude_factor
            print(f"Using calculated default water requirement: {water_requirement} mm")
        
        # Format the results
        requirements = {
            "crop": crop_name,
            "soil_type": soil_type,
            "season": season,
            "altitude": altitude,
            "requirements": {
                "nitrogen_kg_per_ha": round(nitrogen, 2),
                "phosphorus_kg_per_ha": round(phosphorus, 2),
                "potassium_kg_per_ha": round(potassium, 2),
                "water_requirement_mm": round(water_requirement, 2) if water_requirement else None,
            }
        }
        
        # Add optional fields if they exist in the dataset
        if 'optimal_ph' in crop_data:
            requirements["requirements"]["optimal_ph"] = float(crop_data['optimal_ph'].values[0])
        
        if 'min_sunlight_hours' in crop_data:
            requirements["requirements"]["min_sunlight_hours"] = int(crop_data['min_sunlight_hours'].values[0])
        
        # Add planting information if available
        planting_fields = ['row_spacing_cm', 'plant_spacing_cm', 'planting_depth_cm']
        if all(field in crop_data.columns for field in planting_fields):
            planting_info = {
                "row_spacing_cm": int(crop_data['row_spacing_cm'].values[0]),
                "plant_spacing_cm": int(crop_data['plant_spacing_cm'].values[0]),
                "planting_depth_cm": int(crop_data['planting_depth_cm'].values[0]),
            }
            requirements["requirements"]["planting_info"] = planting_info
        
        # Add expected yield if available
        if 'optimal_yield' in crop_data.columns:
            base_yield = float(crop_data['optimal_yield'].values[0])
            # Apply yield factor based on season
            adjusted_yield = base_yield * seasonal_factors[season]['yield_factor']
            requirements["expected_yield_tons_per_ha"] = round(adjusted_yield, 2)
        
        # Add intercropping recommendation if available
        if 'intercropping_compatibility' in crop_data.columns:
            intercrop_value = crop_data['intercropping_compatibility'].values[0]
            if isinstance(intercrop_value, str) and intercrop_value != 'None':
                requirements["intercropping_recommendation"] = intercrop_value.split(',')
        
        # Add season-specific recommendations for Rwanda's four seasons
        if season == 'short_dry':
            requirements["seasonal_recommendations"] = [
                "Implement water conservation techniques",
                "Consider drought-resistant varieties",
                "Apply mulch to reduce evaporation",
                "Use drip irrigation if available"
            ]
        elif season == 'long_rainy':
            requirements["seasonal_recommendations"] = [
                "Ensure proper drainage systems to prevent waterlogging",
                "Monitor closely for fungal diseases",
                "Consider raised beds in low-lying areas",
                "Implement erosion control measures on slopes"
            ]
        elif season == 'long_dry':
            requirements["seasonal_recommendations"] = [
                "Increase irrigation frequency and volume",
                "Use deep mulching to preserve soil moisture",
                "Consider shade structures for sensitive crops",
                "Implement windbreaks to reduce evapotranspiration"
            ]
        elif season == 'short_rainy':
            requirements["seasonal_recommendations"] = [
                "Monitor drainage but prepare for dry spells",
                "Implement integrated pest management for seasonal pests",
                "Consider cover crops to prevent soil erosion",
                "Time planting to maximize use of rainfall patterns"
            ]
            
        # Add note about model limitations if fallbacks were used
        if missing_models:
            requirements["model_notes"] = f"Some models were unavailable ({', '.join(missing_models)}). Results may be less accurate."
        
        return requirements
        
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Detailed error: {error_details}")
        return {"error": f"Error predicting requirements: {str(e)}"}

def main():
    print("==== Rwanda Agriculture Advisory System ====")
    print("This system predicts soil type and crop requirements based on location and crop information.")
    
    # Collect user inputs once
    district_name = input("\nEnter district name: ")
    sector_name = input("Enter sector name: ")
    
    # Predict soil texture
    print("\nAnalyzing soil data for this location...")
    soil_prediction = predict_soil_texture(district_name, sector_name)
    
    if isinstance(soil_prediction, str) and soil_prediction.startswith("Error"):
        print(f"\n❌ {soil_prediction}")
        return
    
    print(f"\n✅ Predicted soil texture for {district_name}, {sector_name}: {soil_prediction}")
    
    # Collect crop information
    crop_name = input("\nEnter crop name (e.g., Rice, Maize): ")
    
    # print("\nAltitude levels:")
    # print("1. Low altitude")
    # print("2. Mid altitude")
    # print("3. High altitude")
    altitude_choice = input("Choose altitude (1-3): ")
    
    altitude_mapping = {
        '1': 'low',
        '2': 'mid',
        '3': 'high'
    }
    
    altitude = altitude_mapping.get(altitude_choice, 'mid')
    
    # Rwanda's four seasons
    # print("\nRwanda's seasons:")
    # print("1. Short dry season (mid-December to mid-March)")
    # print("2. Long rainy season (mid-March to mid-May)")
    # print("3. Long dry season (mid-May to mid-September)")
    # print("4. Short rainy season (mid-September to mid-December)")
    
    season_choice = input("Choose season (1-4): ")
    
    # Map season choice to appropriate format
    season_mapping = {
        '1': 'short_dry',
        '2': 'long_rainy',
        '3': 'long_dry',
        '4': 'short_rainy'
    }
    
    # Full season names for display
    season_names = {
        '1': 'Short dry season',
        '2': 'Long rainy season',
        '3': 'Long dry season',
        '4': 'Short rainy season'
    }
    
    season = season_mapping.get(season_choice, 'short_dry')
    season_name = season_names.get(season_choice, 'Short dry season')
    
    # Make crop requirement prediction
    print("\nGenerating crop requirements for these conditions...")
    prediction = predict_crop_requirements(crop_name, soil_prediction, altitude=altitude, season=season)
    
    # Display results
    if 'error' in prediction:
        print(f"\n❌ Error in prediction: {prediction['error']}")
    else:
        # print("\n" + "="*70)
        # print(f"RWANDA AGRICULTURAL ADVISORY SYSTEM - PREDICTION RESULTS")
        # print("="*70)
        # print(f"Location: {district_name} District, {sector_name} Sector")
        # print(f"Soil Type: {prediction['soil_type']}")
        # print(f"Crop: {prediction['crop']}")
        # print(f"Season: {season_name}")
        # print(f"Altitude: {altitude.capitalize()}")
        
        print("\n" + "="*70)
        print("CROP REQUIREMENTS")
        print("="*70)
        print(f"Nitrogen: {prediction['requirements']['nitrogen_kg_per_ha']} kg/ha")
        print(f"Phosphorus: {prediction['requirements']['phosphorus_kg_per_ha']} kg/ha")
        print(f"Potassium: {prediction['requirements']['potassium_kg_per_ha']} kg/ha")
        print(f"Water: {prediction['requirements']['water_requirement_mm']} mm")
        
        # if 'optimal_ph' in prediction['requirements']:
        #     print(f"Optimal pH: {prediction['requirements']['optimal_ph']}")
        
        if 'planting_info' in prediction['requirements']:
            print("\n" + "="*70)
            print("PLANTING INFORMATION")
            print("="*70)
            planting = prediction['requirements']['planting_info']
            print(f"Row Spacing: {planting['row_spacing_cm']} cm")
            print(f"Plant Spacing: {planting['plant_spacing_cm']} cm")
            # print(f"Planting Depth: {planting['planting_depth_cm']} cm")
        
        if 'expected_yield_tons_per_ha' in prediction:
            print("\n" + "="*70)
            print("YIELD INFORMATION")
            print("="*70)
            print(f"Expected Yield: {prediction['expected_yield_tons_per_ha']} tons/ha")
        
        if 'intercropping_recommendation' in prediction:
            print("\n" + "="*70)
            print("INTERCROPPING RECOMMENDATIONS")
            print("="*70)
            print(f"Compatible crops: {', '.join(prediction['intercropping_recommendation'])}")
        
        if 'seasonal_recommendations' in prediction:
            print("\n" + "="*70)
            print("SEASONAL RECOMMENDATIONS")
            print("="*70)
            for i, rec in enumerate(prediction['seasonal_recommendations'], 1):
                print(f"{i}. {rec}")
        
        print("\n" + "="*70)
        print("NOTE: This is an advisory system. Please consult with local agricultural")
        print("extension officers for specific advice for your farm.")
        print("="*70)

if __name__ == "__main__":
    main()

==== Rwanda Agriculture Advisory System ====
This system predicts soil type and crop requirements based on location and crop information.

Analyzing soil data for this location...

✅ Predicted soil texture for Gasabo, Gisozi: acrisol

Generating crop requirements for these conditions...

CROP REQUIREMENTS
Nitrogen: 89.46 kg/ha
Phosphorus: 59.46 kg/ha
Potassium: 56.35 kg/ha
Water: 1046.1 mm

PLANTING INFORMATION
Row Spacing: 30 cm
Plant Spacing: 15 cm

YIELD INFORMATION
Expected Yield: 5.0 tons/ha

SEASONAL RECOMMENDATIONS
1. Implement water conservation techniques
2. Consider drought-resistant varieties
3. Apply mulch to reduce evaporation
4. Use drip irrigation if available

NOTE: This is an advisory system. Please consult with local agricultural
extension officers for specific advice for your farm.
