In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')

# Load the cleaned dataset from the current working directory. The functions
# below read its GID_2, DATE, RAINFALL_MM, NDVI_VALUE and LST_VALUE columns.
combined_df = pd.read_csv('rainfall_ndvi_lst_cleaned.csv')
# Parse DATE into pandas datetimes so it can be grouped on, sorted and used
# as a time index further down.
combined_df['DATE'] = pd.to_datetime(combined_df['DATE'])

# Create a mapping of GID_2 codes to regions
def create_region_mapping(combined_df):
    """
    Build a lookup from every GID_2 code in the data to a display name.

    Codes present in the built-in Malawi table receive that table's label;
    every other code falls back to the generic label ``"Region <code>"``.
    The returned dict follows the order in which codes first appear in
    ``combined_df['GID_2']``.
    """
    # Hand-maintained Malawi labels (approximate - customize as needed)
    known_names = {
        'MWI.1.1_1': 'Northern Region - Chitipa',
        'MWI.1.2_1': 'Northern Region - Karonga',
        'MWI.1.3_1': 'Northern Region - Rumphi',
        'MWI.2.1_1': 'Central Region - Kasungu',
        'MWI.2.2_1': 'Central Region - Nkhotakota',
        'MWI.2.3_1': 'Central Region - Ntchisi',
        'MWI.2.4_1': 'Central Region - Dowa',
        'MWI.2.5_1': 'Central Region - Salima',
        'MWI.2.6_1': 'Central Region - Lilongwe',
        'MWI.2.7_1': 'Central Region - Mchinji',
        'MWI.3.1_1': 'Southern Region - Mangochi',
        'MWI.3.2_1': 'Southern Region - Machinga',
        'MWI.3.3_1': 'Southern Region - Zomba',
        'MWI.3.4_1': 'Southern Region - Chiradzulu',
        'MWI.3.5_1': 'Southern Region - Blantyre',
        'MWI.3.6_1': 'Southern Region - Mwanza',
        'MWI.3.7_1': 'Southern Region - Thyolo',
        'MWI.3.8_1': 'Southern Region - Mulanje',
        'MWI.3.9_1': 'Southern Region - Phalombe',
        'MWI.4.1_1': 'Southern Region - Chikwawa',
        'MWI.4.2_1': 'Southern Region - Nsanje',
        'MWI.4.3_1': 'Southern Region - Balaka',
        'MWI.4.4_1': 'Southern Region - Neno',
    }

    # Extend known_names with further tables to label other countries;
    # until then unknown codes get the generic "Region <code>" name.
    return {
        code: known_names.get(code, f"Region {code}")
        for code in combined_df['GID_2'].unique()
    }

def get_available_options(combined_df):
    """
    Collect the countries and regions a user can choose from.

    Side effect: writes a 'country_code' column (the first three characters
    of GID_2) onto ``combined_df`` itself.

    Returns a tuple ``(available_countries, region_mapping)`` where
    ``available_countries`` maps each country code found in the data to a
    display name and ``region_mapping`` is the result of
    ``create_region_mapping(combined_df)``.
    """
    # Display names for the country codes this project knows about
    known_countries = {
        'MWI': 'Malawi',
        'GHA': 'Ghana',
        'UGA': 'Uganda',
        'SEN': 'Senegal',
        'BEN': 'Benin'
    }

    # The leading three characters of a GID_2 code are used as the country code
    combined_df['country_code'] = combined_df['GID_2'].str[:3]
    codes_present = combined_df['country_code'].unique()

    # Codes without a known display name get a "Country_<code>" placeholder
    available_countries = {
        code: known_countries.get(code, f"Country_{code}")
        for code in codes_present
    }

    region_mapping = create_region_mapping(combined_df)
    return available_countries, region_mapping

def display_available_options(available_countries, region_mapping):
    """
    Print the selectable countries and a preview of the selectable regions.

    Every country is listed as ``code: name``. Only the first ten regions
    are printed; when more exist, a final line reports how many were left
    out. A usage tip is printed last. Returns nothing.
    """
    preview_limit = 10

    print("🌍 AVAILABLE COUNTRIES:")
    print("=" * 40)
    for code in available_countries:
        print(f"{code}: {available_countries[code]}")

    print("\n📍 AVAILABLE REGIONS (for detailed analysis):")
    print("=" * 60)
    for position, (gid, label) in enumerate(region_mapping.items()):
        if position >= preview_limit:
            break
        print(f"{gid}: {label}")

    # Summarize whatever did not fit into the preview
    hidden_count = len(region_mapping) - preview_limit
    if hidden_count > 0:
        print(f"... and {hidden_count} more regions")

    print("\n💡 Tip: Type 'Malawi' for country-level forecast or a GID code for regional analysis")

def _resolve_selection(user_input, available_countries, region_mapping):
    """Translate raw user text into a selection dict, or None if nothing matches."""
    wanted = user_input.strip()
    if not wanted:
        return None
    folded = wanted.casefold()

    # Countries: accept either the display name or the code, ignoring case.
    # The options listing prints "CODE: Name", so both are natural inputs.
    for code, name in available_countries.items():
        if any(isinstance(text, str) and text.casefold() == folded
               for text in (name, code)):
            return {'type': 'country', 'value': code, 'name': name}

    # Regions: an exact GID match wins; otherwise retry ignoring case
    if wanted in region_mapping:
        return {'type': 'region', 'value': wanted, 'name': region_mapping[wanted]}
    for gid, name in region_mapping.items():
        if isinstance(gid, str) and gid.casefold() == folded:
            return {'type': 'region', 'value': gid, 'name': name}

    return None


def get_user_selection(available_countries, region_mapping):
    """
    Prompt until the user names an available country or region.

    Accepted inputs (surrounding whitespace and letter case are ignored):
      * a country display name, e.g. 'Malawi'
      * a country code as shown in the options listing, e.g. 'MWI'
      * a GID code that is a key of ``region_mapping``, e.g. 'MWI.1.1_1'

    Only entries present in ``available_countries`` / ``region_mapping`` are
    accepted, so the returned selection always refers to something in the
    loaded data.

    Returns a dict with keys 'type' ('country' or 'region'), 'value' (the
    country code or GID code) and 'name' (the display name).

    On invalid input an error line and the options listing are printed and
    the prompt repeats. Exceptions raised by ``input`` are not caught here.
    """
    while True:
        print("\n" + "=" * 60)
        user_input = input("Enter country name (e.g., 'Malawi') or GID code (e.g., 'MWI.1.1_1'): ")

        selection = _resolve_selection(user_input, available_countries, region_mapping)
        if selection is not None:
            return selection

        print("❌ Invalid selection. Please choose from the available options.")
        display_available_options(available_countries, region_mapping)

def prepare_data_for_forecast(combined_df, selection, forecast_years=5):
    """
    Extract the time series to forecast for the user's selection.

    Parameters:
        combined_df: DataFrame with GID_2, DATE, RAINFALL_MM, NDVI_VALUE and
            LST_VALUE columns.
        selection: dict with 'type' ('country' or anything else, which is
            treated as a region), 'value' (country code or GID code) and
            'name' (display name used in the printed messages).
        forecast_years: not used by this function; kept so existing callers
            that pass it keep working.

    Returns:
        A DataFrame indexed by DATE with the RAINFALL_MM, NDVI_VALUE and
        LST_VALUE columns, or None (after printing a message) when no rows
        match. For a country, the values are the per-date mean over all rows
        whose GID_2 starts with the country code. For a region, the matching
        rows are returned sorted by DATE without aggregation.
    """
    if selection['type'] == 'country':
        # Country-level analysis - aggregate all regions for that country
        country_code = selection['value']
        # na=False: rows with a missing GID_2 count as non-matching. Without
        # it the mask contains NaN and boolean indexing raises.
        in_country = combined_df['GID_2'].str.startswith(country_code, na=False)
        country_data = combined_df[in_country]

        if country_data.empty:
            print(f"❌ No data found for country code: {country_code}")
            return None

        # Aggregate by date
        aggregated_data = country_data.groupby('DATE').agg({
            'RAINFALL_MM': 'mean',
            'NDVI_VALUE': 'mean',
            'LST_VALUE': 'mean'
        }).sort_index()

        print(f"✅ Prepared country-level data for {selection['name']}")
        print(f"   Time range: {aggregated_data.index.min()} to {aggregated_data.index.max()}")
        print(f"   Total records: {len(aggregated_data)}")

        return aggregated_data

    # Region-level analysis
    region_gid = selection['value']
    region_data = combined_df[combined_df['GID_2'] == region_gid].sort_values('DATE')

    if region_data.empty:
        print(f"❌ No data found for region: {region_gid}")
        return None

    region_data = region_data.set_index('DATE')[['RAINFALL_MM', 'NDVI_VALUE', 'LST_VALUE']]

    print(f"✅ Prepared region-level data for {selection['name']}")
    print(f"   Time range: {region_data.index.min()} to {region_data.index.max()}")
    print(f"   Total records: {len(region_data)}")

    return region_data

def prophet_forecast(series, periods=115, yearly_seasonality=True,
                     freq='16D', interval_width=0.95):
    """
    Fit a Prophet model to one series and forecast beyond its end.

    Parameters:
        series: pandas Series whose index supplies the timestamps ('ds') and
            whose values supply the observations ('y').
        periods: number of future steps to forecast.
        yearly_seasonality: passed through to Prophet.
        freq: spacing of the future steps, as a pandas frequency string. The
            default '16D' matches the 16-day step that run_forecast assumes
            when it converts years into periods.
        interval_width: width of the uncertainty interval behind the
            returned bounds. Set to 0.95 because plot_results labels the
            shaded band "95% CI"; Prophet's own default is narrower.

    Returns:
        A tuple ``(yhat, yhat_lower, yhat_upper)`` of Series indexed by
        forecast date, each holding only the ``periods`` future steps.
    """
    from prophet import Prophet

    prophet_df = pd.DataFrame({
        'ds': series.index,
        'y': series.values
    })

    model = Prophet(
        yearly_seasonality=yearly_seasonality,
        weekly_seasonality=False,
        daily_seasonality=False,
        changepoint_prior_scale=0.05,
        interval_width=interval_width
    )

    model.fit(prophet_df)
    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future).set_index('ds')

    # The prediction covers history plus the future steps; keep the future.
    # tail() is used instead of [-periods:] because [-0:] would return the
    # entire frame when periods is 0, whereas tail(0) returns nothing.
    horizon = forecast.tail(periods)

    return horizon['yhat'], horizon['yhat_lower'], horizon['yhat_upper']

def run_forecast(data, selection, forecast_years=5):
    """
    Forecast rainfall, NDVI and LST for the prepared data.

    Each of the three columns of ``data`` is forecast independently with
    ``prophet_forecast``, in the order rainfall, NDVI, LST.

    Returns a tuple ``(forecasts, intervals)``: ``forecasts`` is a DataFrame
    with RAINFALL_MM, NDVI_VALUE and LST_VALUE columns indexed by the
    rainfall forecast's dates; ``intervals`` maps 'rainfall', 'ndvi' and
    'lst' to ``{'lower': ..., 'upper': ...}`` bound series.
    """
    # Number of 16-day steps that fit into the requested horizon
    periods = int(365 * forecast_years / 16)

    print(f"\n🔮 Forecasting for {selection['name']} ({forecast_years} years)...")

    # (interval key, data column) for each forecast variable
    targets = (
        ('rainfall', 'RAINFALL_MM'),
        ('ndvi', 'NDVI_VALUE'),
        ('lst', 'LST_VALUE'),
    )

    point_forecasts = {}
    intervals = {}
    for key, column in targets:
        predicted, lower, upper = prophet_forecast(data[column], periods)
        point_forecasts[column] = predicted
        intervals[key] = {'lower': lower, 'upper': upper}

    # All three columns share the index of the rainfall forecast
    forecasts = pd.DataFrame(
        {column: predicted.values for column, predicted in point_forecasts.items()},
        index=point_forecasts['RAINFALL_MM'].index,
    )

    return forecasts, intervals

def plot_results(historical_data, forecasts, ci_dict, selection):
    """
    Draw history, forecast and uncertainty band for each variable.

    Three stacked panels are produced (rainfall, NDVI, LST). Each shows the
    historical series, the forecast line and a shaded band between the
    'lower' and 'upper' series found in ``ci_dict`` under 'rainfall',
    'ndvi' and 'lst'. The figure is displayed with ``plt.show()``; nothing
    is returned.
    """
    palette = ['#2E86AB', '#A23B72', '#F18F01']

    # (data column, ci_dict key, y-axis label) for each panel, top to bottom
    panels = [
        ('RAINFALL_MM', 'rainfall', 'Rainfall (mm)'),
        ('NDVI_VALUE', 'ndvi', 'NDVI Value'),
        ('LST_VALUE', 'lst', 'Temperature (°C)'),
    ]

    fig, axes = plt.subplots(3, 1, figsize=(16, 12))
    bottom_panel = len(panels) - 1

    for position, (column, ci_key, y_label) in enumerate(panels):
        axis = axes[position]
        shade = palette[position]
        band = ci_dict[ci_key]

        axis.plot(historical_data.index, historical_data[column],
                  label='Historical', linewidth=2, color=shade, alpha=0.8)
        axis.plot(forecasts.index, forecasts[column],
                  label='Forecast', linewidth=3, color=shade)
        # NOTE(review): the label assumes the bounds in ci_dict are 95%
        # intervals - confirm against the forecaster's interval width.
        axis.fill_between(forecasts.index, band['lower'], band['upper'],
                          color=shade, alpha=0.2, label='95% CI')
        axis.set_ylabel(y_label, fontweight='bold')
        if position == bottom_panel:
            # Only the bottom panel carries the x-axis label
            axis.set_xlabel('Year', fontweight='bold')
        axis.legend()
        axis.grid(True, alpha=0.3)

    plt.suptitle(f'5-Year Forecast for {selection["name"]}\n', fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()

def generate_summary(forecasts, selection):
    """
    Print a text summary of the forecast.

    Shows the selection's name, the first and last forecast dates, and the
    mean, minimum and maximum of the RAINFALL_MM, NDVI_VALUE and LST_VALUE
    columns of ``forecasts``. Returns nothing.
    """
    divider = "=" * 60

    print("\n" + divider)
    print(f"📊 FORECAST SUMMARY: {selection['name']}")
    print(divider)

    first_day = forecasts.index[0].strftime('%Y-%m-%d')
    last_day = forecasts.index[-1].strftime('%Y-%m-%d')
    print(f"\n📅 Forecast Period: {first_day} to {last_day}")

    print("\n🌧️  Rainfall Forecast:")
    rainfall = forecasts['RAINFALL_MM']
    print(f"   Average: {rainfall.mean():.1f} mm")
    print(f"   Range: {rainfall.min():.1f} - {rainfall.max():.1f} mm")

    print("\n🌿 Vegetation Health (NDVI):")
    ndvi = forecasts['NDVI_VALUE']
    print(f"   Average: {ndvi.mean():.3f}")
    print(f"   Range: {ndvi.min():.3f} - {ndvi.max():.3f}")

    print("\n🌡️  Temperature Forecast:")
    temperature = forecasts['LST_VALUE']
    print(f"   Average: {temperature.mean():.1f}°C")
    print(f"   Range: {temperature.min():.1f} - {temperature.max():.1f}°C")

def main():
    """
    Run the interactive forecasting session.

    Prints a welcome banner, then repeats: show the options, read a
    selection, prepare its data, forecast, plot, print a summary and write
    the forecast to ``forecast_<name>.csv``. After each forecast the user is
    asked whether to continue; any answer other than 'yes' or 'y' ends the
    session. Selections with no data skip straight back to the options.
    Reads the module-level ``combined_df``.
    """
    banner = (
        "🌍 WELCOME TO THE CLIMATE FORECASTING SYSTEM",
        "=" * 50,
        "This system predicts Rainfall, Vegetation Health, and Temperature",
        "for any available country or region for the next 5 years!",
    )
    for banner_line in banner:
        print(banner_line)

    # The selectable countries and regions are computed once per session
    available_countries, region_mapping = get_available_options(combined_df)

    keep_going = True
    while keep_going:
        display_available_options(available_countries, region_mapping)
        selection = get_user_selection(available_countries, region_mapping)

        data = prepare_data_for_forecast(combined_df, selection)
        if data is None:
            # Nothing to forecast for this selection - ask again
            continue

        forecasts, ci_dict = run_forecast(data, selection)
        plot_results(data, forecasts, ci_dict, selection)
        generate_summary(forecasts, selection)

        # The output file name is derived from the selection's display name
        name_slug = selection['name'].replace(' ', '_').lower()
        filename = f"forecast_{name_slug}.csv"
        forecasts.to_csv(filename)
        print(f"\n💾 Forecast saved to: {filename}")

        print("\n" + "=" * 50)
        answer = input("Would you like to forecast another region? (yes/no): ").strip().lower()
        keep_going = answer in ('yes', 'y')

    print("Thank you for using the Climate Forecasting System! 👋")

# Start the interactive session only when this code is executed directly
# (__name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()

🌍 WELCOME TO THE CLIMATE FORECASTING SYSTEM
This system predicts Rainfall, Vegetation Health, and Temperature
for any available country or region for the next 5 years!
🌍 AVAILABLE COUNTRIES:
MWI: Malawi

📍 AVAILABLE REGIONS (for detailed analysis):
MWI.1.1_1: Northern Region - Chitipa
MWI.1.2_1: Northern Region - Karonga
MWI.1.3_1: Northern Region - Rumphi
MWI.2.1_1: Central Region - Kasungu
MWI.2.2_1: Central Region - Nkhotakota
MWI.2.3_1: Central Region - Ntchisi
MWI.2.4_1: Central Region - Dowa
MWI.2.5_1: Central Region - Salima
MWI.2.6_1: Central Region - Lilongwe
MWI.2.7_1: Central Region - Mchinji
... and 228 more regions

💡 Tip: Type 'Malawi' for country-level forecast or a GID code for regional analysis

❌ Invalid selection. Please choose from the available options.
🌍 AVAILABLE COUNTRIES:
MWI: Malawi

📍 AVAILABLE REGIONS (for detailed analysis):
MWI.1.1_1: Northern Region - Chitipa
MWI.1.2_1: Northern Region - Karonga
MWI.1.3_1: Northern Region - Rumphi
MWI.2.1_1: Central Regio

KeyboardInterrupt: Interrupted by user