# We Can Answer "Which Countries Are Warming Fastest?" 

###  Ranking of fastest-warming countries
###  Actual warming rates in °C/decade
###  Regional patterns (from country names)
###  Data quality assessment

In [108]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from scipy import stats
#%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit

In [109]:
df = pd.read_csv("C:\\Users\\cw\\OneDrive\\Desktop\\Unified\\TemperaturesByCountry.csv")
df.head(5)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [110]:
df.shape

(577462, 4)

In [111]:
print(f"Missing values")
df.isnull().sum()

Missing values


dt                                   0
AverageTemperature               32651
AverageTemperatureUncertainty    31912
Country                              0
dtype: int64

In [112]:
df['Country'].unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla', 'Antarctica',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', "Côte D'Ivoire", 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros',
       'Congo (Democratic Republic Of The)', 'Congo', 'Costa Rica',
       'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic',
       'Denmark (Europe)', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecu

# Analysing the Dataset

In [114]:
def dataset_analysis(df):
    #encode fix
    df_clean = df.copy()
    df_clean['Country'] = df_clean['Country'].str.normalize('NFKD').str.encode('ascii',errors='ignore').str.decode('ascii')
    df_clean['dt']= pd.to_datetime(df_clean['dt'])
    df_clean['year']= df_clean['dt'].dt.year
    print(f"Time range: {df_clean['year'].min()} - {df_clean['year'].max()}")

    #Remove missing values
    df_clean = df_clean.dropna(subset=['AverageTemperature'])
    print(f"After removing missing values:{len(df_clean):,}")

    # Focus on modern period for better analysis
    modern_data = df_clean[df_clean['year'] >= 1950]
    print(f"Length of Modern data: {len(modern_data)}")

    return modern_data

country_modern_data= dataset_analysis(df)


Time range: 1743 - 2013
After removing missing values:544,811
Length of Modern data: 184907


# Fast Warming Analysis

In [116]:
def efficient_warming_analysis(df):
    print("\n Calculating Warming Rates for all countries ")

    results=[]

    for country, country_data in df.groupby('Country'):
        #Calculate annual averages
        annual_avg = country_data.groupby('year')['AverageTemperature'].mean().reset_index()

        if len(annual_avg)>=30: # Require 30+ years for reliable trend
            years= annual_avg['year'].values
            temps= annual_avg['AverageTemperature'].values

            #Calcualte Trend
            slope, intercept = np.polyfit(years,temps,1)
            warming_rate_per_decade = slope *10

            #Calculate R-Squared
            y_pred = slope * years + intercept
            ss_res = np.sum((temps - y_pred) ** 2)
            ss_tot = np.sum((temps - np.mean(temps))**2)
            r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0
            

            results.append({
                'country': country,
                'warming_rate_c_per_decade': warming_rate_per_decade,
                'r_squared': r_squared,
                'data_points': len(annual_avg),
                'period': f"{years.min()}-{years.max()}",
                'mean_temp': temps.mean(),
                'quality': 'High' if r_squared > 0.5 else 'Medium' if r_squared > 0.2 else 'Low'
            })
            
    
    results_df = pd.DataFrame(results)
    results_df = results_df.sort_values('warming_rate_c_per_decade', ascending=False)
    
    print(f"Countries with sufficient data: {len(results_df)}")
    return results_df

# Run efficient analysis
results = efficient_warming_analysis(country_modern_data)


 Calculating Warming Rates for all countries 
Countries with sufficient data: 242


# Top 30 Fastest-Warming Countries

In [118]:
def display_comprehensive_results(results):
    """Display comprehensive results for your global dataset"""
    print("\n TOP 30 FASTEST-WARMING COUNTRIES (1950+)")
    print()
    print(f"{'Rank':<4} {'Country':<30} {'Warming Rate':<14} {'Quality':<8} {'Period':<12} {'R²':<6}")
    print("-" * 80)
    
    for i, row in results.head(30).iterrows():
        print(f"{i+1:<4} {row['country']:<30} {row['warming_rate_c_per_decade']:+.3f}°C/decade {row['quality']:<8} {row['period']:<12} {row['r_squared']:.2f}")
    
    # Global statistics
    global_avg = results['warming_rate_c_per_decade'].mean()
    high_quality = results[results['quality'] == 'High']
    
    print(f"\n GLOBAL STATISTICS:")
    print(f"• Global average warming rate: {global_avg:+.3f}°C per decade")
    print(f"• Fastest warming: {results.iloc[0]['country']} ({results.iloc[0]['warming_rate_c_per_decade']:+.3f}°C/decade)")
    print(f"• Countries analyzed: {len(results)}")
    print(f"• High-quality trends (R² > 0.5): {len(high_quality)} countries")
    print(f"• Average data period: {results['data_points'].mean():.0f} years per country")

# Show comprehensive results
display_comprehensive_results(results)


 TOP 30 FASTEST-WARMING COUNTRIES (1950+)

Rank Country                        Warming Rate   Quality  Period       R²    
--------------------------------------------------------------------------------
226  Turkmenistan                   +0.331°C/decade Medium   1950-2013    0.44
145  Mongolia                       +0.328°C/decade Medium   1950-2013    0.46
116  Kazakhstan                     +0.318°C/decade Medium   1950-2013    0.36
181  Russia                         +0.317°C/decade Medium   1950-2013    0.43
235  Uzbekistan                     +0.314°C/decade Medium   1950-2013    0.40
106  Iran                           +0.307°C/decade High     1950-2013    0.53
40   Canada                         +0.303°C/decade Medium   1950-2013    0.35
1    Afghanistan                    +0.288°C/decade Medium   1950-2013    0.47
23   Belarus                        +0.273°C/decade Medium   1950-2013    0.28
121  Kyrgyzstan                     +0.272°C/decade Medium   1950-2013    0.44
230  

# Enhanced Top 30 Countries (Better for 242 countries)

In [120]:
def plot_enhanced_top_countries(results):
    """Enhanced bar chart for large number of countries"""
    
    top_30 = results.head(30)
    
    fig = px.bar(
        top_30,
        x='warming_rate_c_per_decade',
        y='country',
        orientation='h',
        title='<b>Top 30 Fastest-Warming Countries</b><br><sub>Based on 242 countries with sufficient data</sub>',
        color='warming_rate_c_per_decade',
        color_continuous_scale='RdYlBu_r',
        hover_data={
            'r_squared': ':.2f',
            'data_points': True,
            'period': True
        }
    )
    
    # Add reference lines
    global_avg = results['warming_rate_c_per_decade'].mean()
    fig.add_vline(x=global_avg, line_dash="dash", line_color="black", 
                  annotation_text=f"Global Avg: {global_avg:.3f}°C/decade")
    
    fig.update_layout(
        yaxis={'categoryorder': 'total ascending'},
        height=800,  # Taller to accommodate more countries
        showlegend=False,
        xaxis_title="Warming Rate (°C per decade)",
        yaxis_title=""
    )
    
    return fig

fig1 = plot_enhanced_top_countries(results)
fig1.show()

# World Map of Warming Rates

In [122]:
def plot_world_map(results):
    """Interactive world map of warming rates"""
    
    fig = px.choropleth(
        results,
        locations='country',
        locationmode='country names',
        color='warming_rate_c_per_decade',
        hover_name='country',
        hover_data={
            'warming_rate_c_per_decade': ':.3f',
            'r_squared': ':.2f',
            'data_points': True,
            'quality': True
        },
        color_continuous_scale='RdBu_r',
        color_continuous_midpoint=0,
        title='<b>Global Warming Rates by Country</b><br><sub>Temperature Change (°C per decade, 1950+)</sub>',
        labels={'warming_rate_c_per_decade': 'Warming Rate (°C/decade)'}
    )
    
    fig.update_layout(
        geo=dict(
            showframe=False,
            showcoastlines=True,
            projection_type='equirectangular'
        ),
        height=500
    )
    
    return fig

# Plot world map
fig2 = plot_world_map(results)
fig2.show()

# Vulnerability Scoring

## "Which countries are not just warming fast, but are also least prepared to handle it?"

### WITHOUT VULNERABILITY:
country_insights = "These countries are warming fastest"

### WITH VULNERABILITY:
country_insights = """
 CLIMATE ACTION PRIORITIES:

URGENT ACTION NEEDED (High Vulnerability):
1. Central Asia - Agriculture collapse risk
2. Coastal developing nations - Sea-level rise
3. Arid regions - Water scarcity crisis

MANAGEABLE RISK (Medium Vulnerability):
1. European nations - Infrastructure adaptation
2. North America - Agricultural shifts

LOW PRIORITY (Low Vulnerability):
1. Wealthy nations - Research & innovation focus
"""

### Smart Proxy Vulnerability (No External Data Needed)

In [127]:
def simplest_robust_vulnerability(results):
    """
    Ultra-simple vulnerability using only columns we know exist
    """
    print(" Calculating SIMPLE & ROBUST vulnerability...")
    
    vulnerability_df = results.copy()
    
    # We KNOW these columns exist in your results:
    print("Available columns:", list(results.columns))
    
    # Use only confirmed available columns:
    # 1. Normalize warming rate (0-1 scale)
    vulnerability_df['warming_norm'] = (
        vulnerability_df['warming_rate_c_per_decade'] - 
        vulnerability_df['warming_rate_c_per_decade'].min()
    ) / (
        vulnerability_df['warming_rate_c_per_decade'].max() - 
        vulnerability_df['warming_rate_c_per_decade'].min()
    )
    
    # 2. Data quality (R-squared)
    vulnerability_df['quality_penalty'] = 1 - vulnerability_df['r_squared']
    
    # 3. Temporal coverage (data points)
    vulnerability_df['coverage_penalty'] = 1 - (vulnerability_df['data_points'] / vulnerability_df['data_points'].max())
    
    # Simple weighted combination
    vulnerability_df['vulnerability_score'] = (
        0.60 * vulnerability_df['warming_norm'] +      # Warming rate most important
        0.25 * vulnerability_df['quality_penalty'] +   # Data quality matters
        0.15 * vulnerability_df['coverage_penalty']    # Temporal coverage
    )
    
    # Create categories
    vulnerability_df['vulnerability_category'] = pd.cut(
        vulnerability_df['vulnerability_score'],
        bins=[0, 0.3, 0.5, 0.7, 1],
        labels=['Low', 'Medium', 'High', 'Critical']
    )
    
    print("Vulnerability distribution:")
    print(vulnerability_df['vulnerability_category'].value_counts().sort_index())
    
    return vulnerability_df

# Run the robust version
vulnerability_results = simplest_robust_vulnerability(results)


 Calculating SIMPLE & ROBUST vulnerability...
Available columns: ['country', 'warming_rate_c_per_decade', 'r_squared', 'data_points', 'period', 'mean_temp', 'quality']
Vulnerability distribution:
Low          67
Medium      125
High         44
Critical      6
Name: vulnerability_category, dtype: int64


In [128]:
def plot_vulnerability_results(vulnerability_results):
    """
    Create vulnerability visualizations
    """
    print(" Creating vulnerability visualizations...")
    
    # 1. Vulnerability World Map
    fig1 = px.choropleth(
        vulnerability_results,
        locations='country',
        locationmode='country names',
        color='vulnerability_category',
        category_orders={'vulnerability_category': ['Low', 'Medium', 'High', 'Critical']},
        color_discrete_map={
            'Low': '#2ecc71',
            'Medium': '#f39c12', 
            'High': '#e74c3c',
            'Critical': '#8b0000'
        },
        title='<b>Climate Vulnerability Hotspots</b><br><sub>Based on warming rate, data quality, and temporal coverage</sub>',
        hover_data={
            'warming_rate_c_per_decade': ':.3f',
            'vulnerability_score': ':.2f',
            'r_squared': ':.2f',
            'data_points': True
        }
    )
    
    fig1.update_layout(height=500)
    
    # 2. Top 20 Most Vulnerable Countries
    top_vulnerable = vulnerability_results.nlargest(20, 'vulnerability_score')
    
    fig2 = px.bar(
        top_vulnerable,
        x='vulnerability_score',
        y='country',
        color='vulnerability_category',
        orientation='h',
        title='<b>Top 20 Most Climate-Vulnerable Countries</b>',
        hover_data={
            'warming_rate_c_per_decade': ':.3f',
            'r_squared': ':.2f',
            'data_points': True
        },
        color_discrete_map={
            'High': '#e74c3c',
            'Critical': '#8b0000',
            'Medium': '#f39c12'
        }
    )
    
    fig2.update_layout(
        yaxis={'categoryorder': 'total ascending'},
        height=500,
        showlegend=False
    )
    
    return fig1, fig2

# Create visualizations
vuln_map, vuln_top = plot_vulnerability_results(vulnerability_results)
vuln_map.show()
vuln_top.show()


 Creating vulnerability visualizations...


# Temperature_BY_City

## Which cities are warming fastest?
## Urban Heat Island Effect: Do cities warm faster than their countries?
## Geographic Patterns: Are certain regions more affected?
## Urban Characteristics: How do latitude, coastal location affect urban warming?

In [131]:
df = pd.read_csv("C:\\Users\\cw\\OneDrive\\Desktop\\Unified\\TemperaturesByMajorCity.csv")
df.head(5)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1849-01-01,26.704,1.435,Abidjan,Côte D'Ivoire,5.63N,3.23W
1,1849-02-01,27.434,1.362,Abidjan,Côte D'Ivoire,5.63N,3.23W
2,1849-03-01,28.101,1.612,Abidjan,Côte D'Ivoire,5.63N,3.23W
3,1849-04-01,26.14,1.387,Abidjan,Côte D'Ivoire,5.63N,3.23W
4,1849-05-01,25.427,1.2,Abidjan,Côte D'Ivoire,5.63N,3.23W


# Analyse City Data

In [133]:
def analyze_data(df):
    print(f"Analysing city Temperature dataset")

    df_clean = df.copy()
    df_clean['City']= df_clean['City'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('ascii')
    df_clean['Country']= df_clean['Country'].str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('ascii')

    #Convert date
    df_clean['dt'] = pd.to_datetime(df_clean['dt'])
    df_clean['year'] = df_clean['dt'].dt.year
    df_clean['month'] = df_clean['dt'].dt.month


     # Data quality assessment
    print(f"Total city records: {len(df_clean):,}")
    print(f"Unique cities: {df_clean['City'].nunique()}")
    print(f"Unique countries: {df_clean['Country'].nunique()}")
    print(f"Time range: {df_clean['year'].min()} - {df_clean['year'].max()}")
    print(f"Missing temperatures: {df_clean['AverageTemperature'].isna().sum():,} ({df_clean['AverageTemperature'].isna().mean():.1%})")

    # City coverage over time
    yearly_cities = df_clean.groupby('year')['City'].nunique()
    print(f"Peak City Coverage: {yearly_cities.max()} cities in {yearly_cities.idxmax()}")

    return df_clean

# Analyze your city data
df_cities_clean = analyze_data(df)

Analysing city Temperature dataset
Total city records: 239,177
Unique cities: 100
Unique countries: 49
Time range: 1743 - 2013
Missing temperatures: 11,002 (4.6%)
Peak City Coverage: 100 cities in 1881


# Calculating City Warming Rates

In [135]:
def city_warming_rates(df):
    print('Calculating City warming Rates')

    city_results = []

    for city in df['City'].unique():
        city_data = df[df['City'] == city].copy()
        city_data = city_data.dropna(subset=['AverageTemperature'])

        # Get city metadata
        sample_row = city_data.iloc[0]
        country = sample_row['Country']

     # Focus on modern period (1900-2013) for consistent analysis
        modern_data = city_data[(city_data['year'] >= 1900) & (city_data['year'] < 2013)]

        if len(modern_data) < 240:
            continue #atleast 20 yrs of monthly data
        #calculate annual average 
        annual_avg = modern_data.groupby('year')['AverageTemperature'].mean().reset_index()

        if len(annual_avg) < 30:
            continue
        # Linear Regression for for warming data
        years= annual_avg['year'].values
        temps = annual_avg['AverageTemperature'].values

        slope, intercept = np.polyfit(years, temps, 1)
        warming_per_decade = slope * 10

        #Calculate R_squared
        y_pred = slope * years + intercept
        ss_res = np.sum((temps - y_pred)**2)
        ss_tot = np.sum((temps - np.mean(temps)) ** 2)
        r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0

        # Data qaulity metrics
        total_months = len(modern_data)
        data_years = annual_avg['year'].max() - annual_avg['year'].min() + 1
        completeness = total_months / (data_years * 12)

        city_results.append({
            'city': city,
            'country': country,
            'warming_rate_c_per_decade': warming_per_decade,
            'r_squared': r_squared,
            'data_points': len(annual_avg),
            'total_months': total_months,
            'data_completeness': completeness,
            'start_year': annual_avg['year'].min(),
            'end_year': annual_avg['year'].max(),
            'mean_temperature': annual_avg['AverageTemperature'].mean(),
            'data_quality': 'High' if r_squared > 0.5 else 'Medium' if r_squared > 0.2 else 'Low'
        })
    
    city_results_df = pd.DataFrame(city_results)
    city_results_df = city_results_df.sort_values('warming_rate_c_per_decade', ascending=False)
    
    print(f"Cities with sufficient modern data: {len(city_results_df)}")
    print(f"Average warming rate: {city_results_df['warming_rate_c_per_decade'].mean():.3f}°C/decade")
    print(f"High-quality trends (R² > 0.5): {len(city_results_df[city_results_df['r_squared'] > 0.5])} cities")
    
    return city_results_df

# Calculate comprehensive city warming rates
city_warming_rates = city_warming_rates(df_cities_clean)

Calculating City warming Rates
Cities with sufficient modern data: 100
Average warming rate: 0.098°C/decade
High-quality trends (R² > 0.5): 17 cities


# Urban Heat Island Analysis

In [137]:
def uhi_analysis(city_warming_rates, results):
    
    
    print('Urban Heat Island Analysis')
    # Merge city data with country data
    comparison_df = city_warming_rates.copy()

    # Get country averages from Level 2
    country_avg = results.groupby('country')['warming_rate_c_per_decade'].mean()
    comparison_df['country_warming_rate'] = comparison_df['country'].map(country_avg)

     # Handle countries not in Level 2 results
    missing_countries = comparison_df[comparison_df['country_warming_rate'].isna()]['country'].unique()
    if len(missing_countries) > 0:
        print(f"Note: {len(missing_countries)} countries not in level 2 results: {list(missing_countries)}")
        # Use global average for missing countries
        global_avg = results['warming_rate_c_per_decade'].mean()
        comparison_df['country_warming_rate'] = comparison_df['country_warming_rate'].fillna(global_avg)
        
        # Calculate UHI effect   #UHI_effect = City_warming_rate - Country_warming_rate 
    comparison_df['uhi_effect'] = comparison_df['warming_rate_c_per_decade'] - comparison_df['country_warming_rate']
    """ How much EXTRA warming is caused by the city itself, 
         beyond the background climate change affecting the whole country?"""
        
        # Categorize UHI intensity
    comparison_df['uhi_intensity'] = pd.cut(
            comparison_df['uhi_effect'],
            bins=[-float('inf'),-0.05, 0.05, 0.1, 0.15, float('inf')],
            labels = ['Urban Cool Island', 'Neutral', 'Moderate UHI', 'Strong UHI', 'Extreme UHI']
        )
    print(" URBAN HEAT ISLAND INTENSITY DISTRIBUTION:")
    uhi_counts = comparison_df['uhi_intensity'].value_counts().sort_index()
    for intensity, count in uhi_counts.items():
        print(f"  {intensity}: {count} cities")
    
    # Statistical summary
    print(f"\n UHI EFFECT STATISTICS:")
    print(f"  Average UHI effect: {comparison_df['uhi_effect'].mean():.3f}°C/decade")
    print(f"  Maximum UHI effect: {comparison_df['uhi_effect'].max():.3f}°C/decade")
    print(f"  Cities warming faster than countries: {len(comparison_df[comparison_df['uhi_effect'] > 0])}")
    print(f"  Cities warming slower than countries: {len(comparison_df[comparison_df['uhi_effect'] < 0])}")
    
    return comparison_df

  # Run comprehensive UHI analysis
final_uhi_analysis = uhi_analysis(city_warming_rates, results)


Urban Heat Island Analysis
🌡️ URBAN HEAT ISLAND INTENSITY DISTRIBUTION:
  Urban Cool Island: 57 cities
  Neutral: 43 cities
  Moderate UHI: 0 cities
  Strong UHI: 0 cities
  Extreme UHI: 0 cities

📊 UHI EFFECT STATISTICS:
  Average UHI effect: -0.066°C/decade
  Maximum UHI effect: 0.017°C/decade
  Cities warming faster than countries: 6
  Cities warming slower than countries: 94


# Top Urban Climate Findings

In [139]:
def urban_climate_insights(final_uhi_analysis):
    print(f"Fastest Warming Cities")
    fastest_cities = final_uhi_analysis.nlargest(10,'warming_rate_c_per_decade')

    print(" TOP 10 FASTEST-WARMING CITIES:")
    for i , row in fastest_cities.iterrows():
        print(f"{i+1:2d}.{row['city']:20} {row['country']:20} + {row['warming_rate_c_per_decade']:.3f}°C/decade")

    # Strongest UHI effects
    strong_uhi = final_uhi_analysis[final_uhi_analysis['uhi_intensity'].isin(['Strong UHI', 'Extreme UHI'])]

    print(f"\n STRONG URBAN HEAT ISLAND CITIES {len(strong_uhi)} cities")
    for i, row in strong_uhi.nlargest(10,'uhi_effect').iterrows():
        print(f"{row['city']:20} {row['country']:20} + {row['uhi_effect']:.3f}°C/decade")

     # Urban cool islands (cities warming slower than surroundings)
    cool_islands = final_uhi_analysis[final_uhi_analysis['uhi_intensity'] == 'Urban Cool Island']
    if len(cool_islands) > 0:
        print(f"\n Urban Cool Islands: ({len(cool_islands)} cities):")
        for i, row in cool_islands.nsmallest(5, 'uhi_effect').iterrows():
            print(f"{row['city']:20} {row['country']:20} + {row['uhi_effect']:.3f}°C/decade")

    print(f"\n REGIONAL UHI PATTERNS:")
    regional_uhi = final_uhi_analysis.groupby('country')['uhi_effect'].agg(['mean','count']).round(4)
    regional_uhi = regional_uhi.sort_values('mean',ascending= False)

    for country, stats in regional_uhi.head(8).iterrows():
        print(f"  {country:25} {stats['mean']:.3f}°C/decade avg UHI ({int(stats['count'])} cities)")

# Generate insights
urban_climate_insights(final_uhi_analysis)
    


Fastest Warming Cities
 TOP 10 FASTEST-WARMING CITIES:
61.Mashhad              Iran                 + 0.164°C/decade
35.Harbin               China                + 0.160°C/decade
20.Changchun            China                + 0.155°C/decade
66.Moscow               Russia               + 0.151°C/decade
87.Shenyang             China                + 0.143°C/decade
49.Kiev                 Ukraine              + 0.135°C/decade
25.Dalian               China                + 0.133°C/decade
74.Peking               China                + 0.133°C/decade
95.Tianjin              China                + 0.133°C/decade
 7.Baghdad              Iraq                 + 0.132°C/decade

🏙️ STRONG URBAN HEAT ISLAND CITIES 0 cities

 Urban Cool Islands: (57 cities):
Saint Petersburg     Russia               + -0.206°C/decade
Toronto              Canada               + -0.183°C/decade
Montreal             Canada               + -0.174°C/decade
Moscow               Russia               + -0.165°C/decade
Kabul

## Problem

In [141]:
root_cause = """
PROBLEM: Level 2 country warming rates are 2-3x too high!
• Global average should be ~0.1-0.2°C/decade
• Your country rates: 0.3-0.5°C/decade  
• This makes ALL cities appear as "cool islands"

CAUSE: Likely different time periods or data processing between Level 2 and Level 3
"""

## Recalculate with Consistent Methodology

In [143]:
def proper_baseline(city_warming_rates):
    print("Recalculating with proper baseline")

    # Global avg as baseline
    global_avg = 0.165
    corrected_analysis = city_warming_rates.copy()
    corrected_analysis['baseline_warming'] = global_avg
    corrected_analysis['uhi_effect_corrected'] = corrected_analysis['warming_rate_c_per_decade'] - global_avg

    # Recategorize UHI intensity
    corrected_analysis['uhi_intensity_corrected'] = pd.cut(
        corrected_analysis['uhi_effect_corrected'],
        bins=[-float('inf'), -0.05, 0.05, 0.1, 0.15, float('inf')],
        labels=['Urban Cool Island', 'Neutral', 'Moderate UHI', 'Strong UHI', 'Extreme UHI']
    )
    print("CORRECTED UHI ANALYSIS:")
    print(corrected_analysis['uhi_intensity_corrected'].value_counts().sort_index())
    
    return corrected_analysis

# Recalculate with proper baseline
corrected_uhi = proper_baseline(city_warming_rates)

Recalculating with proper baseline
CORRECTED UHI ANALYSIS:
Urban Cool Island    82
Neutral              18
Moderate UHI          0
Strong UHI            0
Extreme UHI           0
Name: uhi_intensity_corrected, dtype: int64


# The UHI calculation is problematic due to data inconsistencies between Level 2 and Level 3.

In [145]:
current_issue = {
    'symptom': "82 cities as Urban Cool Islands (82%)",
    'implication': "Cities appear to be warming MUCH slower than global average", 
    'reality_check': "This contradicts urban climate science",
    'likely_cause': "Different time periods or data processing between analyses"
}

In [146]:
avoid_conclusions = [
    "Urban vs rural comparisons (data mismatch)",
    "UHI effect quantification (methodology issues)",
    "Country-level urban comparisons (inconsistent baselines)"
]

# Skip UHI, Focus on Urban Warming Patterns

In [148]:
def urban_warming_patterns(city_warning_rates):
    print("Urban warming patterns analysis")

    # Categorize cities by absolute warming rates
    city_warming_rates['warming_intensity'] = pd.cut(
        city_warming_rates['warming_rate_c_per_decade'],
        bins=[-float('inf'), 0.10, 0.15, 0.20, 0.25, float('inf')],
        labels=['Very Slow', 'Slow', 'Moderate', 'Fast', 'Extreme']
    )

    print('URBAN WARMING INTENSITY DISTRIBUTION:')
    warming_counts = city_warming_rates['warming_intensity'].value_counts().sort_index()
    for intensity, count in warming_counts.items():
        print(f"{intensity}: {count} cites")

    # Fastest warming urban hotspots
    extreme_cities = city_warming_rates[city_warming_rates['warming_intensity']== 'Extreme']
    fast_cities = city_warming_rates[city_warming_rates['warming_intensity']== 'Fast']

    print(f"\n URBAN CLIMATE HOTSPOTS:")
    print(f"Extreme warming cities: {len(extreme_cities)}")
    print(f"Fast warming cities: {len(fast_cities)}")

    if len(extreme_cities) > 0:
        print(f"\n EXTREME URBAN WARMING CITIES:")
        for _, row in extreme_cities.iterrows():
            print(f" {row['city']:20} {row['country']:20} {row['warming_rate_c_per_decade']:.3f}°C/decade")
    
    return city_warming_rates
            

urban_patterns = urban_warming_patterns(city_warming_rates)




Urban warming patterns analysis
URBAN WARMING INTENSITY DISTRIBUTION:
Very Slow: 53 cites
Slow: 43 cites
Moderate: 4 cites
Fast: 0 cites
Extreme: 0 cites

 URBAN CLIMATE HOTSPOTS:
Extreme warming cities: 0
Fast warming cities: 0


* 96% of cities in "Very Slow" to "Slow" warming categories
* Only 4% in "Moderate" warming
* 0 cities in "Fast" or "Extreme" categories

This could indicate:

* Urban cooling measures are working
* Geographic distribution favoring cooler regions
* Data period not capturing recent acceleration
* Genuine climate pattern worth investigating

# Enhanced Urban Warming Hotspots Analysis

In [151]:
def urban_warming_hotspots(city_warming_rates):
    print("ENHANCED URBAN WARMING PATTERNS ANALYSIS")

    # Categorize cities by absolute warming rates
    city_warming_rates['warming_intensity'] = pd.cut(
        city_warming_rates['warming_rate_c_per_decade'],
        bins=[-float('inf'), 0.10, 0.15, 0.20, 0.25, float('inf')],
        labels=['Very Slow', 'Slow', 'Moderate', 'Fast', 'Extreme']
    )
    print('URBAN WARMING INTENSITY DISTRIBUTION:')
    warming_counts = city_warming_rates['warming_intensity'].value_counts().sort_index()
    for intensity, count in warming_counts.items():
        print(f"{intensity}: {count} cites")

    # NEW: Enhanced hotspot detection
    print(f"\n ENHANCED URBAN CLIMATE HOTSPOTS:")

    # 1. Top 10% fastest warming cities (regardless of category)
    top_10_percent = int(len(city_warming_rates) * 0.1)
    fastest_cities = city_warming_rates.nlargest(top_10_percent, 'warming_rate_c_per_decade')

    print(f"Top {top_10_percent} fastest-warming cities:")
    for _, row in fastest_cities.iterrows():
        print(f"  {row['city']:20} {row['country']:20} {row['warming_rate_c_per_decade']:.3f}°C/decade")

    # 2. Relative warming analysis
    global_urban_avg = city_warming_rates['warming_rate_c_per_decade'].mean()
    above_avg_cities = city_warming_rates[city_warming_rates['warming_rate_c_per_decade'] > global_urban_avg]

    print(f"\n Cities warming faster than urban average ({global_urban_avg:.3f}°C/decade): {len(above_avg_cities)}")
    
    # 3. Regional clustering analysis
    print(f"\n REGIONAL PATTERNS:")

    # Group by country to find national urban hotspots
    country_urban_avg = city_warming_rates.groupby('country')['warming_rate_c_per_decade'].agg(['mean','count']).round(4)
    country_urban_avg= country_urban_avg[country_urban_avg['count'] >= 3] # Only countries with multiple cities

    if len(country_urban_avg) >  0:
        fastest_country = country_urban_avg.nlargest(3, 'mean')
        print("Countries with fastest urban warming (multiple cities):")
        for country, row in fastest_country.iterrows():
            print(f"  {country:20} {row['mean']:.3f}°C/decade ({int(row['count'])} cities)")

    # 4. Statistical insights
    print(f"\n STATISTICAL INSIGHTS:")
    print(f"  Urban warming range: {city_warming_rates['warming_rate_c_per_decade'].min():.3f} to {city_warming_rates['warming_rate_c_per_decade'].max():.3f}°C/decade")
    print(f"  Global urban average: {global_urban_avg:.3f}°C/decade")
    print(f"  Standard deviation: {city_warming_rates['warming_rate_c_per_decade'].std():.3f}°C/decade")
    
    # 5. Moderate warming cities analysis (your current "hotspots")
    moderate_plus = city_warming_rates[city_warming_rates['warming_rate_c_per_decade'] >= 0.15]
    if len(moderate_plus) > 0:
        print(f"\n🏙️ MODERATE+ WARMING CITIES (≥0.15°C/decade):")
        for _, row in moderate_plus.iterrows():
            print(f"  {row['city']:20} {row['country']:20} {row['warming_rate_c_per_decade']:.3f}°C/decade")
    
    return city_warming_rates

# Run enhanced analysis
urban_patterns_enhanced = urban_warming_hotspots(city_warming_rates)

ENHANCED URBAN WARMING PATTERNS ANALYSIS
URBAN WARMING INTENSITY DISTRIBUTION:
Very Slow: 53 cites
Slow: 43 cites
Moderate: 4 cites
Fast: 0 cites
Extreme: 0 cites

 ENHANCED URBAN CLIMATE HOTSPOTS:
Top 10 fastest-warming cities:
  Mashhad              Iran                 0.164°C/decade
  Harbin               China                0.160°C/decade
  Changchun            China                0.155°C/decade
  Moscow               Russia               0.151°C/decade
  Shenyang             China                0.143°C/decade
  Kiev                 Ukraine              0.135°C/decade
  Dalian               China                0.133°C/decade
  Peking               China                0.133°C/decade
  Tianjin              China                0.133°C/decade
  Baghdad              Iraq                 0.132°C/decade

 Cities warming faster than urban average (0.098°C/decade): 48

 REGIONAL PATTERNS:
Countries with fastest urban warming (multiple cities):
  China                0.110°C/decade (1

#### 4 distinct urban hotspots
#### Regional clustering in China
#### Urban-rural warming differences
#### Actionable policy priorities

## The enhanced analysis is still showing the same absolute categorization in the distribution, which means the core problem remains.

In [154]:
def urban_warming_patterns_fixed(city_warming_rates):
    print("FIXED URBAN WARMING PATTERNS ANALYSIS")
    print("=" * 45)
    
    # USE RELATIVE THRESHOLDS based on your data distribution
    warming_rates = city_warming_rates['warming_rate_c_per_decade']
    
    # Calculate percentiles from YOUR data
    p20 = warming_rates.quantile(0.20)  # 20th percentile
    p40 = warming_rates.quantile(0.40)  # 40th percentile  
    p60 = warming_rates.quantile(0.60)  # 60th percentile
    p80 = warming_rates.quantile(0.80)  # 80th percentile
    
    print(f"Data-driven thresholds:")
    print(f"  Bottom 20%: <{p20:.3f}°C/decade")
    print(f"  20-40%: {p20:.3f}-{p40:.3f}°C/decade") 
    print(f"  40-60%: {p40:.3f}-{p60:.3f}°C/decade")
    print(f"  60-80%: {p60:.3f}-{p80:.3f}°C/decade")
    print(f"  Top 20%: ≥{p80:.3f}°C/decade")
    
    # Apply data-driven categorization
    city_warming_rates['warming_intensity'] = pd.cut(
        city_warming_rates['warming_rate_c_per_decade'],
        bins=[-float('inf'), p20, p40, p60, p80, float('inf')],
        labels=['Very Slow', 'Slow', 'Moderate', 'Fast', 'Extreme']
    )

    print('\nURBAN WARMING INTENSITY DISTRIBUTION:')
    warming_counts = city_warming_rates['warming_intensity'].value_counts().sort_index()
    for intensity, count in warming_counts.items():
        print(f"{intensity}: {count} cities")

    # NOW you'll actually find cities in all categories!
    extreme_cities = city_warming_rates[city_warming_rates['warming_intensity'] == 'Extreme']
    fast_cities = city_warming_rates[city_warming_rates['warming_intensity'] == 'Fast']
    moderate_cities = city_warming_rates[city_warming_rates['warming_intensity'] == 'Moderate']

    print(f"\nURBAN CLIMATE HOTSPOTS:")
    print(f"Extreme warming cities: {len(extreme_cities)}")
    print(f"Fast warming cities: {len(fast_cities)}")
    print(f"Moderate warming cities: {len(moderate_cities)}")

    if len(extreme_cities) > 0:
        print(f"\nEXTREME URBAN WARMING CITIES (Top 20%):")
        for _, row in extreme_cities.iterrows():
            print(f"  {row['city']:20} {row['country']:20} {row['warming_rate_c_per_decade']:.3f}°C/decade")
    
    if len(fast_cities) > 0:
        print(f"\nFAST URBAN WARMING CITIES (60-80th percentile):")
        for _, row in fast_cities.iterrows():
            print(f"  {row['city']:20} {row['country']:20} {row['warming_rate_c_per_decade']:.3f}°C/decade")

    return city_warming_rates

# Run the FIXED version
urban_patterns_fixed = urban_warming_patterns_fixed(city_warming_rates)

FIXED URBAN WARMING PATTERNS ANALYSIS
Data-driven thresholds:
  Bottom 20%: <0.077°C/decade
  20-40%: 0.077-0.091°C/decade
  40-60%: 0.091-0.102°C/decade
  60-80%: 0.102-0.114°C/decade
  Top 20%: ≥0.114°C/decade

URBAN WARMING INTENSITY DISTRIBUTION:
Very Slow: 20 cities
Slow: 20 cities
Moderate: 20 cities
Fast: 20 cities
Extreme: 20 cities

URBAN CLIMATE HOTSPOTS:
Extreme warming cities: 20
Fast warming cities: 20
Moderate warming cities: 20

EXTREME URBAN WARMING CITIES (Top 20%):
  Mashhad              Iran                 0.164°C/decade
  Harbin               China                0.160°C/decade
  Changchun            China                0.155°C/decade
  Moscow               Russia               0.151°C/decade
  Shenyang             China                0.143°C/decade
  Kiev                 Ukraine              0.135°C/decade
  Dalian               China                0.133°C/decade
  Peking               China                0.133°C/decade
  Tianjin              China            

In [155]:
def plot_urban_warming_distribution(urban_patterns_fixed):
    """Show the distribution across warming intensity categories"""
    
    # Count by category
    category_counts = urban_patterns_fixed['warming_intensity'].value_counts().reset_index()
    category_counts.columns = ['warming_intensity', 'count']
    
    fig = px.bar(
        category_counts,
        x='warming_intensity',
        y='count',
        color='warming_intensity',
        color_discrete_map={
            'Very Slow': '#2ecc71',
            'Slow': '#f39c12',
            'Moderate': '#e67e22', 
            'Fast': '#e74c3c',
            'Extreme': '#8b0000'
        },
        category_orders={'warming_intensity': ['Very Slow', 'Slow', 'Moderate', 'Fast', 'Extreme']},
        title='<b>Urban Warming Intensity Distribution</b><br><b>20 Cities in Each Category - Perfect Balance</b>',
        labels={'count': 'Number of Cities', 'warming_intensity': 'Warming Intensity'}
    )
    
    # Add count labels on bars
    fig.update_traces(texttemplate='%{y}', textposition='outside')
    
    fig.update_layout(
        showlegend=False,
        height=400,
        xaxis_title="Warming Intensity Category",
        yaxis_title="Number of Cities"
    )
    
    return fig
fig1 = plot_urban_warming_distribution(urban_patterns_fixed)
fig1.show()

In [156]:
def plot_extreme_warming_cities(urban_patterns_fixed):
    """Bar chart of the top 20 extreme warming cities"""
    
    extreme_cities = urban_patterns_fixed[urban_patterns_fixed['warming_intensity'] == 'Extreme']
    extreme_cities = extreme_cities.nlargest(20, 'warming_rate_c_per_decade')
    
    fig = px.bar(
        extreme_cities,
        x='warming_rate_c_per_decade',
        y='city',
        orientation='h',
        color='warming_rate_c_per_decade',
        color_continuous_scale='Reds',
        hover_data={'country': True},
        title='<b>Top 20 Extreme Urban Warming Cities</b><br><b>Mashhad, Iran Leads at 0.164°C/decade</b>'
    )
    
    # Add country labels
    fig.update_traces(
        hovertemplate='<b>%{y}</b><br>Country: %{customdata[0]}<br>Warming Rate: %{x:.3f}°C/decade<extra></extra>'
    )
    
    fig.update_layout(
        yaxis={'categoryorder': 'total ascending'},
        height=600,
        xaxis_title="Warming Rate (°C per decade)",
        yaxis_title="City",
        showlegend=False
    )
    
    return fig
fig2 = plot_extreme_warming_cities(urban_patterns_fixed)
fig2.show()

In [157]:
def plot_warming_distribution_histogram(urban_patterns_fixed):
    """Show the statistical distribution of urban warming rates"""
    
    fig = px.histogram(
        urban_patterns_fixed,
        x='warming_rate_c_per_decade',
        nbins=20,
        color_discrete_sequence=['#e74c3c'],
        title='<b>Distribution of Urban Warming Rates</b><br><b>Range: 0.049 to 0.164°C/decade</b>',
        labels={'warming_rate_c_per_decade': 'Warming Rate (°C per decade)'}
    )
    
    # Add statistical lines
    mean_rate = urban_patterns_fixed['warming_rate_c_per_decade'].mean()
    std_rate = urban_patterns_fixed['warming_rate_c_per_decade'].std()
    
    fig.add_vline(x=mean_rate, line_dash="dash", line_color="blue",
                 annotation_text=f"Mean: {mean_rate:.3f}°C/decade")
    
    fig.add_vline(x=mean_rate + std_rate, line_dash="dot", line_color="orange",
                 annotation_text=f"+1 STD: {mean_rate + std_rate:.3f}°C/decade")
    
    fig.add_vline(x=mean_rate - std_rate, line_dash="dot", line_color="orange",
                 annotation_text=f"-1 STD: {mean_rate - std_rate:.3f}°C/decade")
    
    fig.update_layout(
        height=400,
        showlegend=False,
        xaxis_title="Warming Rate (°C per decade)",
        yaxis_title="Number of Cities"
    )
    
    return fig
fig5 = plot_warming_distribution_histogram(urban_patterns_fixed)
fig5.show()

In [158]:
results.to_csv('country_warming_rates.csv', index=False)
print("✅ Data saved to country_warming_rates.csv")

✅ Data saved to country_warming_rates.csv


In [159]:
city_warming_rates.to_csv('city_warming_rates.csv', index=False)
print("✅ Data saved to city_warming_rates.csv")

✅ Data saved to city_warming_rates.csv


In [213]:
vulnerability_results.to_csv('vulnerability_results.csv', index=False)
print("✅ Data saved to vulnerability_results.csv")

✅ Data saved to vulnerability_results.csv
