In [None]:
# CO2 Emissions and Global Temperature Impact Dashboard - Google Colab Version
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot
import plotly.io as pio

# Initialize Plotly for Colab
# Notebook mode is initialised with connected=True, then the module-wide
# default renderer is set to 'colab' for the figures displayed below.
init_notebook_mode(connected=True)
pio.renderers.default = 'colab'

# Load the CO2 dataset
# Every later step needs `df`, so a failed download must stop this cell with
# a visible error. The failure is reported and then re-raised; exit() is
# intended for interactive shells and would end the kernel session instead of
# surfacing a normal cell error.
print("Loading CO2 dataset...")
try:
    url = 'https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv'
    df = pd.read_csv(url)
    print(f"Dataset loaded successfully! Shape: {df.shape}")
except Exception as e:
    print(f"Error loading dataset: {e}")
    raise

# Data preprocessing
print("Processing data...")
# Filter out non-country entities and clean the data.
# The 'country' column also holds aggregate rows (continents, income groups,
# blocs, "(excl. ...)" and "(GCP)" groupings). They are removed both by exact
# name and by substring marker, matching the filtering used by
# preprocess_data later in this notebook, so that the "top countries" chart
# below ranks countries only.
non_country_names = [
    'World', 'Asia', 'Europe', 'North America', 'South America', 'Africa', 'Oceania',
    'European Union (27)', 'European Union (28)', 'High-income countries',
    'Low-income countries', 'Upper-middle-income countries', 'Lower-middle-income countries',
    'OECD', 'Non-OECD', 'Asia (excl. China and India)', 'Europe (excl. EU-27)', 'Europe (excl. EU-28)',
    'International transport', 'International aviation', 'International shipping',
    'EU-27', 'EU-28', 'G7', 'G20'
]
# Case-insensitive substrings that mark an aggregate entity.
non_country_markers = ['income', 'oecd', 'eu-', 'g7', 'g20', '(excl', 'international', '(gcp)']

country_lower = df['country'].str.lower()
is_aggregate = df['country'].isin(non_country_names)
for marker in non_country_markers:
    # regex=False: markers such as '(excl' are literal text, not patterns.
    is_aggregate |= country_lower.str.contains(marker, regex=False, na=False)

df_clean = df[~is_aggregate]
df_clean = df_clean.dropna(subset=['co2', 'year', 'temperature_change_from_co2'])

# Get the most recent year with data
latest_year = df_clean['year'].max()
print(f"Latest year in dataset: {latest_year}")

# --- Data for Graphs ---

# 1. Emissions by Source (Stacked Bar Chart)
# World rows from 1900 up to the latest year found during preprocessing.
df_world = df[df['country'] == 'World']
df_filtered_world = df_world[df_world['year'].between(1900, latest_year)]

# Keep only the source columns that are present and hold at least one value.
available_columns = [
    col
    for col in ['coal_co2', 'oil_co2', 'gas_co2', 'cement_co2', 'flaring_co2', 'other_industry_co2']
    if col in df_filtered_world.columns and df_filtered_world[col].notna().any()
]

print(f"Available CO2 source columns: {available_columns}")

# 2. Top Countries Contributing to Temperature Change (Pie Chart)
# Ten largest values of temperature_change_from_co2 in the latest year.
top_temp_contributors = df_clean[df_clean['year'] == latest_year].dropna(subset=['temperature_change_from_co2']).nlargest(10, 'temperature_change_from_co2')

# 3. Dual-Axis Line Chart (CO2 vs. Temperature)
# One row per year for the 'World' entity. min_count=1 keeps a year whose co2
# value is missing as NaN; a plain 'sum' would report it as 0.0 and the chart
# would show zero emissions for that year.
global_trends = (
    df[df['country'] == 'World']
    .groupby('year')
    .agg(
        co2=('co2', lambda values: values.sum(min_count=1)),
        temperature_change_from_co2=('temperature_change_from_co2', 'mean'),
    )
    .reset_index()
)

print("Creating visualizations...")

# Create the visualizations
# Dashboard title framed by two 50-character '=' rules.
print("\n" + "="*50)
print("CO‚ÇÇ EMISSIONS AND GLOBAL TEMPERATURE IMPACT DASHBOARD")
print("="*50)

# 1. Stacked Bar Chart - Emissions by Source
print("\n1. Global CO‚ÇÇ Emissions by Source")
if not available_columns:
    print("No CO2 source data available for stacked bar chart")
else:
    # One bar per year, with one stacked segment per available source column.
    fig1 = px.bar(
        data_frame=df_filtered_world,
        x='year',
        y=available_columns,
        title='Global CO‚ÇÇ Emissions by Source Over Time',
        labels=dict(
            value="Annual CO‚ÇÇ Emissions (Million tonnes)",
            variable="Source",
            year="Year",
        ),
        color_discrete_sequence=px.colors.qualitative.Set1,
    )
    fig1.update_layout(height=500, barmode='stack')
    fig1.show()

# 2. Pie Chart - Top Temperature Contributors
print("\n2. Top Contributors to Temperature Change")
if len(top_temp_contributors) == 0:
    print("No temperature contributor data available")
else:
    # Donut chart (hole=0.3): one slice per country in top_temp_contributors.
    fig2 = px.pie(
        data_frame=top_temp_contributors,
        names='country',
        values='temperature_change_from_co2',
        title=f'Share of Temperature Impact from CO‚ÇÇ ({latest_year})',
        hole=0.3,
        color_discrete_sequence=px.colors.sequential.Agsunset,
    )
    fig2.update_traces(textinfo='percent+label', textposition='inside')
    fig2.update_layout(height=500)
    fig2.show()

# 3. Dual-Axis Line Chart - CO2 vs Temperature
print("\n3. CO‚ÇÇ Emissions vs Temperature Change Over Time")
if len(global_trends) == 0:
    print("No global trends data available")
else:
    fig3 = make_subplots(specs=[[dict(secondary_y=True)]])

    # (column, legend name, line colour, drawn on the secondary y-axis?)
    for column, trace_name, color, on_secondary in (
        ('co2', "Total CO‚ÇÇ Emissions", '#e74c3c', False),
        ('temperature_change_from_co2', "Temperature Change from CO‚ÇÇ", '#3498db', True),
    ):
        fig3.add_trace(
            go.Scatter(
                x=global_trends['year'],
                y=global_trends[column],
                name=trace_name,
                line=dict(color=color, width=3),
            ),
            secondary_y=on_secondary,
        )

    fig3.update_layout(
        title="CO‚ÇÇ Emissions and Temperature Change Correlation",
        height=500,
        showlegend=True,
        plot_bgcolor='white',
        paper_bgcolor='white',
    )

    fig3.update_xaxes(title_text="Year")
    fig3.update_yaxes(title_text="CO‚ÇÇ Emissions (Million Tonnes)", secondary_y=False)
    fig3.update_yaxes(title_text="Temperature Change (¬∞C)", secondary_y=True)

    fig3.show()

Loading CO2 dataset...
Dataset loaded successfully! Shape: (50191, 79)
Processing data...
Latest year in dataset: 2023
Available CO2 source columns: ['coal_co2', 'oil_co2', 'gas_co2', 'cement_co2', 'flaring_co2', 'other_industry_co2']
Creating visualizations...

CO‚ÇÇ EMISSIONS AND GLOBAL TEMPERATURE IMPACT DASHBOARD

1. Global CO‚ÇÇ Emissions by Source



2. Top Contributors to Temperature Change



3. CO‚ÇÇ Emissions vs Temperature Change Over Time


In [None]:
# Simple Line Graph - Global CO2 Emissions Over Time
print("Creating CO2 Trend Line Graph...")

# World rows only, reduced to the two plotted columns with gaps removed.
world_data = df.loc[df['country'] == 'World']
world_co2 = world_data.loc[:, ['year', 'co2']].dropna()

# Single line: year on x, co2 on y.
fig = px.line(
    data_frame=world_co2,
    x='year',
    y='co2',
    title='Global CO‚ÇÇ Emissions Over Time',
    labels=dict(co2='CO‚ÇÇ Emissions (Million Tonnes)', year='Year'),
)

# White plot background, fixed height.
fig.update_layout(height=500, plot_bgcolor='white')

fig.show()

print("Line graph created successfully!")

Creating CO2 Trend Line Graph...


Line graph created successfully!


In [None]:
# Energy Demand and CO₂ Emissions Prediction - Google Colab Version
# Complete conversion from Streamlit to Colab

# ============================================================================
# SECTION 1: SETUP AND INSTALLATIONS
# ============================================================================

print("="*70)
print("ENERGY DEMAND & CO‚ÇÇ EMISSIONS PREDICTOR")
print("Google Colab Version")
print("="*70)

# Install required packages
print("\nüì¶ Installing required packages...")
!pip install plotly pandas scikit-learn -q

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot
import plotly.io as pio
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

# Initialize Plotly for Colab
# Same setup as the first cell: notebook mode with connected=True, then the
# module-wide default renderer set to 'colab'.
init_notebook_mode(connected=True)
pio.renderers.default = 'colab'

print("‚úÖ All packages loaded successfully!\n")

# ============================================================================
# SECTION 2: DATA LOADING AND PREPROCESSING
# ============================================================================

# Section banner framed by two 70-character '=' rules.
print("="*70)
print("LOADING AND PREPROCESSING DATA")
print("="*70)

def load_data():
    """Download the Our World in Data CO2 CSV and return it as a DataFrame.

    Returns:
        The parsed DataFrame, or None when reading fails for any reason. The
        error is printed rather than raised, so callers must check for None.
    """
    source_url = 'https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv'
    try:
        raw = pd.read_csv(source_url)
        print("‚úÖ Data loaded successfully from Our World in Data!")
        print(f"üìä Dataset shape: {raw.shape}")
    except Exception as err:
        print(f"‚ùå Error loading data: {err}")
        return None
    return raw

def preprocess_data(df):
    """Clean the raw dataset and keep only countries usable for modelling.

    Steps:
      1. Keep the modelling columns that exist in ``df``.
      2. Drop rows missing ``country``, ``year`` or ``co2``.
      3. Drop aggregate entities (continents, income groups, blocs,
         "(excl. ...)" and "(GCP)" groupings) by exact name and by
         case-insensitive substring.
      4. Keep countries with at least 8 rows and a latest year >= 2000.
      5. Forward- then backward-fill remaining gaps within each country.

    Args:
        df: Raw DataFrame as returned by ``load_data``, or None.

    Returns:
        ``(df_clean, valid_countries, available_columns)`` where
        ``valid_countries`` is a sorted list of country names and
        ``available_columns`` lists the selected columns. Returns
        ``(None, None, None)`` when ``df`` is None.
    """
    if df is None:
        return None, None, None

    # Select relevant columns
    columns_needed = [
        'country', 'year', 'population', 'gdp', 'co2',
        'primary_energy_consumption', 'energy_per_capita', 'co2_per_capita'
    ]

    available_columns = [col for col in columns_needed if col in df.columns]
    df_clean = df[available_columns].copy()

    # Remove rows with missing values in key columns
    key_columns = ['country', 'year', 'co2']
    df_clean = df_clean.dropna(subset=key_columns)

    # Filter out non-country entities
    exclude_entities = {
        'World', 'Asia', 'Europe', 'North America', 'South America', 'Africa', 'Oceania',
        'European Union (27)', 'European Union (28)', 'High-income countries',
        'Low-income countries', 'Upper-middle-income countries', 'Lower-middle-income countries',
        'OECD', 'Non-OECD', 'Asia (excl. China and India)', 'Europe (excl. EU-27)', 'Europe (excl. EU-28)',
        'International transport', 'International aviation', 'International shipping',
        'EU-27', 'EU-28', 'G7', 'G20'
    }

    # '(GCP)' catches regional groupings such as 'Africa (GCP)', which the
    # exact-name list alone lets through.
    exclude_patterns = ['income', 'OECD', 'EU-', 'G7', 'G20', '(excl', 'International', '(GCP)']
    lowered_patterns = tuple(pattern.lower() for pattern in exclude_patterns)

    def is_valid_country(country_name):
        """Return True unless the name is a known aggregate entity."""
        if country_name in exclude_entities:
            return False
        lowered_name = country_name.lower()
        return not any(pattern in lowered_name for pattern in lowered_patterns)

    # astype(bool) keeps the mask boolean even when no rows are left.
    df_clean = df_clean[df_clean['country'].apply(is_valid_country).astype(bool)]

    # Get countries with sufficient data
    country_counts = df_clean.groupby('country')['year'].agg(
        data_points='count', latest_year='max'
    )
    valid_countries_mask = (country_counts['data_points'] >= 8) & (country_counts['latest_year'] >= 2000)
    valid_countries = country_counts.index[valid_countries_mask].tolist()

    df_clean = df_clean[df_clean['country'].isin(valid_countries)].copy()

    # Fill missing values per country. Rows are put in chronological order
    # first so that forward/backward filling carries values between
    # neighbouring years; the stable sort leaves already-sorted input as is.
    df_clean = df_clean.sort_values(['country', 'year'], kind='stable')
    fill_columns = [col for col in df_clean.columns if col != 'country']
    df_clean[fill_columns] = df_clean.groupby('country')[fill_columns].ffill()
    df_clean[fill_columns] = df_clean.groupby('country')[fill_columns].bfill()

    df_clean = df_clean.dropna(subset=['country', 'year', 'co2'])
    valid_countries = sorted(df_clean['country'].unique().tolist())

    return df_clean, valid_countries, available_columns

# Load and preprocess data
df = load_data()
df_clean, valid_countries, available_columns = preprocess_data(df)

# Everything below indexes into df_clean / valid_countries, so stop here with
# a clear error instead of letting a later line fail on None.
if df_clean is None or df_clean.empty:
    print("‚ùå No data available. Please check your data source.")
    raise RuntimeError("No data available after loading and preprocessing; cannot continue.")

print(f"‚úÖ Data preprocessing complete!")
print(f"üìç Total valid countries: {len(valid_countries)}")
print(f"üìÖ Year range: {int(df_clean['year'].min())} - {int(df_clean['year'].max())}")

# ============================================================================
# SECTION 3: USER INPUT CONFIGURATION
# ============================================================================

print("\n" + "="*70)
print("CONFIGURATION")
print("="*70)

# Select country (you can change this)
print(f"\nüåç Available countries (first 10): {valid_countries[:10]}")
# Only mention the remainder when there is one; avoids a negative count.
if len(valid_countries) > 10:
    print(f"   ... and {len(valid_countries) - 10} more countries")

# USER INPUT: Change these values
SELECTED_COUNTRY = 'Afghanistan'  # Change this to any country from the list
PREDICTION_YEAR = 2030              # Change this to any year between 2024-2050

# Validate inputs before echoing them, so the lines below show the country
# that is actually analysed. The fallback is 'United States' when it is
# available (or when the list is empty), otherwise the first valid country;
# the warning names whichever fallback was chosen.
if SELECTED_COUNTRY not in valid_countries:
    requested_country = SELECTED_COUNTRY
    if 'United States' in valid_countries or not valid_countries:
        SELECTED_COUNTRY = 'United States'
    else:
        SELECTED_COUNTRY = valid_countries[0]
    print(f"‚ö†Ô∏è  Warning: '{requested_country}' not found. Using default: '{SELECTED_COUNTRY}'")

print(f"\nüéØ Selected Country: {SELECTED_COUNTRY}")
print(f"üìÖ Prediction Year: {PREDICTION_YEAR}")

# ============================================================================
# SECTION 4: HISTORICAL TRENDS VISUALIZATION
# ============================================================================

print("\n" + "="*70)
print(f"HISTORICAL TRENDS FOR {SELECTED_COUNTRY}")
print("="*70)

# Rows for the selected country, in chronological order.
country_data = df_clean[df_clean['country'] == SELECTED_COUNTRY].copy().sort_values('year')

if len(country_data) >= 5:
    # Two stacked panels: CO2 on top, energy consumption below.
    fig_historical = make_subplots(
        rows=2, cols=1,
        subplot_titles=['CO‚ÇÇ Emissions Over Time', 'Energy Consumption Over Time'],
        vertical_spacing=0.15
    )

    # (column, legend name, line colour, subplot row)
    for column, trace_name, color, row in (
        ('co2', 'CO‚ÇÇ Emissions', 'red', 1),
        ('primary_energy_consumption', 'Energy Consumption', 'blue', 2),
    ):
        # CO2 is always drawn; the energy trace only when its column exists.
        if column != 'co2' and column not in country_data.columns:
            continue
        fig_historical.add_trace(
            go.Scatter(
                x=country_data['year'],
                y=country_data[column],
                mode='lines+markers',
                name=trace_name,
                line=dict(color=color, width=3),
                marker=dict(size=6)
            ),
            row=row, col=1
        )

    fig_historical.update_layout(
        title_text=f"Historical Data for {SELECTED_COUNTRY}",
        showlegend=True,
        height=700
    )
    fig_historical.update_xaxes(title_text="Year")
    fig_historical.update_yaxes(title_text="CO‚ÇÇ Emissions (Mt)", row=1, col=1)
    fig_historical.update_yaxes(title_text="Energy Consumption (TWh)", row=2, col=1)

    fig_historical.show()

    # Key statistics taken from the most recent row.
    latest_data = country_data.iloc[-1]
    print(f"\nüìä KEY STATISTICS FOR {SELECTED_COUNTRY}")
    print("-" * 70)
    print(f"Latest Data Year: {int(latest_data['year'])}")
    # Each optional figure is printed only when present and not missing.
    if 'population' in latest_data and pd.notna(latest_data['population']):
        print(f"Population: {latest_data['population']/1e6:.1f}M")
    if pd.notna(latest_data['co2']):
        print(f"CO‚ÇÇ Emissions: {latest_data['co2']:.1f} Mt")
    if 'primary_energy_consumption' in latest_data and pd.notna(latest_data['primary_energy_consumption']):
        print(f"Energy Consumption: {latest_data['primary_energy_consumption']:.1f} TWh")
else:
    print(f"‚ö†Ô∏è  Insufficient data for {SELECTED_COUNTRY}")

# ============================================================================
# SECTION 5: MACHINE LEARNING PREDICTIONS
# ============================================================================

# Section banner framed by two 70-character '=' rules.
print("\n" + "="*70)
print("MACHINE LEARNING PREDICTIONS")
print("="*70)

def _predict_with_linear_trend(country_data, target, candidate_features,
                               prediction_year, min_target_points=1):
    """Fit a LinearRegression for ``target`` and predict ``prediction_year``.

    Shared implementation behind make_co2_prediction and
    make_energy_prediction.

    Args:
        country_data: Rows for one country; the last row is treated as the
            most recent observation (callers sort by year beforehand).
        target: Name of the column to predict.
        candidate_features: Column names to try as regressors. A candidate is
            used only if it exists and more than half of its values are
            present.
        prediction_year: Year to predict for.
        min_target_points: Minimum number of non-missing target values
            required to attempt a fit.

    Returns:
        ``(prediction, r2, latest_target_value)``, or ``(None, None, None)``
        when there are too few usable features, target values or rows.
        ``r2`` is computed on the same rows the model was fitted on, so it
        measures fit quality, not out-of-sample accuracy.
    """
    usable_features = [
        feature for feature in candidate_features
        if feature in country_data.columns
        and country_data[feature].notna().sum() > len(country_data) * 0.5
    ]
    if len(usable_features) <= 1 or country_data[target].notna().sum() < min_target_points:
        return None, None, None

    # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
    X = country_data[usable_features].ffill().bfill()
    y = country_data[target].ffill().bfill()
    if len(X) < 3:
        return None, None, None

    model = LinearRegression()
    model.fit(X, y)

    latest_row = country_data.iloc[-1]
    years_ahead = prediction_year - latest_row['year']

    # Extrapolate each non-year feature linearly from its last three values.
    # The values come from the gap-filled frame so a missing recent
    # observation cannot put NaN into the prediction input.
    future_row = {}
    for feature in usable_features:
        if feature == 'year':
            future_row[feature] = prediction_year
            continue
        recent_values = X[feature].tail(3).to_numpy(dtype=float)
        # N points span N-1 steps; assumes one row per year - TODO confirm.
        per_step_trend = (recent_values[-1] - recent_values[0]) / (len(recent_values) - 1)
        future_row[feature] = recent_values[-1] + per_step_trend * years_ahead

    # Predict from a DataFrame with the same column names used for fitting.
    future_X = pd.DataFrame([future_row], columns=usable_features)
    prediction = model.predict(future_X)[0]

    # Model performance (in-sample)
    r2 = r2_score(y, model.predict(X))

    return prediction, r2, latest_row[target]


def make_co2_prediction(country_data, prediction_year):
    """Predict CO2 emissions for ``prediction_year`` using Linear Regression.

    Regressors are chosen from year, population, gdp and
    primary_energy_consumption, whichever are sufficiently populated.

    Returns:
        ``(predicted_co2, r2, latest_co2)`` or ``(None, None, None)`` when a
        prediction cannot be made.
    """
    return _predict_with_linear_trend(
        country_data,
        target='co2',
        candidate_features=['year', 'population', 'gdp', 'primary_energy_consumption'],
        prediction_year=prediction_year,
    )


def make_energy_prediction(country_data, prediction_year):
    """Predict energy consumption for ``prediction_year`` using Linear Regression.

    Regressors are chosen from year, population, gdp and co2, whichever are
    sufficiently populated. At least three non-missing
    primary_energy_consumption values are required.

    Returns:
        ``(predicted_energy, r2, latest_energy)`` or ``(None, None, None)``
        when the column is absent or a prediction cannot be made.
    """
    if 'primary_energy_consumption' not in country_data.columns:
        return None, None, None

    return _predict_with_linear_trend(
        country_data,
        target='primary_energy_consumption',
        candidate_features=['year', 'population', 'gdp', 'co2'],
        prediction_year=prediction_year,
        min_target_points=3,
    )

# Make predictions
# The "Current ..." lines are labelled with the year of the country's last
# row (country_data is sorted by year) instead of a hard-coded 2023, so the
# label stays correct for countries or dataset versions that end elsewhere.
if not country_data.empty and len(country_data) >= 5:
    print(f"\nü§ñ Training models for {SELECTED_COUNTRY}...")

    # CO‚ÇÇ Prediction
    co2_pred, co2_r2, current_co2 = make_co2_prediction(country_data, PREDICTION_YEAR)

    if co2_pred is not None:
        print(f"\nüéØ CO‚ÇÇ EMISSIONS PREDICTION FOR {PREDICTION_YEAR}")
        print("-" * 70)
        print(f"Predicted CO‚ÇÇ: {co2_pred:.2f} Million Tonnes")
        print(f"Current CO‚ÇÇ ({int(country_data['year'].iloc[-1])}): {current_co2:.2f} Mt")
        print(f"Change: {co2_pred - current_co2:+.2f} Mt ({((co2_pred - current_co2)/current_co2)*100:+.1f}%)")
        print(f"Model Accuracy (R¬≤): {co2_r2:.3f}")
    else:
        print("\n‚ö†Ô∏è  Could not generate CO‚ÇÇ prediction (insufficient data)")

    # Energy Prediction
    energy_pred, energy_r2, current_energy = make_energy_prediction(country_data, PREDICTION_YEAR)

    if energy_pred is not None:
        print(f"\n‚ö° ENERGY CONSUMPTION PREDICTION FOR {PREDICTION_YEAR}")
        print("-" * 70)
        print(f"Predicted Energy: {energy_pred:.2f} TWh")
        print(f"Current Energy ({int(country_data['year'].iloc[-1])}): {current_energy:.2f} TWh")
        print(f"Change: {energy_pred - current_energy:+.2f} TWh ({((energy_pred - current_energy)/current_energy)*100:+.1f}%)")
        print(f"Model Accuracy (R¬≤): {energy_r2:.3f}")
    else:
        print("\n‚ö†Ô∏è  Could not generate Energy prediction (insufficient data)")

# ============================================================================
# SECTION 6: FUTURE PROJECTIONS
# ============================================================================

print("\n" + "="*70)
print(f"FUTURE PROJECTIONS (2024-2040)")
print("="*70)

# Straight-line projection: the average per-step change over the last five
# rows is extended from the latest observed value, floored at zero.
if not country_data.empty and len(country_data) >= 5:
    future_years = list(range(2024, 2041))
    future_co2 = []
    future_energy = []

    recent_data = country_data.tail(5)
    # N rows span N-1 year-to-year steps, so the slope divides by N-1.
    # Assumes one row per year - TODO confirm.
    trend_steps = len(recent_data) - 1

    # CO‚ÇÇ trend
    co2_trend = (recent_data['co2'].iloc[-1] - recent_data['co2'].iloc[0]) / trend_steps
    current_co2 = country_data['co2'].iloc[-1]

    # Energy trend
    # Availability is tracked explicitly: a perfectly flat trend (0) is a
    # valid result and must still project the current level, not zero.
    has_energy = (
        'primary_energy_consumption' in country_data.columns
        and country_data['primary_energy_consumption'].notna().sum() > 0
    )
    if has_energy:
        energy_trend = (recent_data['primary_energy_consumption'].iloc[-1] - recent_data['primary_energy_consumption'].iloc[0]) / trend_steps
        current_energy = country_data['primary_energy_consumption'].iloc[-1]
    else:
        energy_trend = 0
        current_energy = 0

    for year in future_years:
        years_ahead = year - country_data['year'].iloc[-1]
        projected_co2 = current_co2 + (co2_trend * years_ahead)
        future_co2.append(max(0, projected_co2))

        if has_energy:
            projected_energy = current_energy + (energy_trend * years_ahead)
            future_energy.append(max(0, projected_energy))
        else:
            future_energy.append(0)

    # Create projection plot
    fig_projection = make_subplots(
        rows=2, cols=1,
        subplot_titles=['CO‚ÇÇ Emissions Projection', 'Energy Consumption Projection'],
        vertical_spacing=0.15
    )

    # Historical + Projected CO‚ÇÇ
    fig_projection.add_trace(
        go.Scatter(
            x=country_data['year'],
            y=country_data['co2'],
            mode='lines+markers',
            name='Historical CO‚ÇÇ',
            line=dict(color='red', width=3)
        ),
        row=1, col=1
    )

    fig_projection.add_trace(
        go.Scatter(
            x=future_years,
            y=future_co2,
            mode='lines+markers',
            name='Projected CO‚ÇÇ',
            line=dict(color='red', width=3, dash='dash')
        ),
        row=1, col=1
    )

    # Historical + Projected Energy
    if has_energy:
        fig_projection.add_trace(
            go.Scatter(
                x=country_data['year'],
                y=country_data['primary_energy_consumption'],
                mode='lines+markers',
                name='Historical Energy',
                line=dict(color='blue', width=3)
            ),
            row=2, col=1
        )

        fig_projection.add_trace(
            go.Scatter(
                x=future_years,
                y=future_energy,
                mode='lines+markers',
                name='Projected Energy',
                line=dict(color='blue', width=3, dash='dash')
            ),
            row=2, col=1
        )

    fig_projection.update_layout(
        height=700,
        showlegend=True,
        title_text=f"Future Projections for {SELECTED_COUNTRY} (2024-2040)"
    )
    fig_projection.update_xaxes(title_text="Year")
    fig_projection.update_yaxes(title_text="CO‚ÇÇ Emissions (Mt)", row=1, col=1)
    fig_projection.update_yaxes(title_text="Energy Consumption (TWh)", row=2, col=1)

    fig_projection.show()

    # Projection summary
    avg_co2_growth = np.mean(np.diff(future_co2))
    total_co2_increase = future_co2[-1] - current_co2

    print(f"\nüìä PROJECTION SUMMARY")
    print("-" * 70)
    print(f"Average Annual CO‚ÇÇ Growth: {avg_co2_growth:.2f} Mt/year")
    print(f"Total CO‚ÇÇ Increase (2024-2040): {total_co2_increase:.2f} Mt ({(total_co2_increase/current_co2)*100:.1f}%)")

    if future_energy and max(future_energy) > 0:
        avg_energy_growth = np.mean(np.diff(future_energy))
        print(f"Average Annual Energy Growth: {avg_energy_growth:.2f} TWh/year")

# ============================================================================
# SECTION 7: GLOBAL ANALYTICS DASHBOARD
# ============================================================================

print("\n" + "="*70)
print("GLOBAL ANALYTICS DASHBOARD")
print("="*70)

# 1. Global CO2 Trend
print("\n1. üåç Global CO‚ÇÇ Emissions Trend")
# World rows only, reduced to the plotted columns with gaps removed.
world_data = df.loc[df['country'] == 'World']
world_co2 = world_data.loc[:, ['year', 'co2']].dropna()

if not world_co2.empty:
    fig_global = px.line(
        data_frame=world_co2,
        x='year',
        y='co2',
        title='Global CO‚ÇÇ Emissions Over Time',
        labels=dict(co2='CO‚ÇÇ Emissions (Million Tonnes)', year='Year'),
    )
    fig_global.update_traces(line=dict(width=3, color='#e74c3c'))
    fig_global.update_layout(height=500)
    fig_global.show()

# 2. Emissions by Source
print("\n2. ‚ö° Global CO‚ÇÇ Emissions by Source")
df_world = df.loc[df['country'] == 'World']
# An upper bound of "<= the maximum year" admits every row, so only the lower
# bound needs to be applied.
df_filtered_world = df_world.loc[df_world['year'] >= 1990].copy()

potential_sources = ['coal_co2', 'oil_co2', 'gas_co2', 'cement_co2', 'flaring_co2', 'other_industry_co2']
# Keep the sources that exist as columns and hold at least one value.
source_columns = [
    col
    for col in potential_sources
    if col in df_filtered_world.columns and df_filtered_world[col].notna().any()
]

if source_columns:
    # Stacked bars over the last 30 rows of the filtered World data.
    fig_sources = px.bar(
        data_frame=df_filtered_world.tail(30),
        x='year',
        y=source_columns,
        title='Global CO‚ÇÇ Emissions by Source (Recent Decades)',
        labels=dict(
            value="Annual CO‚ÇÇ Emissions (Million tonnes)",
            variable="Source",
            year="Year",
        ),
        color_discrete_sequence=px.colors.qualitative.Set1,
    )
    fig_sources.update_layout(height=500, barmode='stack')
    fig_sources.show()

# 3. Top Contributors
print("\n3. üå°Ô∏è Top Temperature Impact Contributors")
temp_data = df[df['temperature_change_from_co2'].notna()].copy()

if not temp_data.empty:
    latest_year = temp_data['year'].max()
    latest_temp_data = temp_data[temp_data['year'] == latest_year]

    # Aggregate entities are removed by exact name and by case-insensitive
    # substring, matching preprocess_data, so groupings such as
    # "... (excl. ...)" or "... (GCP)" cannot take slices meant for countries.
    exclude_entities = [
        'World', 'Asia', 'Europe', 'North America', 'South America', 'Africa', 'Oceania',
        'European Union (27)', 'European Union (28)', 'High-income countries',
        'Low-income countries', 'Upper-middle-income countries', 'Lower-middle-income countries',
        'OECD', 'Non-OECD', 'Asia (excl. China and India)', 'Europe (excl. EU-27)', 'Europe (excl. EU-28)',
        'International transport', 'International aviation', 'International shipping',
        'EU-27', 'EU-28', 'G7', 'G20'
    ]
    exclude_patterns = ['income', 'OECD', 'EU-', 'G7', 'G20', '(excl', 'International', '(GCP)']

    country_lower = latest_temp_data['country'].str.lower()
    is_aggregate = latest_temp_data['country'].isin(exclude_entities)
    for pattern in exclude_patterns:
        # regex=False: patterns such as '(excl' are literal text.
        is_aggregate |= country_lower.str.contains(pattern.lower(), regex=False, na=False)

    country_temp_data = latest_temp_data[~is_aggregate]
    top_temp_contributors = country_temp_data.nlargest(8, 'temperature_change_from_co2')

    if len(top_temp_contributors) > 0:
        fig_pie = px.pie(
            top_temp_contributors,
            values='temperature_change_from_co2',
            names='country',
            title=f'Share of Temperature Impact from CO‚ÇÇ ({int(latest_year)})',
            color_discrete_sequence=px.colors.sequential.Reds_r
        )
        fig_pie.update_traces(textposition='inside', textinfo='percent+label')
        fig_pie.update_layout(height=500)
        fig_pie.show()

# 4. CO2 vs Temperature Correlation
print("\n4. üìä CO‚ÇÇ vs Temperature Correlation")
world_trends = df[df['country'] == 'World'].copy()

if not world_trends.empty and 'temperature_change_from_co2' in world_trends.columns:
    # Only years where both series are present are plotted.
    trends_clean = world_trends[['year', 'co2', 'temperature_change_from_co2']].dropna()

    if len(trends_clean) > 0:
        fig_dual = make_subplots(specs=[[dict(secondary_y=True)]])

        # (column, legend name, line colour, drawn on the secondary y-axis?)
        for column, trace_name, color, on_secondary in (
            ('co2', "Total CO‚ÇÇ Emissions", '#e74c3c', False),
            ('temperature_change_from_co2', "Temperature Change from CO‚ÇÇ", '#3498db', True),
        ):
            fig_dual.add_trace(
                go.Scatter(
                    x=trends_clean['year'],
                    y=trends_clean[column],
                    name=trace_name,
                    line=dict(color=color, width=3)
                ),
                secondary_y=on_secondary
            )

        fig_dual.update_layout(
            title="CO‚ÇÇ Emissions and Temperature Change Correlation",
            showlegend=True,
            height=500
        )

        fig_dual.update_xaxes(title_text="Year")
        fig_dual.update_yaxes(title_text="CO‚ÇÇ Emissions (Million Tonnes)", secondary_y=False)
        fig_dual.update_yaxes(title_text="Temperature Change (¬∞C)", secondary_y=True)

        fig_dual.show()

# ============================================================================
# FINAL SUMMARY
# ============================================================================

print("\n" + "="*70)
print("ANALYSIS COMPLETE")
print("="*70)
print("\n‚úÖ All visualizations and predictions generated successfully!")
print(f"\nüìå Summary:")
print(f"   ‚Ä¢ Country analyzed: {SELECTED_COUNTRY}")
print(f"   ‚Ä¢ Prediction year: {PREDICTION_YEAR}")
print(f"   ‚Ä¢ Historical data points: {len(country_data)}")
# min()/max() of an empty column are NaN and int(NaN) raises, so the year
# range is only formatted when the selected country has rows.
if country_data.empty:
    print("   ‚Ä¢ Year range: no data")
else:
    print(f"   ‚Ä¢ Year range: {int(country_data['year'].min())} - {int(country_data['year'].max())}")
print("\nüåç Data source: Our World in Data (https://github.com/owid/co2-data)")
print("="*70)

ENERGY DEMAND & CO‚ÇÇ EMISSIONS PREDICTOR
Google Colab Version

üì¶ Installing required packages...


‚úÖ All packages loaded successfully!

LOADING AND PREPROCESSING DATA
‚úÖ Data loaded successfully from Our World in Data!
üìä Dataset shape: (50191, 79)
‚úÖ Data preprocessing complete!
üìç Total valid countries: 226
üìÖ Year range: 1750 - 2023

CONFIGURATION

üåç Available countries (first 10): ['Afghanistan', 'Africa (GCP)', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Anguilla', 'Antarctica', 'Antigua and Barbuda', 'Argentina']
   ... and 216 more countries

üéØ Selected Country: Afghanistan
üìÖ Prediction Year: 2030

HISTORICAL TRENDS FOR Afghanistan



üìä KEY STATISTICS FOR Afghanistan
----------------------------------------------------------------------
Latest Data Year: 2023
Population: 41.5M
CO‚ÇÇ Emissions: 11.0 Mt
Energy Consumption: 27.1 TWh

MACHINE LEARNING PREDICTIONS

ü§ñ Training models for Afghanistan...

üéØ CO‚ÇÇ EMISSIONS PREDICTION FOR 2030
----------------------------------------------------------------------
Predicted CO‚ÇÇ: 8.57 Million Tonnes
Current CO‚ÇÇ (2023): 11.02 Mt
Change: -2.45 Mt (-22.3%)
Model Accuracy (R¬≤): 0.954

‚ö° ENERGY CONSUMPTION PREDICTION FOR 2030
----------------------------------------------------------------------
Predicted Energy: 30.53 TWh
Current Energy (2023): 27.14 TWh
Change: +3.39 TWh (+12.5%)
Model Accuracy (R¬≤): 0.826

FUTURE PROJECTIONS (2024-2040)



üìä PROJECTION SUMMARY
----------------------------------------------------------------------
Average Annual CO‚ÇÇ Growth: 0.04 Mt/year
Total CO‚ÇÇ Increase (2024-2040): 0.66 Mt (6.0%)
Average Annual Energy Growth: -0.64 TWh/year

GLOBAL ANALYTICS DASHBOARD

1. üåç Global CO‚ÇÇ Emissions Trend



2. ‚ö° Global CO‚ÇÇ Emissions by Source



3. üå°Ô∏è Top Temperature Impact Contributors



4. üìä CO‚ÇÇ vs Temperature Correlation



ANALYSIS COMPLETE

‚úÖ All visualizations and predictions generated successfully!

üìå Summary:
   ‚Ä¢ Country analyzed: Afghanistan
   ‚Ä¢ Prediction year: 2030
   ‚Ä¢ Historical data points: 75
   ‚Ä¢ Year range: 1949 - 2023

üåç Data source: Our World in Data (https://github.com/owid/co2-data)


In [None]:
# Energy Demand and CO₂ Emissions Prediction - Google Colab Version
# Complete conversion from Streamlit to Colab

# ============================================================================
# SECTION 1: SETUP AND INSTALLATIONS
# ============================================================================

print("="*70)
print("ENERGY DEMAND & CO‚ÇÇ EMISSIONS PREDICTOR")
print("Google Colab Version")
print("="*70)

# Install required packages
print("\nüì¶ Installing required packages...")
!pip install plotly pandas scikit-learn -q

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot
import plotly.io as pio
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this is a blanket filter, so library deprecation warnings are
# hidden as well - consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Initialize Plotly for Colab
# Order matters: notebook mode is initialised first, then 'colab' is made the
# default renderer used by every later fig.show() call.
init_notebook_mode(connected=True)
pio.renderers.default = 'colab'

# Printed unconditionally once the statements above have run without raising.
print("‚úÖ All packages loaded successfully!\n")

# ============================================================================
# SECTION 2: DATA LOADING AND PREPROCESSING
# ============================================================================

# Section banner: rule, title, rule - one line each.
print("=" * 70, "LOADING AND PREPROCESSING DATA", "=" * 70, sep="\n")

def load_data():
    """Download the Our World in Data CO2 CSV into a DataFrame.

    Prints a success line plus the frame's shape when the read works.

    Returns:
        The DataFrame produced by ``pd.read_csv``, or ``None`` if anything in
        the read/report step raised (the error is printed, not re-raised).
    """
    source_url = 'https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv'
    try:
        frame = pd.read_csv(source_url)
        print("‚úÖ Data loaded successfully from Our World in Data!")
        print(f"üìä Dataset shape: {frame.shape}")
    except Exception as err:
        # Best-effort loader: report the problem and signal failure with None
        print(f"‚ùå Error loading data: {err}")
        return None
    return frame

def preprocess_data(df):
    """Clean the raw OWID frame and keep only countries usable for modeling.

    Steps, in order:
      1. keep the subset of modeling columns that exist in ``df``;
      2. drop rows missing ``country``, ``year`` or ``co2``;
      3. drop aggregate / non-country entities (exact names and substrings);
      4. keep countries with at least 8 rows whose latest year is >= 2000;
      5. forward- then backward-fill remaining gaps *within* each country.

    Args:
        df: Raw DataFrame as returned by ``load_data()``, or ``None``.

    Returns:
        ``(df_clean, valid_countries, available_columns)`` where
        ``valid_countries`` is a sorted list of country names and
        ``available_columns`` lists the modeling columns found in ``df``.
        ``(None, None, None)`` when ``df`` is ``None``.
    """
    if df is None:
        return None, None, None

    # Select relevant columns (only those actually present in the input)
    columns_needed = [
        'country', 'year', 'population', 'gdp', 'co2',
        'primary_energy_consumption', 'energy_per_capita', 'co2_per_capita'
    ]
    available_columns = [col for col in columns_needed if col in df.columns]
    df_clean = df[available_columns].copy()

    # Remove rows with missing values in key columns
    key_columns = ['country', 'year', 'co2']
    df_clean = df_clean.dropna(subset=key_columns)

    # Filter out non-country entities
    exclude_entities = {
        'World', 'Asia', 'Europe', 'North America', 'South America', 'Africa', 'Oceania',
        'European Union (27)', 'European Union (28)', 'High-income countries',
        'Low-income countries', 'Upper-middle-income countries', 'Lower-middle-income countries',
        'OECD', 'Non-OECD', 'Asia (excl. China and India)', 'Europe (excl. EU-27)', 'Europe (excl. EU-28)',
        'International transport', 'International aviation', 'International shipping',
        'EU-27', 'EU-28', 'G7', 'G20'
    }

    # Case-insensitive substrings that mark an aggregate. '(GCP)' covers the
    # regional aggregates such as 'Africa (GCP)', which previously slipped
    # through and were reported as countries.
    exclude_patterns = [
        pattern.lower()
        for pattern in ['income', 'OECD', 'EU-', 'G7', 'G20', '(excl', 'International', '(GCP)']
    ]

    def is_valid_country(country_name):
        """True unless the name is a listed aggregate or contains an aggregate marker."""
        if country_name in exclude_entities:
            return False
        lowered = country_name.lower()
        return not any(pattern in lowered for pattern in exclude_patterns)

    # astype(bool) keeps the mask usable even when no rows are left to map
    df_clean = df_clean[df_clean['country'].map(is_valid_country).astype(bool)]

    # Get countries with sufficient data
    year_stats = df_clean.groupby('country')['year'].agg(data_points='count', latest_year='max')
    enough_history = (year_stats['data_points'] >= 8) & (year_stats['latest_year'] >= 2000)
    valid_countries = year_stats[enough_history].index.tolist()

    df_clean = df_clean[df_clean['country'].isin(valid_countries)]

    # Fill missing values per country. Forward/backward fill is directional in
    # time, so make the row order chronological within each country first.
    df_clean = df_clean.sort_values(['country', 'year'], kind='stable')
    fill_columns = [col for col in df_clean.columns if col != 'country']
    if not df_clean.empty and fill_columns:
        # Grouped fills never copy a value across a country boundary and
        # replace the deprecated fillna(method=...) calls.
        filled = df_clean.groupby('country')[fill_columns].ffill()
        filled = filled.groupby(df_clean['country']).bfill()
        df_clean[fill_columns] = filled

    df_clean = df_clean.dropna(subset=key_columns)
    valid_countries = sorted(df_clean['country'].unique().tolist())

    return df_clean, valid_countries, available_columns

# Load and preprocess data
df = load_data()
df_clean, valid_countries, available_columns = preprocess_data(df)

if df_clean is None or df_clean.empty:
    print("‚ùå No data available. Please check your data source.")
    # Every later section indexes df / df_clean / valid_countries directly, so
    # carrying on would only fail further down with an unrelated-looking
    # error. Stop here with an explicit one instead.
    raise RuntimeError("No data available after loading/preprocessing; cannot continue.")
else:
    print(f"‚úÖ Data preprocessing complete!")
    print(f"üìç Total valid countries: {len(valid_countries)}")
    print(f"üìÖ Year range: {int(df_clean['year'].min())} - {int(df_clean['year'].max())}")

# ============================================================================
# SECTION 3: USER INPUT CONFIGURATION
# ============================================================================

print("\n" + "="*70)
print("CONFIGURATION")
print("="*70)

# Select country (you can change this)
print(f"\nüåç Available countries (first 10): {valid_countries[:10]}")
# Clamp at zero so a list shorter than ten never reports a negative remainder
print(f"   ... and {max(len(valid_countries) - 10, 0)} more countries")

# USER INPUT: Change these values
SELECTED_COUNTRY = 'Afghanistan'  # Change this to any country from the list
PREDICTION_YEAR = 2030              # Change this to any year between 2024-2050

# Validate inputs before echoing them, so the lines below show the country
# that is actually analysed.
if SELECTED_COUNTRY not in valid_countries:
    # Fallback: first valid country, or 'United States' when the list is empty.
    fallback_country = valid_countries[0] if valid_countries else 'United States'
    # The message names the fallback that is really used (it previously always
    # claimed 'United States').
    print(f"‚ö†Ô∏è  Warning: '{SELECTED_COUNTRY}' not found. Using default: '{fallback_country}'")
    SELECTED_COUNTRY = fallback_country

print(f"\nüéØ Selected Country: {SELECTED_COUNTRY}")
print(f"üìÖ Prediction Year: {PREDICTION_YEAR}")

# ============================================================================
# SECTION 4: HISTORICAL TRENDS VISUALIZATION
# ============================================================================

print("\n" + "="*70)
print(f"HISTORICAL TRENDS FOR {SELECTED_COUNTRY}")
print("="*70)

# Rows of the selected country only, oldest year first
country_data = (
    df_clean[df_clean['country'] == SELECTED_COUNTRY]
    .copy()
    .sort_values('year')
)

if len(country_data) >= 5:
    # Two stacked panels: emissions on top, energy underneath
    fig_historical = make_subplots(
        rows=2,
        cols=1,
        vertical_spacing=0.15,
        subplot_titles=['CO‚ÇÇ Emissions Over Time', 'Energy Consumption Over Time'],
    )

    # Top panel: CO2 emissions
    fig_historical.add_trace(
        go.Scatter(
            x=country_data['year'],
            y=country_data['co2'],
            name='CO‚ÇÇ Emissions',
            mode='lines+markers',
            line={'color': 'red', 'width': 3},
            marker={'size': 6},
        ),
        row=1,
        col=1,
    )

    # Bottom panel: energy consumption, drawn only when the column exists
    if 'primary_energy_consumption' in country_data.columns:
        fig_historical.add_trace(
            go.Scatter(
                x=country_data['year'],
                y=country_data['primary_energy_consumption'],
                name='Energy Consumption',
                mode='lines+markers',
                line={'color': 'blue', 'width': 3},
                marker={'size': 6},
            ),
            row=2,
            col=1,
        )

    fig_historical.update_layout(
        title_text=f"Historical Data for {SELECTED_COUNTRY}",
        height=700,
        showlegend=True,
    )
    fig_historical.update_xaxes(title_text="Year")
    fig_historical.update_yaxes(title_text="CO‚ÇÇ Emissions (Mt)", row=1, col=1)
    fig_historical.update_yaxes(title_text="Energy Consumption (TWh)", row=2, col=1)

    fig_historical.show()

    # Key statistics come from the last (most recent) row
    latest_data = country_data.iloc[-1]
    print(f"\nüìä KEY STATISTICS FOR {SELECTED_COUNTRY}")
    print("-" * 70)
    print(f"Latest Data Year: {int(latest_data['year'])}")
    if 'population' in latest_data and pd.notna(latest_data['population']):
        print(f"Population: {latest_data['population']/1e6:.1f}M")
    if pd.notna(latest_data['co2']):
        print(f"CO‚ÇÇ Emissions: {latest_data['co2']:.1f} Mt")
    if 'primary_energy_consumption' in latest_data and pd.notna(latest_data['primary_energy_consumption']):
        print(f"Energy Consumption: {latest_data['primary_energy_consumption']:.1f} TWh")
else:
    # Fewer than five rows: nothing is plotted for this country
    print(f"‚ö†Ô∏è  Insufficient data for {SELECTED_COUNTRY}")

# ============================================================================
# SECTION 5: MACHINE LEARNING PREDICTIONS
# ============================================================================

print("\n" + "="*70, "MACHINE LEARNING PREDICTIONS", "="*70, sep="\n")

# Menu of the regressors the prediction functions know how to build
print(
    "\nü§ñ Available Machine Learning Models:",
    "1. Linear Regression - Simple, interpretable, fast",
    "2. Random Forest - Handles non-linear relationships, robust",
    "3. Support Vector Regression (SVR) - Good for complex patterns",
    sep="\n",
)

# Change this to test a different model.
# Options: "Linear Regression", "Random Forest", "SVR"
MODEL_TYPE = "Random Forest"
print(f"\n‚úÖ Selected Model: {MODEL_TYPE}")

def _predict_target(country_data, prediction_year, target, candidate_features,
                    model_type, rf_params, svr_params):
    """Fit one regressor on a country's history and predict ``target`` for a future year.

    Shared implementation behind ``make_co2_prediction`` and
    ``make_energy_prediction``.

    Args:
        country_data: DataFrame with the rows of a single country; must
            contain a ``year`` column.
        prediction_year: Year to predict for.
        target: Name of the column to predict.
        candidate_features: Column names to try as predictors. A candidate is
            used only if it exists and more than half of its values are
            present.
        model_type: "Random Forest", "SVR", or anything else for Linear
            Regression.
        rf_params: Extra keyword arguments for ``RandomForestRegressor``.
        svr_params: Extra keyword arguments for ``SVR``.

    Returns:
        ``(prediction, r2, latest_value)``, or ``(None, None, None)`` when the
        target column is missing, fewer than two features qualify, or fewer
        than three usable rows exist. ``r2`` is computed on the same rows the
        model was fitted on, so it describes goodness of fit, not forecast
        skill. ``latest_value`` is the target value in the most recent row.
    """
    no_result = (None, None, None)
    if target not in country_data.columns:
        return no_result

    # iloc[-1] / tail() below must mean "most recent", so order by year here
    history = country_data.sort_values('year')

    min_coverage = len(history) * 0.5
    features = [
        name for name in candidate_features
        if name in history.columns and history[name].notna().sum() > min_coverage
    ]
    if len(features) <= 1 or len(history) < 3 or history[target].notna().sum() < 3:
        return no_result

    # Gap-fill with ffill()/bfill(); fillna(method=...) is deprecated in pandas
    X = history[features].ffill().bfill()
    y = history[target].ffill().bfill()

    # Select the model; only SVR gets standardised inputs
    if model_type == "Random Forest":
        model = RandomForestRegressor(n_estimators=100, random_state=42, **rf_params)
        scaler = None
    elif model_type == "SVR":
        model = SVR(kernel='rbf', gamma='scale', **svr_params)
        scaler = StandardScaler()
    else:  # Linear Regression (default)
        model = LinearRegression()
        scaler = None

    X_fit = scaler.fit_transform(X) if scaler is not None else X
    model.fit(X_fit, y)

    # Build the feature row for the prediction year by linearly extrapolating
    # each non-year feature from the last three (gap-filled) observations.
    recent = X.tail(3)
    recent_years = history['year'].tail(3)
    horizon = prediction_year - history['year'].iloc[-1]
    # Slope is change per *year*: divide by the years spanned by the window,
    # not by the number of points in it (three points span two intervals).
    window_span = recent_years.iloc[-1] - recent_years.iloc[0]

    future_row = {}
    for name in features:
        if name == 'year':
            future_row[name] = prediction_year
            continue
        last_value = recent[name].iloc[-1]
        if window_span > 0:
            slope = (last_value - recent[name].iloc[0]) / window_span
            future_row[name] = last_value + slope * horizon
        else:
            # No usable time span: hold the latest value constant
            future_row[name] = last_value

    # Same column names/order as the training data
    X_future = pd.DataFrame([future_row], columns=features)
    if scaler is not None:
        X_future = scaler.transform(X_future)

    prediction = model.predict(X_future)[0]
    r2 = r2_score(y, model.predict(X_fit))

    return prediction, r2, history[target].iloc[-1]


def make_co2_prediction(country_data, prediction_year, model_type=MODEL_TYPE):
    """Predict CO2 emissions for ``prediction_year`` with the selected model.

    Candidate predictors are year, population, gdp and
    primary_energy_consumption.

    Args:
        country_data: DataFrame with the rows of a single country.
        prediction_year: Year to predict for.
        model_type: "Random Forest", "SVR", or anything else for Linear
            Regression. The default is the value ``MODEL_TYPE`` had when this
            function was defined.

    Returns:
        ``(predicted_co2, in_sample_r2, latest_co2)`` or ``(None, None, None)``
        when there is not enough data.
    """
    return _predict_target(
        country_data,
        prediction_year,
        target='co2',
        candidate_features=['year', 'population', 'gdp', 'primary_energy_consumption'],
        model_type=model_type,
        rf_params={},
        svr_params={'C': 1.0},
    )


def make_energy_prediction(country_data, prediction_year, model_type=MODEL_TYPE):
    """Predict primary energy consumption for ``prediction_year`` with the selected model.

    Candidate predictors are year, population, gdp and co2.

    Args:
        country_data: DataFrame with the rows of a single country.
        prediction_year: Year to predict for.
        model_type: "Random Forest", "SVR", or anything else for Linear
            Regression. The default is the value ``MODEL_TYPE`` had when this
            function was defined.

    Returns:
        ``(predicted_energy, in_sample_r2, latest_energy)`` or
        ``(None, None, None)`` when the energy column is missing or there is
        not enough data.
    """
    return _predict_target(
        country_data,
        prediction_year,
        target='primary_energy_consumption',
        candidate_features=['year', 'population', 'gdp', 'co2'],
        model_type=model_type,
        rf_params={'max_depth': 10},
        svr_params={'C': 100},
    )

# Make predictions
if not country_data.empty and len(country_data) >= 5:
    print(f"\nü§ñ Training models for {SELECTED_COUNTRY}...")

    # Year of the most recent observation; labels the "current" values below
    # instead of a hard-coded 2023.
    latest_data_year = int(country_data['year'].max())

    def _describe_change(predicted, current, unit):
        """Format the 'Change:' line; the percentage is omitted when the baseline is zero or missing."""
        delta = predicted - current
        if pd.isna(current) or current == 0:
            return f"Change: {delta:+.2f} {unit}"
        return f"Change: {delta:+.2f} {unit} ({(delta / current) * 100:+.1f}%)"

    # CO2 prediction
    co2_pred, co2_r2, current_co2 = make_co2_prediction(country_data, PREDICTION_YEAR)

    if co2_pred is not None:
        print(f"\nüéØ CO‚ÇÇ EMISSIONS PREDICTION FOR {PREDICTION_YEAR}")
        print("-" * 70)
        print(f"Predicted CO‚ÇÇ: {co2_pred:.2f} Million Tonnes")
        print(f"Current CO‚ÇÇ ({latest_data_year}): {current_co2:.2f} Mt")
        print(_describe_change(co2_pred, current_co2, "Mt"))
        print(f"Model Accuracy (R¬≤): {co2_r2:.3f}")
    else:
        print("\n‚ö†Ô∏è  Could not generate CO‚ÇÇ prediction (insufficient data)")

    # Energy prediction
    energy_pred, energy_r2, current_energy = make_energy_prediction(country_data, PREDICTION_YEAR)

    if energy_pred is not None:
        print(f"\n‚ö° ENERGY CONSUMPTION PREDICTION FOR {PREDICTION_YEAR}")
        print("-" * 70)
        print(f"Predicted Energy: {energy_pred:.2f} TWh")
        print(f"Current Energy ({latest_data_year}): {current_energy:.2f} TWh")
        print(_describe_change(energy_pred, current_energy, "TWh"))
        print(f"Model Accuracy (R¬≤): {energy_r2:.3f}")
    else:
        print("\n‚ö†Ô∏è  Could not generate Energy prediction (insufficient data)")

# ============================================================================
# SECTION 6: FUTURE PROJECTIONS
# ============================================================================

print("\n" + "="*70)
print(f"FUTURE PROJECTIONS (2024-2040)")
print("="*70)

# Linear extrapolation of the most recent trend. Requires at least five rows,
# which also guarantees the five-row trend window below is fully populated.
if not country_data.empty and len(country_data) >= 5:
    future_years = list(range(2024, 2041))
    future_co2 = []
    future_energy = []

    recent_data = country_data.tail(5)
    last_observed_year = country_data['year'].iloc[-1]

    # Trends are "change per year", so divide by the years the window spans
    # (five yearly points span four years), not by the number of points.
    recent_span = recent_data['year'].iloc[-1] - recent_data['year'].iloc[0]
    if not recent_span > 0:
        # Degenerate or missing year values: fall back to the interval count
        recent_span = max(len(recent_data) - 1, 1)

    # CO2 trend
    co2_trend = (recent_data['co2'].iloc[-1] - recent_data['co2'].iloc[0]) / recent_span
    current_co2 = country_data['co2'].iloc[-1]

    # Energy trend. Track availability explicitly: a genuinely flat trend
    # (slope exactly 0) must project the current level, not zero.
    has_energy = (
        'primary_energy_consumption' in country_data.columns
        and country_data['primary_energy_consumption'].notna().sum() > 0
    )
    if has_energy:
        energy_trend = (
            recent_data['primary_energy_consumption'].iloc[-1]
            - recent_data['primary_energy_consumption'].iloc[0]
        ) / recent_span
        current_energy = country_data['primary_energy_consumption'].iloc[-1]
    else:
        energy_trend = 0
        current_energy = 0

    for year in future_years:
        years_ahead = year - last_observed_year
        # Projections are clamped at zero; negative emissions/energy are not plotted
        future_co2.append(max(0, current_co2 + (co2_trend * years_ahead)))

        if has_energy:
            future_energy.append(max(0, current_energy + (energy_trend * years_ahead)))
        else:
            future_energy.append(0)

    # Create projection plot
    fig_projection = make_subplots(
        rows=2, cols=1,
        subplot_titles=['CO‚ÇÇ Emissions Projection', 'Energy Consumption Projection'],
        vertical_spacing=0.15
    )

    # Historical + projected CO2 (solid = observed, dashed = projected)
    fig_projection.add_trace(
        go.Scatter(
            x=country_data['year'],
            y=country_data['co2'],
            mode='lines+markers',
            name='Historical CO‚ÇÇ',
            line=dict(color='red', width=3)
        ),
        row=1, col=1
    )

    fig_projection.add_trace(
        go.Scatter(
            x=future_years,
            y=future_co2,
            mode='lines+markers',
            name='Projected CO‚ÇÇ',
            line=dict(color='red', width=3, dash='dash')
        ),
        row=1, col=1
    )

    # Historical + projected energy
    if has_energy:
        fig_projection.add_trace(
            go.Scatter(
                x=country_data['year'],
                y=country_data['primary_energy_consumption'],
                mode='lines+markers',
                name='Historical Energy',
                line=dict(color='blue', width=3)
            ),
            row=2, col=1
        )

        fig_projection.add_trace(
            go.Scatter(
                x=future_years,
                y=future_energy,
                mode='lines+markers',
                name='Projected Energy',
                line=dict(color='blue', width=3, dash='dash')
            ),
            row=2, col=1
        )

    fig_projection.update_layout(
        height=700,
        showlegend=True,
        title_text=f"Future Projections for {SELECTED_COUNTRY} (2024-2040)"
    )
    fig_projection.update_xaxes(title_text="Year")
    fig_projection.update_yaxes(title_text="CO‚ÇÇ Emissions (Mt)", row=1, col=1)
    fig_projection.update_yaxes(title_text="Energy Consumption (TWh)", row=2, col=1)

    fig_projection.show()

    # Projection summary
    avg_co2_growth = np.mean(np.diff(future_co2))
    total_co2_increase = future_co2[-1] - current_co2

    print(f"\nüìä PROJECTION SUMMARY")
    print("-" * 70)
    print(f"Average Annual CO‚ÇÇ Growth: {avg_co2_growth:.2f} Mt/year")
    print(f"Total CO‚ÇÇ Increase (2024-2040): {total_co2_increase:.2f} Mt ({(total_co2_increase/current_co2)*100:.1f}%)")

    if future_energy and max(future_energy) > 0:
        avg_energy_growth = np.mean(np.diff(future_energy))
        print(f"Average Annual Energy Growth: {avg_energy_growth:.2f} TWh/year")

# ============================================================================
# SECTION 7: GLOBAL ANALYTICS DASHBOARD
# ============================================================================

print("\n" + "="*70, "GLOBAL ANALYTICS DASHBOARD", "="*70, sep="\n")

# 1. Global CO2 Trend
print("\n1. üåç Global CO‚ÇÇ Emissions Trend")
# Rows of the 'World' aggregate; keep only complete (year, co2) pairs
world_data = df.loc[df['country'] == 'World']
world_co2 = world_data[['year', 'co2']].dropna()

if len(world_co2) > 0:
    fig_global = px.line(
        world_co2,
        x='year',
        y='co2',
        labels={'co2': 'CO‚ÇÇ Emissions (Million Tonnes)', 'year': 'Year'},
        title='Global CO‚ÇÇ Emissions Over Time',
    )
    # Style the single line, fix the figure height, then render
    fig_global.update_traces(line={'color': '#e74c3c', 'width': 3})
    fig_global.update_layout(height=500)
    fig_global.show()

# 2. Emissions by Source
print("\n2. ‚ö° Global CO‚ÇÇ Emissions by Source")
df_world = df[df['country'] == 'World']
# World rows from 1990 up to the latest year present (both ends inclusive)
df_filtered_world = df_world[df_world['year'].between(1990, df_world['year'].max())].copy()

# Per-source columns that exist and hold at least one non-missing value
potential_sources = ['coal_co2', 'oil_co2', 'gas_co2', 'cement_co2', 'flaring_co2', 'other_industry_co2']
source_columns = [
    source
    for source in potential_sources
    if source in df_filtered_world.columns and df_filtered_world[source].notna().any()
]

if len(source_columns) > 0:
    # Stacked bars over the last 30 rows of the filtered frame
    fig_sources = px.bar(
        df_filtered_world.tail(30),
        x='year',
        y=source_columns,
        title='Global CO‚ÇÇ Emissions by Source (Recent Decades)',
        color_discrete_sequence=px.colors.qualitative.Set1,
        labels={
            "value": "Annual CO‚ÇÇ Emissions (Million tonnes)",
            "variable": "Source",
            "year": "Year",
        },
    )
    fig_sources.update_layout(height=500, barmode='stack')
    fig_sources.show()

# 3. Top Contributors
print("\n3. üå°Ô∏è Top Temperature Impact Contributors")
# Guard the column lookup so a dataset without this column skips the chart
# instead of raising KeyError.
if 'temperature_change_from_co2' in df.columns:
    temp_data = df[df['temperature_change_from_co2'].notna()].copy()
else:
    temp_data = df.iloc[0:0].copy()

if not temp_data.empty:
    latest_year = temp_data['year'].max()
    latest_temp_data = temp_data[temp_data['year'] == latest_year]

    # Same aggregate filter as preprocess_data: exact names plus
    # case-insensitive substrings. Without the substring pass, regional and
    # grouped aggregates can appear in a chart that is meant to rank
    # individual countries.
    exclude_entities = [
        'World', 'Asia', 'Europe', 'North America', 'South America', 'Africa', 'Oceania',
        'European Union (27)', 'European Union (28)', 'High-income countries',
        'Low-income countries', 'Upper-middle-income countries', 'Lower-middle-income countries',
        'OECD', 'Non-OECD', 'Asia (excl. China and India)', 'Europe (excl. EU-27)', 'Europe (excl. EU-28)',
        'International transport', 'International aviation', 'International shipping',
        'EU-27', 'EU-28', 'G7', 'G20'
    ]
    exclude_patterns = ['income', 'OECD', 'EU-', 'G7', 'G20', '(excl', 'International', '(GCP)']

    names_lower = latest_temp_data['country'].astype(str).str.lower()
    is_aggregate = latest_temp_data['country'].isin(exclude_entities)
    for pattern in exclude_patterns:
        # regex=False: patterns such as '(excl' are literal text, not regexes
        is_aggregate = is_aggregate | names_lower.str.contains(pattern.lower(), regex=False)

    country_temp_data = latest_temp_data[~is_aggregate]
    top_temp_contributors = country_temp_data.nlargest(8, 'temperature_change_from_co2')

    if len(top_temp_contributors) > 0:
        fig_pie = px.pie(
            top_temp_contributors,
            values='temperature_change_from_co2',
            names='country',
            title=f'Share of Temperature Impact from CO‚ÇÇ ({int(latest_year)})',
            color_discrete_sequence=px.colors.sequential.Reds_r
        )
        fig_pie.update_traces(textposition='inside', textinfo='percent+label')
        fig_pie.update_layout(height=500)
        fig_pie.show()

# 4. CO2 vs Temperature Correlation
print("\n4. üìä CO‚ÇÇ vs Temperature Correlation")
world_trends = df[df['country'] == 'World'].copy()

if 'temperature_change_from_co2' in world_trends.columns and not world_trends.empty:
    # Keep only years where both series are present
    trends_clean = world_trends[['year', 'co2', 'temperature_change_from_co2']].dropna()

    if not len(trends_clean) == 0:
        # One panel, two y-axes: emissions on the left, temperature on the right
        fig_dual = make_subplots(specs=[[{"secondary_y": True}]])

        fig_dual.add_trace(
            go.Scatter(
                x=trends_clean['year'],
                y=trends_clean['co2'],
                name="Total CO‚ÇÇ Emissions",
                line={'color': '#e74c3c', 'width': 3},
            ),
            secondary_y=False,
        )
        fig_dual.add_trace(
            go.Scatter(
                x=trends_clean['year'],
                y=trends_clean['temperature_change_from_co2'],
                name="Temperature Change from CO‚ÇÇ",
                line={'color': '#3498db', 'width': 3},
            ),
            secondary_y=True,
        )

        fig_dual.update_layout(
            showlegend=True,
            height=500,
            title="CO‚ÇÇ Emissions and Temperature Change Correlation",
        )

        fig_dual.update_xaxes(title_text="Year")
        fig_dual.update_yaxes(title_text="CO‚ÇÇ Emissions (Million Tonnes)", secondary_y=False)
        fig_dual.update_yaxes(title_text="Temperature Change (¬∞C)", secondary_y=True)

        fig_dual.show()

# ============================================================================
# FINAL SUMMARY
# ============================================================================

print("\n" + "="*70)
print("ANALYSIS COMPLETE")
print("="*70)
print("\n‚úÖ All visualizations and predictions generated successfully!")
print(f"\nüìå Summary:")
print(f"   ‚Ä¢ Country analyzed: {SELECTED_COUNTRY}")
print(f"   ‚Ä¢ Prediction year: {PREDICTION_YEAR}")
print(f"   ‚Ä¢ Historical data points: {len(country_data)}")
# min()/max() of an empty column are NaN and int(NaN) raises, so report the
# year range only when there is at least one row.
if not country_data.empty:
    print(f"   ‚Ä¢ Year range: {int(country_data['year'].min())} - {int(country_data['year'].max())}")
else:
    print(f"   ‚Ä¢ Year range: n/a")
print("\nüåç Data source: Our World in Data (https://github.com/owid/co2-data)")
print("="*70)

ENERGY DEMAND & CO‚ÇÇ EMISSIONS PREDICTOR
Google Colab Version

üì¶ Installing required packages...


‚úÖ All packages loaded successfully!

LOADING AND PREPROCESSING DATA
‚úÖ Data loaded successfully from Our World in Data!
üìä Dataset shape: (50191, 79)
‚úÖ Data preprocessing complete!
üìç Total valid countries: 226
üìÖ Year range: 1750 - 2023

CONFIGURATION

üåç Available countries (first 10): ['Afghanistan', 'Africa (GCP)', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Anguilla', 'Antarctica', 'Antigua and Barbuda', 'Argentina']
   ... and 216 more countries

üéØ Selected Country: Afghanistan
üìÖ Prediction Year: 2030

HISTORICAL TRENDS FOR Afghanistan



üìä KEY STATISTICS FOR Afghanistan
----------------------------------------------------------------------
Latest Data Year: 2023
Population: 41.5M
CO‚ÇÇ Emissions: 11.0 Mt
Energy Consumption: 27.1 TWh

MACHINE LEARNING PREDICTIONS

ü§ñ Available Machine Learning Models:
1. Linear Regression - Simple, interpretable, fast
2. Random Forest - Handles non-linear relationships, robust
3. Support Vector Regression (SVR) - Good for complex patterns

‚úÖ Selected Model: Random Forest

ü§ñ Training models for Afghanistan...

üéØ CO‚ÇÇ EMISSIONS PREDICTION FOR 2030
----------------------------------------------------------------------
Predicted CO‚ÇÇ: 10.10 Million Tonnes
Current CO‚ÇÇ (2023): 11.02 Mt
Change: -0.92 Mt (-8.4%)
Model Accuracy (R¬≤): 0.998

‚ö° ENERGY CONSUMPTION PREDICTION FOR 2030
----------------------------------------------------------------------
Predicted Energy: 33.35 TWh
Current Energy (2023): 27.14 TWh
Change: +6.21 TWh (+22.9%)
Model Accuracy (R¬≤): 0.985

FUTURE PROJECTIONS (2024-2040)


üìä PROJECTION SUMMARY
----------------------------------------------------------------------
Average Annual CO‚ÇÇ Growth: 0.04 Mt/year
Total CO‚ÇÇ Increase (2024-2040): 0.66 Mt (6.0%)
Average Annual Energy Growth: -0.64 TWh/year

GLOBAL ANALYTICS DASHBOARD

1. üåç Global CO‚ÇÇ Emissions Trend



2. ‚ö° Global CO‚ÇÇ Emissions by Source



3. üå°Ô∏è Top Temperature Impact Contributors



4. üìä CO‚ÇÇ vs Temperature Correlation



ANALYSIS COMPLETE

‚úÖ All visualizations and predictions generated successfully!

üìå Summary:
   ‚Ä¢ Country analyzed: Afghanistan
   ‚Ä¢ Prediction year: 2030
   ‚Ä¢ Historical data points: 75
   ‚Ä¢ Year range: 1949 - 2023

üåç Data source: Our World in Data (https://github.com/owid/co2-data)
