<a href="https://colab.research.google.com/github/Khuliso877/AI-week-2/blob/main/week2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import requests  # For real-time API integration
# import streamlit as st # Removed Streamlit as it doesn't run directly in Colab

# Step 1: Load and Preprocess Data
# Load data from the Excel file. The path is relative to the kernel's working
# directory, so the file must be uploaded/copied there before running this cell.
DATA_FILE = 'climate-change-excel-4-6-mb-.xls'
try:
    data = pd.read_excel(DATA_FILE)
except FileNotFoundError:
    print("Error: 'climate-change-excel-4-6-mb-.xls' not found. Please ensure the file is in the correct path.")
    # Re-raise instead of calling exit(): inside a notebook, exit() asks the
    # kernel to shut down, which discards all state. Raising stops this cell
    # (and "Run All") while keeping the kernel alive and showing a traceback.
    raise
# Reported outside the try-block so only the read itself is guarded.
print("Dataset shape:", data.shape)


# The modelling code below expects 'year', 'co2_emissions_per_capita', and 'gdp_per_capita' columns.
# Adapt column names if necessary based on your Excel file, e.g.:
# data = data.rename(columns={'your_year_column': 'year', 'your_co2_column': 'co2_emissions_per_capita', 'your_gdp_column': 'gdp_per_capita'})


# Clean: drop every row that has a missing value in ANY column.
# NOTE(review): this also discards rows whose only gaps are in columns the
# model never uses - consider dropna(subset=[...]) once the schema is settled.
data = data.dropna()
# Created here, fitted later by the modelling step.
scaler = StandardScaler()
# Validate the schema up front so a mismatch is reported with enough detail to fix it.
FEATURE_COLUMNS = ['year', 'gdp_per_capita']
TARGET_COLUMN = 'co2_emissions_per_capita'
missing_columns = [col for col in FEATURE_COLUMNS + [TARGET_COLUMN] if col not in data.columns]

if not missing_columns:
    X_raw = data[FEATURE_COLUMNS]   # Features (unscaled)
    y = data[TARGET_COLUMN]         # Target
    # Split 80/20 BEFORE scaling so test-set statistics never reach the scaler.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X_raw, y, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training rows only (avoids train/test leakage).
    scaler.fit(X_train_raw)

    def scale_features(frame):
        """Standardise the feature columns of `frame` with the train-fitted scaler.

        Returns a DataFrame that keeps the column names and the index of `frame`,
        so estimators fitted on it see consistent feature names at predict time.
        """
        return pd.DataFrame(
            scaler.transform(frame[FEATURE_COLUMNS]),
            columns=FEATURE_COLUMNS,
            index=frame.index,
        )

    X_train = scale_features(X_train_raw)
    X_test = scale_features(X_test_raw)
    X = scale_features(X_raw)  # Scaled view of all rows; `data` itself is left untouched.

    # Step 2: Train Models (Compare Linear Regression vs. Random Forest)
    models = {
        'Linear Regression': LinearRegression(),
        'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42)
    }
    predictions = {}
    scores = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        predictions[name] = y_pred
        scores[name] = {'mae': mae, 'r2': r2}
        print(f"{name} - MAE: {mae:.2f}, R²: {r2:.2f}")

    # Choose the model from the measured error instead of hard-coding one.
    # NOTE(review): tree ensembles cannot extrapolate beyond the training range,
    # so a Random Forest forecast for unseen future years will be flat.
    best_name = min(scores, key=lambda model_name: scores[model_name]['mae'])
    best_model = models[best_name]
    print(f"Best model by MAE: {best_name}")

    # Step 3: Forecast Future Emissions (2025-2030)
    # Assumes a constant GDP per capita of 12000 for every future year. Adjust if needed.
    ASSUMED_FUTURE_GDP_PER_CAPITA = 12000
    forecast_years = list(range(2025, 2031))
    # Built as a DataFrame with the training column names so the scaler and the
    # model receive named features, exactly as they did during fitting.
    future_years_data = pd.DataFrame({
        'year': forecast_years,
        'gdp_per_capita': ASSUMED_FUTURE_GDP_PER_CAPITA,
    })
    future_years_scaled = scale_features(future_years_data)
    future_emissions = best_model.predict(future_years_scaled)
    print("Forecasted CO2 (tons/capita):", future_emissions)

    # Step 4: Evaluate & Visualize
    fig, (ax_fit, ax_forecast) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: held-out predictions of the selected model against the truth;
    # the dashed diagonal marks a perfect prediction.
    ax_fit.scatter(y_test, predictions[best_name], alpha=0.5)
    ax_fit.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
    ax_fit.set(xlabel='Actual CO2', ylabel='Predicted CO2', title='Actual vs Predicted')

    # Right panel: historical series plus forecast. Averaging per year yields a
    # sorted, single-valued line; with one row per year it equals the raw values.
    historical_mean = y.groupby(X_raw['year']).mean()
    ax_forecast.plot(historical_mean.index, historical_mean.values, label='Historical', color='blue')
    ax_forecast.plot(forecast_years, future_emissions, label='Forecast', color='red', marker='o')
    ax_forecast.set(xlabel='Year', ylabel='CO2 per Capita', title='Emissions Forecast')
    ax_forecast.legend()
    fig.tight_layout()
    fig.savefig('emissions_forecast.png')  # For report/presentation
    plt.show()

    # Step 5: Integrate Real-Time Data (e.g., Weather via Open-Meteo API)
    # Proxy: recent temperature anomaly nudges the forecast (e.g., higher temps = more AC = more emissions).
    # Example request for a single location - adjust latitude/longitude as needed.
    # NOTE(review): confirm `temperature_2m_mean` is offered as a daily variable
    # on this endpoint; if not, switch to the archive endpoint or max/min.
    url = (
        "https://api.open-meteo.com/v1/forecast"
        "?latitude=52.52&longitude=13.41"
        "&daily=temperature_2m_mean&past_days=7&forecast_days=1&timezone=UTC"
    )
    BASELINE_TEMP_C = 14      # Assumed baseline temperature in degrees C; adjust if needed
    ADJUSTMENT_FACTOR = 0.1   # Simple scaling from temperature anomaly to emissions offset

    def get_temp_anomaly():
        """Return an emissions adjustment derived from the last 7 daily mean temperatures.

        Fetches `url`, averages the most recent (up to) seven non-null values of
        daily 'temperature_2m_mean', subtracts BASELINE_TEMP_C and scales the
        result by ADJUSTMENT_FACTOR.

        Returns:
            float: the adjustment, or 0.0 when the request fails or the payload
            does not have the expected shape (the error is printed).
        """
        try:
            # A timeout keeps the cell from hanging on a stalled connection.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            daily_means = response.json()['daily']['temperature_2m_mean'][-7:]
            valid_means = [temp for temp in daily_means if temp is not None]
            if not valid_means:
                raise ValueError("no daily mean temperatures in response")
            recent_temp = np.mean(valid_means) - BASELINE_TEMP_C
            return float(recent_temp * ADJUSTMENT_FACTOR)
        except (requests.RequestException, KeyError, TypeError, ValueError) as e:
            print(f"Error fetching temperature data: {e}")
            return 0.0  # Best effort: fall back to no adjustment

    adjustment = get_temp_anomaly()
    adjusted_forecast = future_emissions + adjustment
    print(f"Adjusted Forecast (w/ Temp Anomaly {adjustment:.2f}): {adjusted_forecast}")

else:
    print("Error: Required columns ('year', 'co2_emissions_per_capita', 'gdp_per_capita') not found in the Excel file.")
    # Extra diagnostics so the rename needed to fix the schema is obvious.
    print(f"Missing columns: {missing_columns}")
    print(f"Available columns: {list(data.columns)}")

Dataset shape: (13512, 28)
Error: Required columns ('year', 'co2_emissions_per_capita', 'gdp_per_capita') not found in the Excel file.


In [5]:
%pip install streamlit

Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m66.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m86.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.50.0


In [None]:
# Mount Google Drive when running in Colab. The import is guarded so the
# notebook can still be run top-to-bottom on a non-Colab kernel.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')