# AI for Climate Action: CO₂ Emissions Forecasting

This notebook demonstrates how to forecast CO₂ emissions using machine learning techniques.

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import warnings
# Install a blanket "ignore" filter: no category or module is given, so every
# warning raised after this point in the session is suppressed.
# NOTE(review): presumably done to keep cell output tidy, but it also hides
# deprecation and numerical warnings — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Simple confirmation that the import cell ran to completion.
print("Libraries imported successfully!")

## 2. Load and Explore Data

In [None]:
# Load the CO2 emissions dataset.
# Falls back to a reproducible synthetic series when the CSV is missing so the
# remaining cells can still be executed end to end.
try:
    # Only the read itself is guarded; display code lives in the `else` branch
    # so an unrelated error there is never mistaken for a missing file.
    df = pd.read_csv('data/co2_emissions.csv')
except FileNotFoundError:
    print("Dataset not found. Using synthetic data for demonstration.")
    # Create synthetic data for demonstration: a random walk on top of a
    # linear upward trend, seeded so every run yields the same values.
    np.random.seed(42)
    n_years = 100
    # The series ends in 2023 so it represents past observations only; the
    # forecast years used further down (2025 onwards) then lie beyond the data
    # instead of in the middle of it.
    last_year = 2023
    years = np.arange(last_year - n_years + 1, last_year + 1)
    emissions = 1000 + np.cumsum(np.random.randn(n_years) * 10) + np.arange(n_years) * 5
    df = pd.DataFrame({'Year': years, 'CO2_Emissions': emissions})
    print(f"Synthetic dataset created! Shape: {df.shape}")
    print("\nFirst 5 rows:")
    print(df.head())
else:
    print(f"Dataset loaded successfully! Shape: {df.shape}")
    print("\nFirst 5 rows:")
    print(df.head())
    print("\nDataset info:")
    # DataFrame.info() prints its report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    df.info()

## 3. Data Visualization

In [None]:
# Plot CO2 emissions over time.
# The figure is created only after the required columns are confirmed, so a
# dataset with a different schema does not leave an empty figure behind.
if 'Year' in df.columns and 'CO2_Emissions' in df.columns:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(df['Year'], df['CO2_Emissions'], marker='o', linestyle='-', color='blue')
    ax.set_title('CO₂ Emissions Over Time')
    ax.set_xlabel('Year')
    # NOTE(review): the unit in this label is not established by the data
    # loading cell — confirm it matches the dataset in use.
    ax.set_ylabel('CO₂ Emissions (metric tons)')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()
else:
    print("Columns not found for plotting. Please check your dataset structure.")

## 4. Data Preprocessing

In [None]:
# Prepare features and target, then hold out the most recent years for testing.
if 'Year' in df.columns and 'CO2_Emissions' in df.columns:
    # Order the observations chronologically so that "the last 20 %" below
    # really means "the latest years", whatever the row order of the input.
    df_sorted = df.sort_values('Year')
    X = df_sorted[['Year']].values      # 2-D feature matrix with a single column
    y = df_sorted['CO2_Emissions'].values

    print(f"Features shape: {X.shape}")
    print(f"Target shape: {y.shape}")

    # Split the data chronologically. For a forecasting task a shuffled split
    # would mix future years into the training set, so the test score would
    # measure interpolation between known points rather than forecasting.
    # shuffle=False keeps the first 80 % for training and the final 20 % for
    # testing (random_state is irrelevant without shuffling).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    print(f"Training set shape: {X_train.shape}")
    print(f"Testing set shape: {X_test.shape}")
else:
    print("Required columns not found. Please check your dataset.")

## 5. Model Training and Evaluation

In [None]:
# Fit two regressors on the training split and report their hold-out metrics.
# Nothing is trained unless the preprocessing cell produced `X_train`.
if 'X_train' not in locals():
    print("Training data not available.")
else:
    # --- Linear Regression: fit, predict on the hold-out set, score ---
    lr_model = LinearRegression()
    lr_model.fit(X_train, y_train)
    y_pred_lr = lr_model.predict(X_test)

    mse_lr = mean_squared_error(y_test, y_pred_lr)
    r2_lr = r2_score(y_test, y_pred_lr)

    print(
        "Linear Regression Results:",
        f"Mean Squared Error: {mse_lr:.2f}",
        f"R-squared Score: {r2_lr:.2f}",
        sep="\n",
    )

    # --- Random Forest: 100 trees, fixed seed for repeatable results ---
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)
    y_pred_rf = rf_model.predict(X_test)

    mse_rf = mean_squared_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)

    # The leading newline separates this report from the previous one.
    print(
        "\nRandom Forest Results:",
        f"Mean Squared Error: {mse_rf:.2f}",
        f"R-squared Score: {r2_rf:.2f}",
        sep="\n",
    )

## 6. Visualization of Results

In [None]:
# Plot actual vs predicted values for both models, side by side.
# Every array used below is checked up front; testing only some of them would
# let the cell die with a NameError if the Random Forest predictions are missing.
if 'y_test' in locals() and 'y_pred_lr' in locals() and 'y_pred_rf' in locals():
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # End points of the "perfect prediction" reference line (predicted == actual).
    diagonal = [y_test.min(), y_test.max()]

    # (panel title prefix, predictions, marker colour) for each model
    panels = [
        ('Linear Regression', y_pred_lr, 'blue'),
        ('Random Forest', y_pred_rf, 'green'),
    ]

    for ax, (model_name, y_pred, color) in zip(axes, panels):
        ax.scatter(y_test, y_pred, alpha=0.7, color=color)
        ax.plot(diagonal, diagonal, 'r--', lw=2)
        ax.set_xlabel('Actual CO₂ Emissions')
        ax.set_ylabel('Predicted CO₂ Emissions')
        ax.set_title(f'{model_name}: Actual vs Predicted')
        ax.grid(True, alpha=0.3)

    fig.tight_layout()
    plt.show()
else:
    print("Prediction results not available for plotting.")

## 7. Future Predictions

In [None]:
# Make future predictions with both trained models and plot them next to the
# historical series. Both models are required, so both are checked here.
if 'lr_model' in locals() and 'rf_model' in locals():
    # Predict for future years (column vector: one row per year, one feature).
    future_years = np.array([[2025], [2030], [2035], [2040]])

    # Tell the reader when a requested year is not actually beyond the data;
    # such values are in-sample estimates rather than forecasts.
    last_observed_year = df['Year'].max()
    if future_years.min() <= last_observed_year:
        print(
            f"Note: the data already covers years up to {last_observed_year}; "
            "predictions at or before that year are not true forecasts."
        )

    future_predictions_lr = lr_model.predict(future_years)
    # NOTE: tree ensembles predict averages of training targets, so a random
    # forest cannot extrapolate a trend — beyond the training range its
    # forecast stays flat. Treat the RF line as a baseline, not a projection.
    future_predictions_rf = rf_model.predict(future_years)

    print("Future CO₂ Emissions Predictions:")
    for year, pred_lr, pred_rf in zip(future_years.flatten(), future_predictions_lr, future_predictions_rf):
        print(f"Year {year}: Linear Regression: {pred_lr:.2f}, Random Forest: {pred_rf:.2f}")

    # Plot historical data and future predictions
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(df['Year'], df['CO2_Emissions'], 'bo-', label='Historical Data', markersize=4)
    ax.plot(future_years.flatten(), future_predictions_lr, 'r--o', label='Linear Regression Forecast', markersize=6)
    ax.plot(future_years.flatten(), future_predictions_rf, 'g--s', label='Random Forest Forecast', markersize=6)
    ax.set_xlabel('Year')
    ax.set_ylabel('CO₂ Emissions')
    ax.set_title('CO₂ Emissions: Historical Data and Future Forecasts')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()
else:
    print("Model not available for future predictions.")

## 8. Conclusion

This notebook demonstrates a basic approach to forecasting CO₂ emissions using machine learning. The trained models can help policymakers understand future emission trends and make informed decisions for climate action.

### Next Steps:
- Incorporate more features (GDP, population, energy consumption, etc.)
- Try more advanced models (LSTM, ARIMA, etc.)
- Perform hyperparameter tuning
- Validate models with cross-validation
- Deploy the model for real-time predictions