# imports

In [1]:
import pandas as pd
import numpy as np
import folium
from datetime import datetime, timedelta
import geopandas as gdp
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
from google.colab import drive
from statsmodels.tsa.holtwinters import ExponentialSmoothing
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
from sklearn.linear_model import LinearRegression
class Co2EmissionAnalyser:
    """Summarise, map and forecast emission readings loaded from a CSV file.

    The CSV must contain an ``ID_LAT_LON_YEAR_WEEK`` column whose values look
    like ``ID_<lat>_<lon>_<year>_<week>`` and an ``emission`` column.

    Attributes:
        data: The frame exactly as returned by ``pandas.read_csv``.
        parsed_data: A copy of ``data`` with ``latitude``, ``longitude``,
            ``year``, ``week`` and ``date`` columns added.
    """

    def __init__(self, data_path: str):
        """Load the CSV and derive the location / time columns.

        Args:
            data_path: Path passed straight to ``pandas.read_csv``.
        """
        self.data = pd.read_csv(data_path)
        self.parsed_data = self._parse_id_column()

    def _parse_id_column(self) -> pd.DataFrame:
        """Split ``ID_LAT_LON_YEAR_WEEK`` into typed columns plus a ``date``.

        Returns:
            A copy of ``self.data`` (the original frame is not modified) with
            ``latitude``/``longitude`` as floats, ``year``/``week`` as ints and
            ``date`` holding the Monday of the given ``%W`` week.
        """
        df = self.data.copy()

        # Literal (non-regex) removal of the prefix, then split on underscores.
        id_parts = (
            df['ID_LAT_LON_YEAR_WEEK']
            .str.replace('ID_', '', regex=False)
            .str.split('_')
        )

        df['latitude'] = id_parts.str[0].astype(float)
        df['longitude'] = id_parts.str[1].astype(float)
        df['year'] = id_parts.str[2].astype(int)
        df['week'] = id_parts.str[3].astype(int)

        # Build the dates from the two integer columns directly instead of a
        # row-wise DataFrame.apply: same strptime call, far less overhead, and
        # an empty frame still yields an (empty) datetime column.
        # NOTE(review): %W numbers weeks from the first Monday of the year, so
        # week 0 resolves to the Monday on or before 1 January (possibly in the
        # previous year) - confirm this matches the dataset's week numbering.
        df['date'] = pd.to_datetime([
            datetime.strptime(f"{year}-W{week:02d}-1", "%Y-W%W-%w")
            for year, week in zip(df['year'], df['week'])
        ])

        return df

    def analyze_emissions_by_location(self) -> pd.DataFrame:
        """Aggregate emissions per (latitude, longitude) pair.

        Returns:
            A frame indexed by ``latitude``/``longitude`` with two-level
            columns ``('emission', <stat>)`` for mean, min, max, std and
            count, rounded to 4 decimals.
        """
        location_analysis = self.parsed_data.groupby(['latitude', 'longitude']).agg({
            'emission': ['mean', 'min', 'max', 'std', 'count']
        }).round(4)

        return location_analysis

    def analyze_temporal_trends(self) -> pd.DataFrame:
        """Aggregate emissions per (year, week), keeping the group index.

        Returns:
            A frame indexed by ``year``/``week`` with two-level columns
            ``('emission', <stat>)`` for mean, min, max and std, rounded to
            4 decimals.
        """
        weekly_trends = self.parsed_data.groupby(['year', 'week']).agg({
            'emission': ['mean', 'min', 'max', 'std']
        }).round(4)

        return weekly_trends

    @staticmethod
    def _marker_intensity(emission, max_emission) -> float:
        """Scale one reading to an opacity in [0, 1] relative to the maximum.

        Missing readings, a missing maximum or a non-positive maximum all map
        to 0.0 so the marker opacity is never NaN or infinite.
        """
        if pd.isna(emission) or pd.isna(max_emission) or max_emission <= 0:
            return 0.0
        return float(min(max(emission / max_emission, 0.0), 1.0))

    def create_interactive_map(self) -> "folium.Map":
        """Draw one circle marker per row on a folium map.

        Marker opacity grows with the row's emission relative to the largest
        emission in the data; each popup shows location, week/year and value.

        Returns:
            The populated ``folium.Map``.
        """
        # Centering map on Rwanda's coordinates
        rwanda_center = [-1.9403, 29.8739]  # Approximate center of Rwanda
        m = folium.Map(location=rwanda_center, zoom_start=8)

        # The maximum is loop-invariant: compute it once rather than per row.
        max_emission = self.parsed_data['emission'].max()

        # itertuples over just the needed columns keeps each column's dtype
        # (ints stay ints in the popup) and avoids building a Series per row.
        marker_columns = ['latitude', 'longitude', 'year', 'week', 'emission']
        for row in self.parsed_data[marker_columns].itertuples(index=False):

            intensity = self._marker_intensity(row.emission, max_emission)

            # Creating popup content
            popup_content = f"""
                Location: {row.latitude}, {row.longitude}<br>
                Date: Week {row.week}, {row.year}<br>
                Emission: {row.emission:.4f}
            """

            # Adding marker to map
            folium.CircleMarker(
                location=[row.latitude, row.longitude],
                radius=8,
                color='red',
                fill=True,
                popup=popup_content,
                opacity=intensity,
                fill_opacity=intensity * 0.7
            ).add_to(m)

        return m

    def analyse_temporal_patterns(self) -> pd.DataFrame:
        """Aggregate emissions per (year, week) as a flat table.

        Unlike ``analyze_temporal_trends`` the result is not rounded, and
        ``year``/``week`` are ordinary columns next to ``mean``, ``max``,
        ``min`` and ``std``.
        """
        temporal_analysis = self.parsed_data.groupby(
            ['year', 'week']
        )['emission'].agg(['mean', 'max', 'min', 'std']).reset_index()

        return temporal_analysis

    def _weekly_mean_series(self) -> pd.DataFrame:
        """Return the mean emission per date with an elapsed-weeks index.

        Returns:
            A frame sorted by ``date`` with columns ``date``, ``emission`` and
            ``time_index`` (weeks elapsed since the first date).

        Raises:
            ValueError: If no date has a non-missing mean emission.
        """
        ts_data = (
            self.parsed_data
            .groupby('date')['emission']
            .mean()
            .dropna()  # dates whose readings are all missing cannot be fitted
            .reset_index()
        )
        if ts_data.empty:
            raise ValueError("No emission data available to fit a trend")

        # Use real elapsed time rather than row position so that missing weeks
        # do not compress the time axis of the regression.
        elapsed = ts_data['date'] - ts_data['date'].min()
        ts_data['time_index'] = elapsed.dt.days / 7.0

        return ts_data

    def predict_emissions(self, forecast_weeks: int = 4) -> pd.DataFrame:
        """Extrapolate the mean weekly emission with a linear trend.

        Args:
            forecast_weeks: Number of weekly steps to predict after the last
                observed date. Must be at least 1.

        Returns:
            A frame with columns ``date`` and ``predicted_emission``, one row
            per forecast week.

        Raises:
            ValueError: If ``forecast_weeks`` is below 1 or there is no data
                to fit.
        """
        if forecast_weeks < 1:
            raise ValueError("forecast_weeks must be a positive integer")

        ts_data = self._weekly_mean_series()

        # Fit linear regression of mean emission on elapsed weeks
        model = LinearRegression()
        X = ts_data['time_index'].values.reshape(-1, 1)
        y = ts_data['emission'].values
        model.fit(X, y)

        # Future points continue the same weekly axis from the last observation
        steps = np.arange(1, forecast_weeks + 1)
        future_indices = (ts_data['time_index'].iloc[-1] + steps).reshape(-1, 1)
        predictions = model.predict(future_indices)

        last_date = ts_data['date'].max()
        future_dates = [last_date + timedelta(weeks=int(step)) for step in steps]

        forecast_df = pd.DataFrame({
            'date': future_dates,
            'predicted_emission': predictions
        })

        return forecast_df

    def create_trend_visualization(self, output_path: str = 'emission_trends.png') -> None:
        """Plot the mean emission per date and save the figure to disk.

        Args:
            output_path: Destination passed to ``savefig``. Defaults to
                ``'emission_trends.png'`` in the working directory.
        """
        fig, ax = plt.subplots(figsize=(12, 6))
        try:
            # Plotting average emissions by date
            (
                self.parsed_data
                .groupby('date')['emission']
                .mean()
                .plot(kind='line', marker='o', ax=ax)
            )

            ax.set_title('Average CO2 Emissions Over Time')
            ax.set_xlabel('Date')
            ax.set_ylabel('Emission Level')
            ax.grid(True)
            ax.tick_params(axis='x', rotation=45)

            # Saving the plot
            fig.tight_layout()
            fig.savefig(output_path)
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close(fig)

In [7]:
import traceback

# Input file and forecast horizon, named once so the printed heading and the
# prediction call cannot drift apart.
DATA_PATH = '/content/drive/MyDrive/Projects/CO2_Emission_project/Data/s.csv'
FORECAST_WEEKS = 4

# Driver cell: load the data, print the location and weekly summaries, save
# the interactive map and the trend plot, and print the forecast.
try:
    # Creating analyser instance
    analyser = Co2EmissionAnalyser(DATA_PATH)

    # Analysing location-based patterns
    location_stats = analyser.analyze_emissions_by_location()
    print("Location-based Analysis:")
    print(location_stats.head())

    # Creating interactive map
    emission_map = analyser.create_interactive_map()
    emission_map.save('rwanda_emissions_map.html')

    # Analysing temporal patterns
    time_analysis = analyser.analyze_temporal_trends()
    print("\nTemporal Analysis:")
    print(time_analysis.head())

    # Making predictions
    predictions = analyser.predict_emissions(forecast_weeks=FORECAST_WEEKS)
    print(f"\nEmission Predictions for Next {FORECAST_WEEKS} Weeks:")
    print(predictions)

    # Creating visualizations
    analyser.create_trend_visualization()

except Exception as e:
    # Keep the cell best-effort, but do not reduce the failure to a bare
    # message: the traceback shows which step failed and the exception type.
    print(f"An error occurred: {str(e)}")
    traceback.print_exc()

Location-based Analysis:
                   emission                                  
                       mean      min       max      std count
latitude longitude                                           
-3.299   30.301     29.1932   0.7918   76.6237  15.0768    49
-3.287   29.713     67.6165  45.6460  105.7875  13.5944    49
-3.174   29.926     52.1634  23.0645   81.0720  12.2713    49
-3.161   28.839      9.6461   0.0385   49.6438   8.2745    49
-3.153   30.347     21.6183   0.4872   50.0427  13.3483    49

Temporal Analysis:
           emission                             
               mean     min        max       std
year week                                       
2022 0     145.4240  0.6780  2250.4856  128.5527
     1      77.4042  0.2492  2126.8958  115.8833
     2      74.0449  0.2452  2103.9207  115.0446
     3      72.1645  0.0270  2111.0474  114.4567
     4      75.3951  0.0063  2022.7720  112.5452

Emission Predictions for Next 4 Weeks:
        date  predicted_emi