In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# --- Notebook configuration ---

# Input CSV holding the emissions time series.
file_name = 'emissions_data.csv'

# Header of the emissions column as stored in the uploaded file. Note the
# zero-width space (\u200b) right after "CO2": it is invisible when printed
# but is part of the column name.
original_emissions_col = 'CO2\u200b Emissions (Million Metric Tons)'
# Short, plain-ASCII name used for that column in the rest of the notebook.
new_emissions_col = 'Emissions_MMT'

# Default look for figures: seaborn grid style and a 14x7 inch canvas
# (individual figures may still pass their own figsize).
sns.set_style(style="whitegrid")
plt.rcParams['figure.figsize'] = (14, 7)


In [7]:
# Load the emissions CSV into `df`: a single-column frame (new_emissions_col)
# indexed by a chronologically sorted 'Date' index, with every row whose date
# or emissions value could not be parsed removed.
try:
    # Read the raw CSV; the index is assigned only after dates are validated.
    df = pd.read_csv(file_name)

    # Parse 'Date'. Unparseable values become NaT and those rows are dropped
    # before the column is promoted to the index.
    df = df.assign(Date=pd.to_datetime(df['Date'], errors='coerce'))
    df = df[df['Date'].notna()].set_index('Date')

    # Rename the emissions column to its short name. DataFrame.rename silently
    # ignores keys that are not present, so resolve the source column
    # explicitly instead of letting a header mismatch surface later as an
    # unexplained KeyError on the new name.
    if original_emissions_col in df.columns:
        df = df.rename(columns={original_emissions_col: new_emissions_col})
    elif new_emissions_col not in df.columns:
        # Fall back to a comparison that ignores zero-width spaces, so the
        # load still works if the file no longer carries the hidden \u200b.
        wanted = original_emissions_col.replace('\u200b', '')
        lookalikes = [
            col for col in df.columns
            if str(col).replace('\u200b', '') == wanted
        ]
        if not lookalikes:
            raise KeyError(
                f"Emissions column {original_emissions_col!r} not found in "
                f"{file_name!r}; available columns: {list(df.columns)}"
            )
        df = df.rename(columns={lookalikes[0]: new_emissions_col})

    # Coerce emissions to numeric (bad values -> NaN) and drop those rows.
    df = df.assign(
        **{new_emissions_col: pd.to_numeric(df[new_emissions_col], errors='coerce')}
    )
    df = df.dropna(subset=[new_emissions_col])

    # Keep only the emissions column and order rows chronologically: the line
    # plots and the positional rolling mean computed later both assume the
    # rows are in date order. A stable sort keeps file order for equal dates.
    df = df[[new_emissions_col]].sort_index(kind='mergesort')

    print(f"Data Loaded. First 5 rows:\n{df.head()}")

except Exception as e:
    print(f"Error loading or processing data: {e}")
    # Re-raise so the notebook stops here instead of running later cells
    # against a missing or partial `df`.
    raise

Data Loaded. First 5 rows:
            Emissions_MMT
Date                     
1973-01-01         72.076
1974-02-01         64.442
1975-01-01         55.460
1975-12-01         67.340
1976-11-01         88.100


In [None]:
# --- 3. Exploratory Data Analysis (EDA) and Visualization ---

def _save_line_plot(x, y, title, out_path, **line_kwargs):
    """Draw one labelled 12x6 line chart, write it to `out_path`, and close it.

    `line_kwargs` (label, color, alpha, ...) are forwarded unchanged to
    `Axes.plot`. The figure is closed after saving, so nothing is rendered
    inline; the PNG on disk is the only output.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(x, y, **line_kwargs)
    ax.set_title(title, fontsize=16)
    ax.set_xlabel('Year')
    ax.set_ylabel('Emissions (Million Metric Tons)')
    ax.legend()
    fig.tight_layout()
    fig.savefig(out_path)
    plt.close(fig)


# Raw series exactly as loaded.
_save_line_plot(
    df.index,
    df[new_emissions_col],
    'Monthly Coal Power Sector CO2 Emissions Over Time',
    'monthly_emissions_plot.png',
    label='Monthly Emissions',
    color='blue',
    alpha=0.7,
)

# Smoothed trend, stored on `df` as 'Annual_Avg'. The first 11 rows are NaN
# because a full window of 12 observations is required.
# NOTE(review): window=12 counts rows, not calendar months, so this is a
# 12-month average only if the index is contiguous monthly data - confirm.
rolling_window = df[new_emissions_col].rolling(window=12)
df['Annual_Avg'] = rolling_window.mean()

_save_line_plot(
    df.index,
    df['Annual_Avg'],
    'Annual Trend of CO2 Emissions (Simulated)',
    'emissions_trend_plot.png',
    label='12-Month Rolling Average',
    color='red',
)