# Trend Analysis & Time Series Patterns

**Purpose**: Analyze time-series trends, forecasting patterns, and seasonality in the energy data

**Date**: January 12, 2026

## Objectives
1. Analyze time series trends by country
2. Identify forecasting patterns and cycles
3. Detect seasonal patterns (if applicable)
4. Calculate trend statistics and slopes
5. Visualize long-term energy transitions

In [None]:
import os
import warnings
from urllib.parse import quote_plus

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from scipy.stats import linregress
from sqlalchemy import create_engine
# Silence library warnings so the notebook output stays readable.
# NOTE: this suppresses every warning category, including deprecations.
warnings.filterwarnings('ignore')

# Set plotting style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Database connection
# Every setting can be overridden through a LIANEL_DB_* environment variable;
# the literals below are only fallbacks so existing runs keep working.
DB_CONFIG = {
    'host': os.environ.get('LIANEL_DB_HOST', '172.18.0.1'),
    'port': int(os.environ.get('LIANEL_DB_PORT', '5432')),
    'database': os.environ.get('LIANEL_DB_NAME', 'lianel_energy'),
    'user': os.environ.get('LIANEL_DB_USER', 'airflow'),
    # SECURITY: a credential committed to source control should be treated as
    # leaked. Set LIANEL_DB_PASSWORD instead, then rotate this password and
    # delete the fallback literal.
    'password': os.environ.get('LIANEL_DB_PASSWORD', 'P9xK2mN7vQ4wR8tY3sL6hJ5nB1cV0zX'),
}

# Percent-encode the credentials: characters such as '@', ':' or '/' in an
# overridden user name or password would otherwise corrupt the URL.
connection_string = (
    f"postgresql://{quote_plus(DB_CONFIG['user'])}:{quote_plus(DB_CONFIG['password'])}"
    f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
)
# create_engine() is lazy: no connection is opened until the first query runs,
# so a wrong host or password only surfaces in the data-loading cell.
engine = create_engine(connection_string)

print("✅ Database connection established")

## 1. Load Time Series Data

In [None]:
# Load time series data
# Pulls the per-country, per-year energy columns used by the cells below,
# restricted to 2018 onwards and ordered by country then year.
query = """
SELECT 
    cntr_code,
    year,
    total_energy_gwh,
    renewable_energy_gwh,
    fossil_energy_gwh,
    pct_renewable,
    pct_fossil,
    yoy_change_total_energy_pct,
    yoy_change_renewable_pct,
    trend_3y_slope,
    trend_5y_slope,
    rolling_3y_mean_total_energy_gwh,
    rolling_5y_mean_total_energy_gwh
FROM ml_dataset_forecasting_v1
WHERE year >= 2018  -- Filter incomplete years
ORDER BY cntr_code, year
"""

df = pd.read_sql(query, engine)

# Quick sanity summary of what came back (status glyph repaired from mojibake).
print(f"✅ Loaded {len(df)} records")
print(f"Countries: {df['cntr_code'].nunique()}")
print(f"Years: {df['year'].min()} - {df['year'].max()}")
# Last expression of the cell: the notebook renders the first rows.
df.head()

## 2. Overall Trend Analysis

In [None]:
# Collapse all countries into one row per year.
# Energy columns are summed; 'cntr_code' ends up holding the number of
# non-null country rows that contributed to each year.
yearly_totals = df.groupby('year', as_index=False).agg(
    total_energy_gwh=('total_energy_gwh', 'sum'),
    renewable_energy_gwh=('renewable_energy_gwh', 'sum'),
    fossil_energy_gwh=('fossil_energy_gwh', 'sum'),
    cntr_code=('cntr_code', 'count'),
)

# Renewable share of the yearly total, as a percentage with two decimals.
yearly_totals['pct_renewable'] = (
    yearly_totals['renewable_energy_gwh']
    .div(yearly_totals['total_energy_gwh'])
    .mul(100)
    .round(2)
)

# Calculate linear trends
def calculate_trend(years, values):
    """Fit a least-squares line to ``values`` over ``years``.

    Pairs in which either coordinate is missing or non-finite are dropped
    before fitting, so a single gap no longer turns every statistic into NaN.

    Args:
        years: Sequence (list, array or Series) of x values.
        values: Sequence of y values, the same length as ``years``.

    Returns:
        Tuple ``(slope, r_value, p_value)`` from ``scipy.stats.linregress``,
        or ``(None, None, None)`` when no line can be fitted: fewer than two
        usable points, or all usable x values identical.

    Raises:
        ValueError: If ``years`` and ``values`` have different lengths.
    """
    x = np.asarray(years, dtype=float)
    y = np.asarray(values, dtype=float)

    usable = np.isfinite(x) & np.isfinite(y)
    x, y = x[usable], y[usable]

    # linregress raises ValueError when every x is the same; report that the
    # same way as "too few points" instead of crashing the caller.
    if x.size < 2 or x.max() == x.min():
        return None, None, None

    fit = linregress(x, y)
    return fit.slope, fit.rvalue, fit.pvalue

# Fit one linear trend per aggregated series (all countries combined).
total_trend = calculate_trend(yearly_totals['year'], yearly_totals['total_energy_gwh'])
renewable_trend = calculate_trend(yearly_totals['year'], yearly_totals['renewable_energy_gwh'])
fossil_trend = calculate_trend(yearly_totals['year'], yearly_totals['fossil_energy_gwh'])
renewable_pct_trend = calculate_trend(yearly_totals['year'], yearly_totals['pct_renewable'])


def _print_trend(title, trend, unit, show_p_value=False):
    """Print slope and R² (optionally the p-value) for one fitted trend.

    ``trend`` is the ``(slope, r_value, p_value)`` tuple returned by
    ``calculate_trend``. That function returns ``(None, None, None)`` when it
    cannot fit a line; formatting ``None`` with ``:.2f`` raises TypeError, so
    that case gets its own message.
    """
    slope, r_value, p_value = trend
    print(f"\n{title}:")
    if slope is None:
        print("  Not enough data to fit a trend")
        return
    print(f"  Slope: {slope:.2f} {unit}")
    print(f"  R²: {r_value**2:.3f}")
    if show_p_value:
        print(f"  P-value: {p_value:.4f}")


print("📈 Overall Trend Analysis:")
_print_trend("Total Energy", total_trend, "GWh/year", show_p_value=True)
_print_trend("Renewable Energy", renewable_trend, "GWh/year")
_print_trend("Fossil Energy", fossil_trend, "GWh/year")
_print_trend("Renewable Percentage", renewable_pct_trend, "%/year")

# Visualize overall trends
def _fitted_line(x, y, slope):
    """Return the least-squares line for ``y`` over ``x`` given its slope.

    A least-squares fit always passes through the centroid (mean x, mean y),
    so the intercept is ``mean(y) - slope * mean(x)``. Anchoring the line at
    the first observation instead, as this cell used to, draws a line that is
    parallel to the fit but offset by the first point's residual.
    """
    paired = x.notna() & y.notna()
    return slope * (x - x[paired].mean()) + y[paired].mean()


def _has_slope(trend):
    """True when ``trend`` holds a usable slope (a zero slope counts)."""
    return trend[0] is not None and np.isfinite(trend[0])


fig, axes = plt.subplots(2, 2, figsize=(16, 12))
years = yearly_totals['year']

# Plot 1: Total energy over time
ax1 = axes[0, 0]
ax1.plot(years, yearly_totals['total_energy_gwh'],
         marker='o', linewidth=2, markersize=8, label='Total Energy')
if _has_slope(total_trend):
    trend_line = _fitted_line(years, yearly_totals['total_energy_gwh'], total_trend[0])
    ax1.plot(years, trend_line, '--', alpha=0.7, label='Linear Trend')
ax1.set_xlabel('Year')
ax1.set_ylabel('Total Energy (GWh)')
ax1.set_title('Total Energy Consumption Trend')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Plot 2: Renewable vs Fossil
ax2 = axes[0, 1]
ax2.plot(years, yearly_totals['renewable_energy_gwh'],
         marker='o', linewidth=2, markersize=8, label='Renewable', color='green')
ax2.plot(years, yearly_totals['fossil_energy_gwh'],
         marker='s', linewidth=2, markersize=8, label='Fossil', color='red')
ax2.set_xlabel('Year')
ax2.set_ylabel('Energy (GWh)')
ax2.set_title('Renewable vs Fossil Energy')
ax2.legend()
ax2.grid(True, alpha=0.3)

# Plot 3: Renewable percentage trend
ax3 = axes[1, 0]
ax3.plot(years, yearly_totals['pct_renewable'],
         marker='o', linewidth=2, markersize=8, color='green', label='Renewable %')
if _has_slope(renewable_pct_trend):
    trend_line = _fitted_line(years, yearly_totals['pct_renewable'], renewable_pct_trend[0])
    ax3.plot(years, trend_line, '--', alpha=0.7, label='Linear Trend')
ax3.set_xlabel('Year')
ax3.set_ylabel('Renewable Percentage (%)')
ax3.set_title('Renewable Energy Percentage Trend')
ax3.legend()
ax3.grid(True, alpha=0.3)

# Plot 4: YoY changes (unweighted mean across countries for each year)
ax4 = axes[1, 1]
yoy_by_year = df.groupby('year')['yoy_change_total_energy_pct'].mean()
ax4.bar(yoy_by_year.index, yoy_by_year.values, alpha=0.7, color='steelblue')
ax4.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
ax4.set_xlabel('Year')
ax4.set_ylabel('Average YoY Change (%)')
ax4.set_title('Average Year-over-Year Change by Year')
ax4.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

In [None]:
# Calculate trends for each country
def _country_trend_row(country, country_data):
    """Summarise one country's yearly rows as a single trend record.

    The fits live in this function's local scope, so the notebook-level
    ``total_trend`` / ``renewable_trend`` / ``renewable_pct_trend`` computed
    for the overall analysis are no longer overwritten by the last country
    processed.

    Args:
        country: Value of ``cntr_code`` identifying the country.
        country_data: That country's rows, with the columns ``year``,
            ``total_energy_gwh``, ``renewable_energy_gwh``, ``pct_renewable``.

    Returns:
        dict with one slope / R² pair per series plus the two averages.
        When ``calculate_trend`` reports no fit (``None``), slope and R² are
        recorded as 0.
    """
    def slope_and_r2(column):
        slope, r_value, _ = calculate_trend(country_data['year'], country_data[column])
        # Explicit None checks: a truthiness test also rewrites a real 0.0.
        return (
            0 if slope is None else slope,
            0 if r_value is None else r_value ** 2,
        )

    total_slope, total_r2 = slope_and_r2('total_energy_gwh')
    renewable_slope, renewable_r2 = slope_and_r2('renewable_energy_gwh')
    renewable_pct_slope, renewable_pct_r2 = slope_and_r2('pct_renewable')

    return {
        'cntr_code': country,
        'total_slope': total_slope,
        'total_r2': total_r2,
        'renewable_slope': renewable_slope,
        'renewable_r2': renewable_r2,
        'renewable_pct_slope': renewable_pct_slope,
        'renewable_pct_r2': renewable_pct_r2,
        'avg_total_energy': country_data['total_energy_gwh'].mean(),
        'avg_renewable_pct': country_data['pct_renewable'].mean(),
    }


# A single groupby pass replaces one boolean filter of the whole frame per
# country; sort=False keeps countries in order of first appearance, matching
# the order df['cntr_code'].unique() produced before.
country_trends = [
    _country_trend_row(code, rows.sort_values('year'))
    for code, rows in df.groupby('cntr_code', sort=False)
    if len(rows) >= 3  # Need at least 3 points for trend
]

trends_df = pd.DataFrame(country_trends)

# Print the two top-10 rankings derived from the per-country trend table
# (heading glyph repaired from mojibake).
print("📊 Country-Level Trend Analysis:")

# Ranked by how fast the renewable share grows (slope of pct_renewable).
print("\nTop 10 Countries by Renewable Percentage Growth:")
top_renewable_growth = trends_df.nlargest(10, 'renewable_pct_slope')
print(top_renewable_growth[['cntr_code', 'renewable_pct_slope', 'renewable_pct_r2', 'avg_renewable_pct']].to_string(index=False))

# Ranked by how fast total energy grows (slope of total_energy_gwh).
print("\nTop 10 Countries by Total Energy Growth:")
top_energy_growth = trends_df.nlargest(10, 'total_slope')
print(top_energy_growth[['cntr_code', 'total_slope', 'total_r2', 'avg_total_energy']].to_string(index=False))

# Visualize country trends
def _plot_top_series(ax, codes, column, ylabel, title):
    """Draw one line per country code showing ``column`` over the years.

    Args:
        ax: Matplotlib axes to draw on.
        codes: Iterable of ``cntr_code`` values to plot.
        column: Name of the ``df`` column used for the y axis.
        ylabel: Y-axis label.
        title: Panel title.
    """
    for code in codes:
        series = df[df['cntr_code'] == code].sort_values('year')
        ax.plot(series['year'], series[column],
                marker='o', label=code, linewidth=2, markersize=6)
    ax.set_xlabel('Year')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    # Legend sits outside the axes so it does not cover the ten lines.
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)
    ax.grid(True, alpha=0.3)


fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Plot 1: Renewable percentage trends by country (top 10)
ax1 = axes[0, 0]
top_countries = trends_df.nlargest(10, 'renewable_pct_slope')['cntr_code']
_plot_top_series(ax1, top_countries, 'pct_renewable',
                 'Renewable Percentage (%)', 'Top 10 Countries by Renewable Growth')

# Plot 2: Total energy trends (top 10 by growth)
ax2 = axes[0, 1]
top_energy = trends_df.nlargest(10, 'total_slope')['cntr_code']
_plot_top_series(ax2, top_energy, 'total_energy_gwh',
                 'Total Energy (GWh)', 'Top 10 Countries by Energy Growth')

# Plot 3: Renewable percentage slope distribution
ax3 = axes[1, 0]
ax3.hist(trends_df['renewable_pct_slope'], bins=20, alpha=0.7, edgecolor='black', color='green')
ax3.axvline(x=0, color='red', linestyle='--', linewidth=2, label='No change')
ax3.set_xlabel('Renewable % Slope (%/year)')
ax3.set_ylabel('Number of Countries')
ax3.set_title('Distribution of Renewable Percentage Growth Rates')
ax3.legend()
ax3.grid(True, alpha=0.3)

# Plot 4: Scatter: Renewable % vs Growth Rate
ax4 = axes[1, 1]
# Keep the scatter handle so the colorbar is tied to this artist explicitly
# rather than to whatever happens to be first in ax4.collections.
scatter = ax4.scatter(trends_df['avg_renewable_pct'], trends_df['renewable_pct_slope'],
                      alpha=0.6, s=100, c=trends_df['renewable_pct_r2'], cmap='viridis')
ax4.set_xlabel('Average Renewable Percentage (%)')
ax4.set_ylabel('Renewable Growth Rate (%/year)')
ax4.set_title('Renewable % vs Growth Rate (color = R²)')
ax4.axhline(y=0, color='red', linestyle='--', linewidth=1, alpha=0.5)
ax4.grid(True, alpha=0.3)
cbar = plt.colorbar(scatter, ax=ax4)
cbar.set_label('R² (Trend Strength)')

plt.tight_layout()
plt.show()