In [2]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
from datetime import datetime, timedelta
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Set style for better visualization
plt.style.use('seaborn-v0_8')  # Use a valid style name
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = [12, 6]

# Initialize BigQuery client
client = bigquery.Client()

In [3]:
# Load generation data into a DataFrame
query = """
    SELECT settlementDate AS timestamp, levelFrom AS generation_gw
    FROM `jibber-jabber-knowledge.uk_energy_insights.bmrs_pn`
    WHERE bmUnit = 'T_HUMR-1'
"""

# Execute the query
job = client.query(query)
df = job.to_dataframe()

# Convert timestamp to datetime
df['timestamp'] = pd.to_datetime(df['timestamp'])

print("Data loaded successfully with corrected column name.")



Data loaded successfully with corrected column name.


In [None]:
from plotly.subplots import make_subplots

In [2]:
# Fetch generation data
def fetch_generation_data():
    """Fetch T_HUMR-1 generation data"""
    query = """
    SELECT 
        DATE(settlementDate) as settlementDate,
        settlementPeriod,
        levelFrom as generation_mw,
        _ingested_utc as timestamp
    FROM `jibber-jabber-knowledge.uk_energy_insights.bmrs_pn`
    WHERE bmUnit = 'T_HUMR-1'
    AND DATE(_ingested_utc) >= DATE_SUB(CURRENT_DATE(), INTERVAL 24 MONTH)
    ORDER BY settlementDate, settlementPeriod
    """
    return client.query(query).to_dataframe()

# Fetch the data
print("Fetching generation data...")
df = fetch_generation_data()

# Basic data preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['settlementDate'] = pd.to_datetime(df['settlementDate'])
df['generation_gw'] = df['generation_mw'] / 1000

print("\nData Summary:")
print(f"Date range: {df['settlementDate'].min()} to {df['settlementDate'].max()}")
print(f"Total records: {len(df):,}")
print("\nGeneration Summary:")
print(df[['generation_mw']].describe())

Fetching combined generation and BSUoS data...





Data Summary:
Date range: 2022-12-31 00:00:00 to 2025-09-13 00:00:00
Total records: 855,601

Generation Summary:
       generation_mw
count       855601.0
mean      417.899117
std       261.168274
min              0.0
25%            290.0
50%            335.0
75%            546.0
max           1251.0


## Generation and BSUoS Analysis

Let's analyze the relationship between generation patterns and BSUoS charges, including:
1. Generation trends over time
2. BSUoS rate variations
3. Total BSUoS costs for the unit
4. Correlation between generation and BSUoS rates

In [4]:
# Create subplots for generation analysis
fig = make_subplots(
    rows=2, cols=1,
    subplot_titles=(
        'Generation Over Time',
        'Daily Generation Pattern'
    ),
    vertical_spacing=0.12
)

# 1. Generation Over Time
fig.add_trace(
    go.Scatter(
        x=df['timestamp'],
        y=df['generation_gw'],
        mode='lines',
        name='Generation (GW)'
    ),
    row=1, col=1
)

# 2. Daily Generation Pattern
hourly_gen = df.groupby(df['timestamp'].dt.hour)['generation_gw'].mean()
fig.add_trace(
    go.Scatter(
        x=hourly_gen.index,
        y=hourly_gen.values,
        mode='lines+markers',
        name='Avg Generation by Hour'
    ),
    row=2, col=1
)

# Update layout
fig.update_layout(
    height=800,
    width=1000,
    showlegend=True,
    title_text="T_HUMR-1 Generation Analysis",
)

# Update axes labels
fig.update_xaxes(title_text="Time", row=1, col=1)
fig.update_xaxes(title_text="Hour of Day", row=2, col=1)

fig.update_yaxes(title_text="Generation (GW)", row=1, col=1)
fig.update_yaxes(title_text="Average Generation (GW)", row=2, col=1)

fig.show()

# Calculate and print key statistics
print("\nKey Statistics:")
print("==============")

# 1. Generation Statistics
print("\nGeneration Statistics:")
print(f"Average Generation: {df['generation_gw'].mean():.2f} GW")
print(f"Peak Generation: {df['generation_gw'].max():.2f} GW")
print(f"Minimum Generation: {df['generation_gw'].min():.2f} GW")


Key Statistics:

Generation Statistics:
Average Generation: 455.70 GW
Peak Generation: 1251.00 GW
Minimum Generation: 0.00 GW


## Detailed Cost Analysis

Let's analyze the BSUoS cost implications and identify opportunities for cost optimization:
1. Peak BSUoS periods
2. Generation optimization opportunities
3. Cost saving potential

In [None]:
# Identify peak BSUoS periods
peak_bsuos_threshold = df['bsuos_rate'].quantile(0.9)  # Top 10% of BSUoS rates
peak_periods = df[df['bsuos_rate'] >= peak_bsuos_threshold].copy()

# Analyze peak periods
peak_periods['hour'] = peak_periods['timestamp'].dt.hour
peak_periods['month'] = peak_periods['timestamp'].dt.month
peak_periods['day_of_week'] = peak_periods['timestamp'].dt.dayofweek

# Create analysis plots
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        'Peak BSUoS Periods by Hour',
        'Peak BSUoS Periods by Month',
        'Generation During Peak BSUoS',
        'Cost Optimization Potential'
    )
)

# 1. Peak periods by hour
hourly_peaks = peak_periods.groupby('hour').size()
fig.add_trace(
    go.Bar(
        x=hourly_peaks.index,
        y=hourly_peaks.values,
        name='Peak Periods Count'
    ),
    row=1, col=1
)

# 2. Peak periods by month
monthly_peaks = peak_periods.groupby('month').size()
fig.add_trace(
    go.Bar(
        x=monthly_peaks.index,
        y=monthly_peaks.values,
        name='Monthly Peak Count'
    ),
    row=1, col=2
)

# 3. Generation during peak BSUoS
fig.add_trace(
    go.Scatter(
        x=peak_periods['bsuos_rate'],
        y=peak_periods['generation_gw'],
        mode='markers',
        name='Generation at Peak BSUoS'
    ),
    row=2, col=1
)

# 4. Cost optimization potential
# Calculate potential savings by reducing generation during peak BSUoS
peak_periods['potential_savings'] = (
    peak_periods['generation_mw'] * 
    (peak_periods['bsuos_rate'] - df['bsuos_rate'].median())
)
savings_by_hour = peak_periods.groupby('hour')['potential_savings'].sum()

fig.add_trace(
    go.Bar(
        x=savings_by_hour.index,
        y=savings_by_hour.values,
        name='Potential Savings'
    ),
    row=2, col=2
)

# Update layout
fig.update_layout(
    height=800,
    width=1200,
    showlegend=True,
    title_text="BSUoS Cost Optimization Analysis"
)

# Update axes labels
fig.update_xaxes(title_text="Hour of Day", row=1, col=1)
fig.update_xaxes(title_text="Month", row=1, col=2)
fig.update_xaxes(title_text="BSUoS Rate (£/MWh)", row=2, col=1)
fig.update_xaxes(title_text="Hour of Day", row=2, col=2)

fig.update_yaxes(title_text="Count of Peak Periods", row=1, col=1)
fig.update_yaxes(title_text="Count of Peak Periods", row=1, col=2)
fig.update_yaxes(title_text="Generation (GW)", row=2, col=1)
fig.update_yaxes(title_text="Potential Savings (£)", row=2, col=2)

fig.show()

# Print optimization insights
print("\nBSUoS Cost Optimization Insights:")
print("================================")

# 1. Peak BSUoS Patterns
print("\nPeak BSUoS Patterns:")
print(f"Peak BSUoS Threshold: £{peak_bsuos_threshold:.2f}/MWh")
print(f"Number of Peak Periods: {len(peak_periods)}")

# 2. Most Common Peak Hours
top_peak_hours = hourly_peaks.nlargest(3)
print("\nMost Common Peak BSUoS Hours:")
for hour, count in top_peak_hours.items():
    print(f"Hour {hour:02d}:00 - {count} occurrences")

# 3. Potential Savings
total_potential_savings = peak_periods['potential_savings'].sum()
print(f"\nTotal Potential Annual Savings: £{total_potential_savings:,.2f}")

# 4. Generation Recommendations
print("\nGeneration Recommendations:")
worst_hours = savings_by_hour.nlargest(3)
print("Consider reducing generation during:")
for hour, savings in worst_hours.items():
    print(f"Hour {hour:02d}:00 - Potential savings: £{savings:,.2f}")

# Save the results to a CSV for further analysis
peak_periods.to_csv('T_HUMR-1_peak_bsuos_analysis.csv', index=False)
print("\nDetailed analysis saved to 'T_HUMR-1_peak_bsuos_analysis.csv'")

# T_HUMBR-1 Generation Analysis
Analysis of generation patterns, stops, and weekly statistics

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from google.cloud import bigquery
import numpy as np
from typing import Dict, List, Any

# Set style for better visualization
sns.set_style("whitegrid")
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = [12, 6]

# Initialize BigQuery client
client = bigquery.Client()

In [None]:
# Fetch generation data from BigQuery
def fetch_generation_data() -> pd.DataFrame:
    """Fetch T_HUMR-1 generation data from BigQuery"""
    query = """
    WITH ranked_data AS (
      SELECT 
        settlementDate,
        settlementPeriod,
        bmUnit,
        levelFrom as generation_mw,
        _ingested_utc as timestamp,
        ROW_NUMBER() OVER (
          PARTITION BY settlementDate, settlementPeriod, bmUnit
          ORDER BY _ingested_utc DESC
        ) as rn
      FROM `jibber-jabber-knowledge.uk_energy_insights.bmrs_pn`
      WHERE bmUnit = 'T_HUMR-1'
      AND DATE(_ingested_utc) >= DATE_SUB(CURRENT_DATE(), INTERVAL 24 MONTH)
    )
    SELECT 
      settlementDate,
      settlementPeriod,
      bmUnit,
      generation_mw,
      timestamp
    FROM ranked_data
    WHERE rn = 1
    ORDER BY settlementDate, settlementPeriod
    """
    return client.query(query).to_dataframe()

# Fetch yearly statistics
def fetch_yearly_stats() -> pd.DataFrame:
    """Fetch yearly statistics for T_HUMR-1"""
    query = """
    SELECT 
        EXTRACT(YEAR FROM settlementDate) as year,
        COUNT(*) as record_count,
        AVG(CAST(levelFrom as FLOAT64)) as avg_generation_mw,
        MIN(CAST(levelFrom as FLOAT64)) as min_generation_mw,
        MAX(CAST(levelFrom as FLOAT64)) as max_generation_mw,
        STDDEV(CAST(levelFrom as FLOAT64)) as std_generation_mw
    FROM `jibber-jabber-knowledge.uk_energy_insights.bmrs_pn`
    WHERE bmUnit = 'T_HUMR-1'
    GROUP BY year
    ORDER BY year
    """
    return client.query(query).to_dataframe()

# Fetch the data
print("Fetching generation data...")
df = fetch_generation_data()
print("Fetching yearly statistics...")
yearly_stats = fetch_yearly_stats()

print("\nData Summary:")
print(f"Total records: {len(df):,}")
print(f"Date range: {df['settlementDate'].min()} to {df['settlementDate'].max()}")
print("\nYearly Statistics:")
print(yearly_stats)

## Generation Analysis
Let's analyze the generation patterns over time, including daily, weekly, and monthly trends.

In [None]:
# Convert timestamp to datetime if needed
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['month'] = df['timestamp'].dt.to_period('M')
df['generation_gw'] = df['generation_mw'] / 1000

# Calculate monthly statistics
monthly_stats = df.groupby('month').agg({
    'generation_gw': ['mean', 'min', 'max', 'std']
}).reset_index()

# Plot generation patterns
plt.figure(figsize=(15, 8))
plt.plot(df['timestamp'], df['generation_gw'], alpha=0.6, label='Generation (GW)')
plt.fill_between(df['timestamp'], 0, df['generation_gw'], alpha=0.2)
plt.title('T_HUMR-1 Generation Pattern')
plt.xlabel('Date')
plt.ylabel('Generation (GW)')
plt.grid(True, alpha=0.3)
plt.legend()

# Add trend line
z = np.polyfit(range(len(df)), df['generation_gw'], 1)
p = np.poly1d(z)
plt.plot(df['timestamp'], p(range(len(df))), "r--", alpha=0.8, label='Trend')
plt.legend()
plt.show()

# Plot monthly statistics
plt.figure(figsize=(15, 6))
plt.plot(range(len(monthly_stats)), monthly_stats['generation_gw']['mean'], 'b-', label='Mean Generation')
plt.fill_between(range(len(monthly_stats)), 
                monthly_stats['generation_gw']['min'], 
                monthly_stats['generation_gw']['max'], 
                alpha=0.2, label='Min-Max Range')
plt.xticks(range(len(monthly_stats)), [str(m) for m in monthly_stats['month']], rotation=45)
plt.title('Monthly Generation Statistics')
plt.xlabel('Month')
plt.ylabel('Generation (GW)')
plt.legend()
plt.tight_layout()
plt.show()

# Print summary statistics
print("\nMonthly Generation Summary:")
print(monthly_stats.to_string())

## Yearly Trends Analysis
Analyzing the year-over-year changes in generation patterns and operational characteristics.

In [None]:
# Create yearly trends visualization
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# Average Generation Trend
ax1.bar(yearly_stats['year'], yearly_stats['avg_generation_mw']/1000, color='skyblue')
ax1.set_title('Average Generation by Year')
ax1.set_ylabel('Average Generation (GW)')
ax1.grid(True, alpha=0.3)

# Generation Range
ax2.fill_between(yearly_stats['year'], 
                 yearly_stats['min_generation_mw']/1000,
                 yearly_stats['max_generation_mw']/1000,
                 alpha=0.3, color='green')
ax2.plot(yearly_stats['year'], yearly_stats['avg_generation_mw']/1000, 'g-')
ax2.set_title('Generation Range by Year')
ax2.set_ylabel('Generation (GW)')
ax2.grid(True, alpha=0.3)

# Variability (Standard Deviation)
ax3.plot(yearly_stats['year'], yearly_stats['std_generation_mw']/1000, 'r-o')
ax3.set_title('Generation Variability by Year')
ax3.set_ylabel('Standard Deviation (GW)')
ax3.grid(True, alpha=0.3)

# Record Count (Data Availability)
ax4.bar(yearly_stats['year'], yearly_stats['record_count'], color='purple', alpha=0.6)
ax4.set_title('Number of Records by Year')
ax4.set_ylabel('Record Count')
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Print yearly analysis
print("\nYearly Analysis Summary:")
print("========================")
for _, year_data in yearly_stats.iterrows():
    print(f"\nYear: {int(year_data['year'])}")
    print(f"Average Generation: {year_data['avg_generation_mw']/1000:.2f} GW")
    print(f"Min Generation: {year_data['min_generation_mw']/1000:.2f} GW")
    print(f"Max Generation: {year_data['max_generation_mw']/1000:.2f} GW")
    print(f"Standard Deviation: {year_data['std_generation_mw']/1000:.2f} GW")
    print(f"Number of Records: {year_data['record_count']:,}")

## Generation Patterns and Anomalies
Analyzing generation patterns, identifying unusual periods, and highlighting operational characteristics.

In [None]:
# Identify periods of zero or very low generation
low_gen_threshold = 0.1  # 100 MW
low_gen_periods = df[df['generation_mw'] <= low_gen_threshold].copy()

# Group consecutive periods
low_gen_periods['date_diff'] = low_gen_periods['timestamp'].diff()
low_gen_periods['new_group'] = low_gen_periods['date_diff'] > pd.Timedelta(hours=1)
low_gen_periods['group'] = low_gen_periods['new_group'].cumsum()

# Analyze low generation periods
low_gen_summary = []
for group, data in low_gen_periods.groupby('group'):
    summary = {
        'start': data['timestamp'].min(),
        'end': data['timestamp'].max(),
        'duration_hours': (data['timestamp'].max() - data['timestamp'].min()).total_seconds() / 3600,
        'avg_generation': data['generation_mw'].mean()
    }
    low_gen_summary.append(summary)

# Convert to DataFrame
low_gen_df = pd.DataFrame(low_gen_summary)

# Plot low generation periods
if not low_gen_df.empty:
    plt.figure(figsize=(15, 6))
    for _, period in low_gen_df.iterrows():
        plt.axvspan(period['start'], period['end'], 
                   color='red', alpha=0.2)
    
    plt.plot(df['timestamp'], df['generation_mw'], 'b-', alpha=0.6)
    plt.title('Generation Pattern with Low Generation Periods Highlighted')
    plt.xlabel('Date')
    plt.ylabel('Generation (MW)')
    plt.grid(True, alpha=0.3)
    plt.show()

    # Print summary of low generation periods
    print("\nLow Generation Periods Summary:")
    print("==============================")
    for _, period in low_gen_df.iterrows():
        print(f"\nPeriod: {period['start'].strftime('%Y-%m-%d %H:%M')} to {period['end'].strftime('%Y-%m-%d %H:%M')}")
        print(f"Duration: {period['duration_hours']:.1f} hours")
        print(f"Average Generation: {period['avg_generation']:.2f} MW")
else:
    print("\nNo low generation periods found with threshold of", low_gen_threshold, "MW")

## Seasonality Analysis
Analyzing seasonal patterns and variations in generation output.

In [None]:
# Add seasonal components
df['month'] = df['timestamp'].dt.month
df['hour'] = df['timestamp'].dt.hour
df['day_of_week'] = df['timestamp'].dt.dayofweek

# Create seasonal plots
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# Monthly patterns
monthly_avg = df.groupby('month')['generation_mw'].agg(['mean', 'std']).reset_index()
ax1.errorbar(monthly_avg['month'], monthly_avg['mean']/1000, 
             yerr=monthly_avg['std']/1000, fmt='o-')
ax1.set_title('Average Generation by Month')
ax1.set_xlabel('Month')
ax1.set_ylabel('Generation (GW)')
ax1.grid(True, alpha=0.3)

# Hourly patterns
hourly_avg = df.groupby('hour')['generation_mw'].agg(['mean', 'std']).reset_index()
ax2.errorbar(hourly_avg['hour'], hourly_avg['mean']/1000, 
             yerr=hourly_avg['std']/1000, fmt='o-')
ax2.set_title('Average Generation by Hour')
ax2.set_xlabel('Hour of Day')
ax2.set_ylabel('Generation (GW)')
ax2.grid(True, alpha=0.3)

# Day of week patterns
daily_avg = df.groupby('day_of_week')['generation_mw'].agg(['mean', 'std']).reset_index()
ax3.errorbar(daily_avg['day_of_week'], daily_avg['mean']/1000, 
             yerr=daily_avg['std']/1000, fmt='o-')
ax3.set_title('Average Generation by Day of Week')
ax3.set_xlabel('Day of Week (0=Monday)')
ax3.set_ylabel('Generation (GW)')
ax3.grid(True, alpha=0.3)

# Monthly box plot
sns.boxplot(data=df, x='month', y='generation_gw', ax=ax4)
ax4.set_title('Generation Distribution by Month')
ax4.set_xlabel('Month')
ax4.set_ylabel('Generation (GW)')
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Print seasonal statistics
print("\nSeasonal Analysis Summary:")
print("=========================")

print("\nMonthly Statistics:")
monthly_stats = df.groupby('month')['generation_mw'].describe()/1000
print(monthly_stats)

print("\nHourly Statistics:")
hourly_stats = df.groupby('hour')['generation_mw'].describe()/1000
print(hourly_stats)

print("\nDay of Week Statistics:")
dow_stats = df.groupby('day_of_week')['generation_mw'].describe()/1000
print(dow_stats)

In [2]:
# Test import of make_subplots
try:
    from plotly.subplots import make_subplots
    print("make_subplots imported successfully.")
except ImportError as e:
    print(f"ImportError: {e}")

make_subplots imported successfully.


In [6]:
# Export visualizations as image files
fig.write_image("generation_analysis_over_time.png")
print("Visualization saved as 'generation_analysis_over_time.png'.")

Visualization saved as 'generation_analysis_over_time.png'.
