In [10]:
!pip install requests
!pip install pandas
!pip install matplotlib
!pip install seaborn

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import requests
import pandas as pd
import os
from pathlib import Path
import time

# Create directory for downloads
download_dir = Path("../Data")
download_dir.mkdir(exist_ok=True)

print("Fetching dataset metadata from NESO API...")

# Get all resources from the API
api_url = "https://api.neso.energy/api/3/action/package_show?id=historic-demand-data"
response = requests.get(api_url)
data = response.json()

# Extract all CSV resources and construct proper download URLs
csv_resources = []
if data['success']:
    dataset_id = data['result']['id']
    resources = data['result']['resources']
    
    for resource in resources:
        if resource['format'].upper() == 'CSV':
            # Construct the download URL using the NESO API pattern
            download_url = f"https://api.neso.energy/dataset/{dataset_id}/resource/{resource['id']}/download/{resource['url'].split('/')[-1] if resource.get('url') else resource['name'].replace(' ', '_') + '.csv'}"
            
            # Alternative: try using the resource's URL directly if it exists
            if resource.get('url') and resource['url'].startswith('http'):
                download_url = resource['url']
            
            csv_resources.append({
                'name': resource['name'],
                'url': download_url,
                'resource_id': resource['id'],
                'filename': resource['url'].split('/')[-1] if resource.get('url') else f"{resource['name'].replace(' ', '_')}.csv"
            })

print(f"Found {len(csv_resources)} CSV files to download\n")

# Download all CSV files
downloaded_files = []
for i, resource in enumerate(csv_resources, 1):
    print(f"[{i}/{len(csv_resources)}] Downloading {resource['name']}...")
    
    try:
        response = requests.get(resource['url'], timeout=60, allow_redirects=True)
        response.raise_for_status()
        
        filepath = download_dir / resource['filename']
        with open(filepath, 'wb') as f:
            f.write(response.content)
        
        downloaded_files.append(filepath)
        print(f"  ✓ Saved to {filepath} ({len(response.content) / 1024 / 1024:.2f} MB)")
        
        # Small delay to be respectful to the server
        time.sleep(0.5)
        
    except Exception as e:
        print(f"  ✗ Error downloading {resource['name']}: {e}")
        print(f"     URL attempted: {resource['url']}")

print(f"\n{'='*60}")
print(f"Downloaded {len(downloaded_files)} files successfully")
print(f"{'='*60}\n")

# Combine all CSV files
print("Combining all CSV files into one dataset...")

all_dataframes = []
for filepath in downloaded_files:
    try:
        print(f"Reading {filepath.name}...")
        # FIX: Parse SETTLEMENT_DATE properly during read
        df = pd.read_csv(filepath)
        
        # Convert SETTLEMENT_DATE to datetime immediately after reading
        if 'SETTLEMENT_DATE' in df.columns:
            # Parse the date format like "01-JAN-2024"
            df['SETTLEMENT_DATE'] = pd.to_datetime(df['SETTLEMENT_DATE'], format='%d-%b-%Y', errors='coerce')
        
        all_dataframes.append(df)
        print(f"  ✓ Loaded {len(df):,} rows")
    except Exception as e:
        print(f"  ✗ Error reading {filepath.name}: {e}")

if all_dataframes:
    # Combine all dataframes
    combined_df = pd.concat(all_dataframes, ignore_index=True)
    
    # Sort by date if SETTLEMENT_DATE column exists
    if 'SETTLEMENT_DATE' in combined_df.columns:
        combined_df = combined_df.sort_values(['SETTLEMENT_DATE', 'SETTLEMENT_PERIOD'], ignore_index=True)
    
    # Save combined dataset
    output_file = "../Data/neso_historic_demand_combined.csv"
    combined_df.to_csv(output_file, index=False)
    
    print(f"\n{'='*60}")
    print(f"✓ Combined dataset saved to: {output_file}")
    print(f"  Total rows: {len(combined_df):,}")
    print(f"  Total columns: {len(combined_df.columns)}")
    print(f"  File size: {os.path.getsize(output_file) / 1024 / 1024:.2f} MB")
    print(f"{'='*60}")
    
    # Display first few rows
    print("\nPreview of combined data:")
    print(combined_df.head())
    
    # Display column names
    print(f"\nColumns: {', '.join(combined_df.columns)}")
    
    # Display basic statistics
    if 'SETTLEMENT_DATE' in combined_df.columns:
        print(f"\nDate range: {combined_df['SETTLEMENT_DATE'].min()} to {combined_df['SETTLEMENT_DATE'].max()}")
        print(f"Valid dates: {combined_df['SETTLEMENT_DATE'].notna().sum():,} / {len(combined_df):,}")
else:
    print("No data to combine!")


Fetching dataset metadata from NESO API...
Found 25 CSV files to download

[1/25] Downloading Historic Demand Data 2009...
Found 25 CSV files to download

[1/25] Downloading Historic Demand Data 2009...
  ✓ Saved to ..\Data\demanddata_2009.csv (1.16 MB)
  ✓ Saved to ..\Data\demanddata_2009.csv (1.16 MB)
[2/25] Downloading Historic Demand Data 2010...
[2/25] Downloading Historic Demand Data 2010...
  ✓ Saved to ..\Data\demanddata_2010.csv (1.18 MB)
  ✓ Saved to ..\Data\demanddata_2010.csv (1.18 MB)
[3/25] Downloading Historic Demand Data 2011...
[3/25] Downloading Historic Demand Data 2011...
  ✓ Saved to ..\Data\demanddata_2011.csv (1.21 MB)
  ✓ Saved to ..\Data\demanddata_2011.csv (1.21 MB)
[4/25] Downloading Historic Demand Data 2012...
[4/25] Downloading Historic Demand Data 2012...
  ✓ Saved to ..\Data\demanddata_2012.csv (1.26 MB)
  ✓ Saved to ..\Data\demanddata_2012.csv (1.26 MB)
[5/25] Downloading Historic Demand Data 2013...
[5/25] Downloading Historic Demand Data 2013...
  ✓ S

In [None]:
import pandas as pd

# Load the combined CSV file
df = pd.read_csv("../Data/neso_historic_demand_combined.csv")

print("Original column names:")
print(df.columns.tolist())
print()

# Rename columns: remove spaces and make lowercase
df.columns = df.columns.str.replace(' ', '_').str.lower()

print("Renamed column names:")
print(df.columns.tolist())
print()

# Save the file with renamed columns
df.to_csv("../Data/neso_historic_demand_combined.csv", index=False)

print("✓ Columns renamed and file saved successfully!")
print(f"  Total rows: {len(df):,}")
print(f"  Total columns: {len(df.columns)}")


  df = pd.read_csv("../Data/neso_historic_demand_combined.csv")


Original column names:
['SETTLEMENT_DATE', 'SETTLEMENT_PERIOD', 'ND', 'TSD', 'ENGLAND_WALES_DEMAND', 'EMBEDDED_WIND_GENERATION', 'EMBEDDED_WIND_CAPACITY', 'EMBEDDED_SOLAR_GENERATION', 'EMBEDDED_SOLAR_CAPACITY', 'NON_BM_STOR', 'PUMP_STORAGE_PUMPING', 'IFA_FLOW', 'IFA2_FLOW', 'BRITNED_FLOW', 'MOYLE_FLOW', 'EAST_WEST_FLOW', 'NEMO_FLOW', 'NSL_FLOW', 'ELECLINK_FLOW', 'VIKING_FLOW', 'GREENLINK_FLOW', 'SCOTTISH_TRANSFER']

Renamed column names:
['settlement_date', 'settlement_period', 'nd', 'tsd', 'england_wales_demand', 'embedded_wind_generation', 'embedded_wind_capacity', 'embedded_solar_generation', 'embedded_solar_capacity', 'non_bm_stor', 'pump_storage_pumping', 'ifa_flow', 'ifa2_flow', 'britned_flow', 'moyle_flow', 'east_west_flow', 'nemo_flow', 'nsl_flow', 'eleclink_flow', 'viking_flow', 'greenlink_flow', 'scottish_transfer']

✓ Columns renamed and file saved successfully!
  Total rows: 435,408
  Total columns: 22
✓ Columns renamed and file saved successfully!
  Total rows: 435,408
  T