In [4]:
import os
import zipfile
from pathlib import Path

# --- CONFIGURATION ---
# Working directory of the running process (for a notebook: the kernel's cwd).
CURRENT_DIR = Path.cwd()
# Raw files are stored in '<parent of the working directory>/data/raw',
# i.e. a 'data/raw' folder that sits next to the notebook's own folder.
RAW_DATA_DIR = CURRENT_DIR.parent.joinpath("data", "raw")

def download_specific_data():
    """
    Download only the files the project needs into RAW_DATA_DIR.

    1. 'daily_dataset.csv' from the Smart Meters dataset (no other file of
       that dataset is requested, which skips the huge half-hourly files).
    2. The complete London Weather dataset, unzipped in place.

    Progress and problems are reported with print(). A failure in one
    download does not prevent the other from being attempted; a missing
    'kaggle' package or a failed authentication aborts early.

    Returns:
        None. The result is the set of files written to RAW_DATA_DIR.
    """

    # 1. Ensure directory exists (the check only gates the message;
    #    mkdir itself is safe to call on an existing directory).
    if not RAW_DATA_DIR.exists():
        RAW_DATA_DIR.mkdir(parents=True, exist_ok=True)
        print(f"Created directory: {RAW_DATA_DIR}")

    # 2. Authenticate Kaggle API
    # The import stays inside the try block: it is a third-party dependency
    # that may be absent, and importing it may itself fail when credentials
    # are missing (NOTE(review): depends on the installed kaggle version).
    try:
        from kaggle.api.kaggle_api_extended import KaggleApi
        api = KaggleApi()
        api.authenticate()
    except ImportError as e:
        # A missing package is not a credentials problem; say so explicitly.
        print(f"Error: The 'kaggle' package could not be imported. Details: {e}")
        return
    except Exception as e:
        print(f"Error: Authentication failed. Check 'kaggle.json'. Details: {e}")
        return
    print("Kaggle API authenticated.")

    print("\n--- Starting Optimized Download ---")

    # ---------------------------------------------------------
    # TASK A: Download ONLY 'daily_dataset.csv' (Energy Data)
    # ---------------------------------------------------------
    energy_dataset = "jeanmidev/smart-meters-in-london"
    target_file = "daily_dataset.csv"

    print(f"1. Downloading specific file: '{target_file}' from {energy_dataset}...")

    try:
        # Download specific file
        api.dataset_download_file(energy_dataset, target_file, path=RAW_DATA_DIR)

        # The single file may arrive zipped as 'daily_dataset.csv.zip';
        # extract it and drop the archive when that is the case.
        zip_name = RAW_DATA_DIR / (target_file + ".zip")
        csv_path = RAW_DATA_DIR / target_file

        if zip_name.exists():
            print(f"   -> Extracting {zip_name.name}...")
            with zipfile.ZipFile(zip_name, 'r') as zip_ref:
                zip_ref.extractall(RAW_DATA_DIR)
            zip_name.unlink()  # Clean up the archive after extraction
            print("   -> Extraction complete.")
        elif csv_path.exists():
            print("   -> File downloaded directly (uncompressed).")
        else:
            # Neither the archive nor the CSV is present: do not claim success.
            print(f"   ❌ Error downloading energy data: '{target_file}' not found in '{RAW_DATA_DIR}' after download.")

    except Exception as e:
        # Best-effort: report and continue with the weather download.
        print(f"   ❌ Error downloading energy data: {e}")

    # ---------------------------------------------------------
    # TASK B: Download Weather Data (Small enough to fetch all)
    # ---------------------------------------------------------
    weather_dataset = "emmanuelfwerr/london-weather-data"
    print(f"\n2. Downloading full weather dataset: {weather_dataset}...")

    try:
        api.dataset_download_files(weather_dataset, path=RAW_DATA_DIR, unzip=True)
        print("   -> Download and extraction complete.")
    except Exception as e:
        print(f"   ❌ Error downloading weather data: {e}")

    # ---------------------------------------------------------
    # SUMMARY
    # ---------------------------------------------------------
    print("\n--- Download Summary ---")
    # Sorted so the listing is stable between runs; guarded so a missing
    # directory is treated the same as one without CSV files.
    csv_names = (
        sorted(p.name for p in RAW_DATA_DIR.glob('*.csv'))
        if RAW_DATA_DIR.is_dir()
        else []
    )
    if csv_names:
        print(f"Files available in '{RAW_DATA_DIR}':")
        for name in csv_names:
            print(f" - {name}")
    else:
        print(f"Warning: No CSV files found in '{RAW_DATA_DIR}'.")

# Entry point: run the download when this code is executed directly
# (as a script, or as a notebook cell where __name__ is "__main__").
# Importing it from another module does not trigger any download.
if __name__ == "__main__":
    download_specific_data()

Created directory: c:\Tanulmanyok\Projektmunka_Repository\Projektmunka\data\raw
Kaggle API authenticated.

--- Starting Optimized Download ---
1. Downloading specific file: 'daily_dataset.csv' from jeanmidev/smart-meters-in-london...
Dataset URL: https://www.kaggle.com/datasets/jeanmidev/smart-meters-in-london
   -> Extracting daily_dataset.csv.zip...
   -> Extraction complete.

2. Downloading full weather dataset: emmanuelfwerr/london-weather-data...
Dataset URL: https://www.kaggle.com/datasets/emmanuelfwerr/london-weather-data
   -> Download and extraction complete.

--- Download Summary ---
Files available in 'c:\Tanulmanyok\Projektmunka_Repository\Projektmunka\data\raw':
 - daily_dataset.csv
 - london_weather.csv
