In [1]:
import os
import requests
import pandas as pd

In [2]:
# Attach Google Drive to the Colab runtime so later cells can read from and
# write to files stored under the mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
# Project locations on Google Drive
#
#   DRIVE_ROOT  - top-level project folder on the mounted Drive
#   OUTPUT_DIR  - folder that receives the processed dataset
#   OUTPUT_PATH - full path of the CSV written at the end of the workflow
DRIVE_ROOT = '/content/drive/MyDrive/Facultet_Lisbon/Project_Lisbon/'
OUTPUT_DIR = os.path.join(DRIVE_ROOT, 'data/processed')
OUTPUT_PATH = os.path.join(OUTPUT_DIR, 'output.csv')

# Make sure the output folder exists; exist_ok=True keeps re-runs from failing
# when the folder is already there.
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(
    f"Project root set to: {DRIVE_ROOT}",
    f"Output directory created at: {OUTPUT_DIR}",
    sep="\n",
)

Project root set to: /content/drive/MyDrive/Facultet_Lisbon/Project_Lisbon/
Output directory created at: /content/drive/MyDrive/Facultet_Lisbon/Project_Lisbon/data/processed


In [4]:
# ELT script configuration (main.py content)

# Forecast location: Lisbon, Portugal (decimal degrees).
LATITUDE, LONGITUDE = 38.7167, -9.1333

# Open-Meteo forecast endpoint queried by the extract step.
API_URL = "https://api.open-meteo.com/v1/forecast"

In [5]:
# EXTRACT & LOAD (Raw)
# Requests the daily forecast for the configured coordinates and loads the
# "daily" section of the JSON response into the raw DataFrame `df_raw`.
print("1. [E]xtract & [L]oad (Raw): Fetching data from Open-Meteo API...")
params = {
    "latitude": LATITUDE,
    "longitude": LONGITUDE,
    "daily": ["temperature_2m_max", "temperature_2m_min", "precipitation_sum"],
    "timezone": "auto"
}

try:
    # A timeout keeps the cell from hanging indefinitely if the API stalls.
    response = requests.get(API_URL, params=params, timeout=30)
    response.raise_for_status()
    daily_data = response.json().get('daily', {})
except requests.exceptions.RequestException as e:
    print(f"Error fetching data: {e}")
    # Re-raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down, whereas an exception stops the run here and keeps the traceback.
    raise

# Fail early with a clear message rather than letting later cells hit a
# KeyError on an empty DataFrame.
if not daily_data:
    raise ValueError("Open-Meteo response did not contain any 'daily' data.")

df_raw = pd.DataFrame(daily_data)

print(f"   Raw Data extracted. Shape: {df_raw.shape}")

1. [E]xtract & [L]oad (Raw): Fetching data from Open-Meteo API...
   Raw Data extracted. Shape: (7, 4)


In [8]:
print("\n Transform: Applying transformations...")

# Transformation 1: give the raw API fields readable names and keep only
# those columns, in the order they are listed in the mapping.
column_names = {
    'time': 'Date',
    'temperature_2m_max': 'Max_Temp_C',
    'temperature_2m_min': 'Min_Temp_C',
    'precipitation_sum': 'Precip_mm',
}
df_processed = df_raw.rename(columns=column_names)[list(column_names.values())]


 Transform: Applying transformations...


In [9]:
# Transformation 2: derive the daily temperature range (maximum minus minimum)
# and show the first rows of the result.
df_processed['Temp_Range_C'] = df_processed['Max_Temp_C'].sub(df_processed['Min_Temp_C'])
print("   Processed Data Head:", df_processed.head(), sep="\n")

   Processed Data Head:
         Date  Max_Temp_C  Min_Temp_C  Precip_mm  Temp_Range_C
0  2025-12-07        16.9        13.8        0.2           3.1
1  2025-12-08        17.3        13.8        0.8           3.5
2  2025-12-09        17.1        15.2       30.9           1.9
3  2025-12-10        16.7        12.7        0.0           4.0
4  2025-12-11        15.3        12.9        0.3           2.4


In [10]:
# LOAD (Processed)
# Announce the destination of the processed dataset before it is written.
load_message = f"\n3. [L]oad (Processed): Saving clean data to Google Drive at {OUTPUT_PATH}..."
print(load_message)


3. [L]oad (Processed): Saving clean data to Google Drive at /content/drive/MyDrive/Facultet_Lisbon/Project_Lisbon/data/processed/output.csv...


In [12]:
# Write the processed DataFrame as CSV to the Google Drive path defined in
# OUTPUT_PATH. index=False keeps the pandas row index out of the file.
df_processed.to_csv(OUTPUT_PATH, index=False)

# The emoji below was previously stored as mis-decoded bytes; it is restored
# to the intended character.
print("\n🎉 ETL Workflow Complete.")


🎉 ETL Workflow Complete.


In [15]:
# Creating and Saving Deliverable Files to Drive
#
# Writes README.md and requirements.txt into the project root (DRIVE_ROOT) and
# makes sure the src/ folder exists. src/main.py itself is NOT written here.

# Create the content for the files.
# The location line is rendered from LATITUDE / LONGITUDE so the README always
# matches the coordinates that were actually requested from the API.
readme_content = f"""
# ☁️ Open-Meteo Weather Forecast ETL

This project performs an ELT workflow using the Open-Meteo API and pandas.

## Data Source
* **Source:** Open-Meteo Public API (Weather Forecast)
* **Location:** Lisbon, Portugal ({LATITUDE}, {LONGITUDE})

## Transformations
1.  **Selection and Renaming:** Columns were selected and renamed for clarity (e.g., 'time' to 'Date', 'temperature_2m_max' to 'Max_Temp_C').
2.  **New Column:** Calculated 'Temp_Range_C' (Max_Temp_C - Min_Temp_C).

## How to Execute
This project was built and executed in Google Colab.

1.  Mount Google Drive.
2.  Run the ELT script.
3.  The output is saved to `data/processed/output.csv` in the project directory.

## Output Structure (data/processed/output.csv)
| Date | Max_Temp_C | Min_Temp_C | Precip_mm | Temp_Range_C |
|:---|:---|:---|:---|:---|
"""

requirements_content = """
pandas
requests
"""

# Files keyed by their path relative to DRIVE_ROOT.
# The 'src/main.py' entry is only a placeholder and is skipped by the loop below.
files_to_save = {
    'README.md': readme_content,
    'requirements.txt': requirements_content,
    'src/main.py': 'Refer to the Colab notebook or clone the code below.'
}


# Ensure the src/ folder exists even though main.py is not written by this cell.
main_py_path = os.path.join(DRIVE_ROOT, 'src/main.py')
os.makedirs(os.path.dirname(main_py_path), exist_ok=True)

print("\n4. Saving README.md and requirements.txt to Google Drive...")

for filename, content in files_to_save.items():
    if filename == 'src/main.py':
        continue

    file_path = os.path.join(DRIVE_ROOT, filename)
    # Explicit UTF-8: the README contains non-ASCII characters, so the result
    # must not depend on the platform's default encoding.
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(content.strip())
    print(f"   Saved {filename} to {file_path}")


4. Saving README.md and requirements.txt to Google Drive...
   Saved README.md to /content/drive/MyDrive/Facultet_Lisbon/Project_Lisbon/README.md
   Saved requirements.txt to /content/drive/MyDrive/Facultet_Lisbon/Project_Lisbon/requirements.txt
