In [7]:
#Import needed libraries
import requests
import pandas as pd
from datetime import datetime, timezone
import os

In [8]:
# Extract step configuration for the OpenWeatherMap "current weather" endpoint.
#
# The API key is a credential, so it is read from the OPENWEATHER_API_KEY
# environment variable instead of being stored in the notebook. Export that
# variable before starting the kernel; with an empty key the requests are
# expected to be rejected by the API.
# NOTE(review): the key that used to be committed on this line should be
# revoked/rotated in the OpenWeatherMap account.
API_KEY = os.environ.get("OPENWEATHER_API_KEY", "")
# HTTPS, because the key is sent as a query parameter on every request.
BASE_URL = "https://api.openweathermap.org/data/2.5/weather"

def get_cities():
    """Return the city names the pipeline requests weather for.

    Returns:
        list: A fresh list of ten city-name strings, in query order.
    """
    city_names = (
        "New York",
        "Boston",
        "Philadelphia",
        "Miami",
        "Atlanta",
        "Baltimore",
        "Tampa",
        "Orlando",
        "Charlotte",
        "Jacksonville",
    )
    # Hand back a new list each call so callers can mutate it freely.
    return list(city_names)

def fetch_weather_data(cities, timeout=10):
    """Fetch the current weather for each city and return the usable records.

    A city that cannot be fetched (network error, non-200 status, or a
    response missing the expected fields) is reported with a "Fail" line and
    skipped, so one bad city never aborts the rest of the batch.

    Args:
        cities: Iterable of city-name strings to query.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        list: One dict per successfully fetched city with the keys
        "city", "temp", "humidity", "description" and "timestamp"
        (a timezone-aware UTC datetime built from the response's "dt").
    """
    records = []

    for city in cities:
        # Let requests build the query string so names containing spaces,
        # "&" or other reserved characters are percent-encoded correctly.
        query = {"q": city, "appid": API_KEY, "units": "metric"}

        try:
            response = requests.get(BASE_URL, params=query, timeout=timeout)
        except requests.RequestException as exc:
            # Only the exception type is printed: the exception text can
            # include the request URL, which carries the API key.
            print(f"Fail {city}: {type(exc).__name__}")
            continue

        if response.status_code != 200:
            print(f"Fail {city}: HTTP {response.status_code}")
            continue

        try:
            data = response.json()
            record = {
                "city": data["name"],
                "temp": data["main"]["temp"],
                "humidity": data["main"]["humidity"],
                "description": data["weather"][0]["description"],
                "timestamp": datetime.fromtimestamp(data["dt"], timezone.utc),
            }
        except (KeyError, IndexError, TypeError, ValueError) as exc:
            # A 200 response whose body is not the expected JSON shape.
            print(f"Fail {city}: unexpected response ({type(exc).__name__})")
            continue

        records.append(record)
        print(f"Success {city}")

    return records

In [9]:
# Transforming the dataset
def transform_data(raw_records):
    """Build a DataFrame from the raw records and add derived columns.

    Derived columns:
        temp_fahrenheit: ``temp`` converted with ``temp * 9/5 + 32``.
        temp_category:   'Hot' above 25, 'Warm' above 15, otherwise 'Cool'.
        humidity_level:  'High' above 70, 'Medium' above 40, otherwise 'Low'.

    Args:
        raw_records: Sequence of dicts that each provide at least the
            "temp" and "humidity" keys.

    Returns:
        pandas.DataFrame: The records plus the three derived columns. When
        there are no records, an empty frame that still has every expected
        column, so downstream column selection and CSV export keep working.
    """
    raw_columns = ["city", "temp", "humidity", "description", "timestamp"]
    derived_columns = ["temp_fahrenheit", "temp_category", "humidity_level"]

    # Category boundaries (exclusive lower bounds), named to avoid magic numbers.
    hot_above, warm_above = 25, 15
    high_above, medium_above = 70, 40

    df = pd.DataFrame(raw_records)

    # An empty record list yields a frame with no columns at all; without this
    # guard the first df['temp'] access would raise KeyError.
    if df.empty:
        return pd.DataFrame(columns=raw_columns + derived_columns)

    df['temp_fahrenheit'] = (df['temp'] * 9/5) + 32

    # Start from the lowest bucket and overwrite upward. Comparisons with NaN
    # are False, so missing values stay in the lowest bucket.
    df['temp_category'] = 'Cool'
    df.loc[df['temp'] > warm_above, 'temp_category'] = 'Warm'
    df.loc[df['temp'] > hot_above, 'temp_category'] = 'Hot'

    df['humidity_level'] = 'Low'
    df.loc[df['humidity'] > medium_above, 'humidity_level'] = 'Medium'
    df.loc[df['humidity'] > high_above, 'humidity_level'] = 'High'

    return df

In [10]:
# Load: save the transformed data as a CSV file
def load_data(df, output_path="data/weather_data.csv"):
    """Write the DataFrame to a CSV file and return the file's path.

    Args:
        df: pandas DataFrame to save; written without the index column.
        output_path: Destination file. Defaults to "data/weather_data.csv".
            Missing parent directories are created.

    Returns:
        str: The path the CSV was written to.
    """
    output_path = os.fspath(output_path)

    # os.makedirs("") raises, so only create a directory when the path has one.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    df.to_csv(output_path, index=False)
    print(f"Data saved: {len(df)} cities")
    return output_path

In [11]:
#Running the ETL
def run_etl():
    """Run the extract, transform and load steps in order.

    Prints a banner before the run and a completion line after it.

    Returns:
        The transformed DataFrame produced by ``transform_data`` (the same
        object that was handed to ``load_data``).
    """
    print("=== The Weather ETL Pipeline Project ===")

    # Extract -> Transform: city list, raw API records, enriched frame.
    weather_frame = transform_data(fetch_weather_data(get_cities()))

    # Load: persist the frame; the returned file path is not needed here.
    load_data(weather_frame)

    print(" ETL Process Completed")
    return weather_frame

if __name__ == "__main__":
    # Execute the whole pipeline, then preview the first rows of the
    # headline columns.
    df = run_etl()
    print("\nSample of the data:")
    print(df.head()[['city', 'temp', 'temp_category', 'humidity_level']])

=== The Weather ETL Pipeline Project ===
Success New York
Success Boston
Success Philadelphia
Success Miami
Success Atlanta
Success Baltimore
Success Tampa
Success Orlando
Success Charlotte
Success Jacksonville
Data saved: 10 cities
 ETL Complete!

Sample data:
           city   temp temp_category humidity_level
0      New York  28.15           Hot         Medium
1        Boston  23.56          Warm           High
2  Philadelphia  27.56           Hot         Medium
3         Miami  29.16           Hot           High
4       Atlanta  29.80           Hot         Medium
