In [26]:
import json
import pandas as pd
import datetime as dt
import requests
import psycopg2
import os
import boto3
from psycopg2.extras import execute_values
from dotenv import load_dotenv
from zoneinfo import ZoneInfo

In [None]:
load_dotenv('.env', override=True)

# Get Environment Variables
endpoint = os.getenv('ENDPOINT_DL')
db_name = os.getenv('DB_NAME_DL')
username = os.getenv('USERNAME_DL')
password = os.getenv('PASSWORD_DL')
aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID_INTERNAL')
aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY_INTERNAL')
bucket_name = os.getenv('BUCKET_NAME')

DB_CONFIG2 = {
    "host": os.getenv("var_host2"),
    "port": os.getenv("var_port"),
    "database": os.getenv("var_database2"),
    "user": os.getenv("USERNAME"),
    "password": os.getenv("var_password2")
}


In [None]:
def upload_dataframe_to_db(df, table_name, conn):
    cur = conn.cursor()
    
    CREATE_TABLE_SQL = f"""
    CREATE TABLE IF NOT EXISTS {table_name} (
        city         text          NOT NULL,
        date         date          NOT NULL,
        sunrise      timestamp     NOT NULL,
        sunset       timestamp     NOT NULL,
        first_light  timestamp     NOT NULL,
        last_light   timestamp     NOT NULL,
        dawn         timestamp     NOT NULL,
        dusk         timestamp     NOT NULL,
        solar_noon   timestamp     NOT NULL,
        golden_hour  timestamp     NOT NULL,
        day_length   interval      NOT NULL,
        PRIMARY KEY (city, date)
    );
    """

    cur.execute(CREATE_TABLE_SQL)

    INSERT_SQL = f"""
    INSERT INTO {table_name} (
        city, date, sunrise, sunset, first_light, last_light,
        dawn, dusk, solar_noon, golden_hour, day_length
    ) VALUES %s
    ON CONFLICT (city, date) DO UPDATE SET
        sunrise      = EXCLUDED.sunrise,
        sunset       = EXCLUDED.sunset,
        first_light  = EXCLUDED.first_light,
        last_light   = EXCLUDED.last_light,
        dawn         = EXCLUDED.dawn,
        dusk         = EXCLUDED.dusk,
        solar_noon   = EXCLUDED.solar_noon,
        golden_hour  = EXCLUDED.golden_hour,
        day_length   = EXCLUDED.day_length;
    """

    cols = [
        "city","date","sunrise","sunset","first_light","last_light",
        "dawn","dusk","solar_noon","golden_hour","day_length"
    ]
    rows = df[cols].where(pd.notnull(df), None).to_numpy().tolist()

    execute_values(cur, INSERT_SQL, rows, page_size=500)     
    
    cur.close()

def upload_dataframe_to_bucket(df, foldername,s3 ,bucket_name):
    today = str(dt.datetime.today().date())
    key = foldername + '/' + foldername + '_' + today + '.json'  # Key = path in the bucket

    data = df.reset_index().to_json(orient="records", date_format="iso")

    s3.put_object(
        Bucket=bucket_name,
        Key=key,
        Body=json.dumps(data),
        ContentType='application/json'
    )

def connect_to_db2():
    return psycopg2.connect(**DB_CONFIG2)

def get_city_data():
    conn = connect_to_db2()
    try:
        query = "SELECT city, lat, long FROM dim_locations;"
        df = pd.read_sql_query(query, conn)
        return df
    finally:
        conn.close()

def fetch_sun_data(city, lat, lng, start):
    url = f"https://api.sunrisesunset.io/json?lat={lat}&lng={lng}&date={start}"
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Error on {city}: {response.status_code}")
        return None
    return response.json()

def extract_sun_data(city, sun_json):
    df = pd.json_normalize(sun_json["results"])
    df["date"] = pd.to_datetime(df["date"]) 
    df["city"] = city
    return df

def lambda_handler():
    df_coords = get_city_data()
    all_sun_data = pd.DataFrame([])

    for _, row in df_coords.iterrows():
            city = row["city"]
            lat = row["lat"]
            lng = row["long"]
            sun_json = fetch_sun_data(city, lat, lng, start=str(dt.datetime.now().date()-dt.timedelta(days=1)))
            all_sun_data = pd.concat([all_sun_data, extract_sun_data(city, sun_json)], ignore_index=True)

    time_cols = [
    "sunrise", "sunset", "first_light", "last_light",
    "dawn", "dusk", "solar_noon", "golden_hour"
    ]

    for col in time_cols:
        all_sun_data[col] = pd.to_datetime(
            all_sun_data["date"].dt.strftime("%Y-%m-%d") + " " + all_sun_data[col],
            format="%Y-%m-%d %I:%M:%S %p"
        ).dt.tz_localize(ZoneInfo("UTC"))
        all_sun_data[col] = all_sun_data[col].dt.tz_convert(ZoneInfo("Europe/Zurich"))
    all_sun_data["day_length"] = pd.to_timedelta(all_sun_data["day_length"])

    try:
        print("Connecting to DB & Bucket...")
        conn = psycopg2.connect(
            host=endpoint,
            dbname=db_name,
            user=username,
            password=password
        )
        conn.set_session(autocommit=True)

        print("Connection to DB successful.")

        s3 = boto3.client('s3',
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key
            )

        print("Connection to Bucket successful.")

        upload_dataframe_to_db(all_sun_data, "tbl_weather_sun_data", conn)
        print("Data uploaded to DB successfully.")

        upload_dataframe_to_bucket(all_sun_data, "weather_sun",s3,bucket_name)
        print("Data uploaded to Bucket successfully.")

    except Exception as e:
        print("Error:", e)

lambda_handler()

date                          datetime64[ns]
sunrise        datetime64[ns, Europe/Zurich]
sunset         datetime64[ns, Europe/Zurich]
first_light    datetime64[ns, Europe/Zurich]
last_light     datetime64[ns, Europe/Zurich]
dawn           datetime64[ns, Europe/Zurich]
dusk           datetime64[ns, Europe/Zurich]
solar_noon     datetime64[ns, Europe/Zurich]
golden_hour    datetime64[ns, Europe/Zurich]
day_length                   timedelta64[ns]
timezone                              object
utc_offset                             int64
city                                  object
dtype: object
Connecting to DB & Bucket...
Connection to DB successful.
Connection to Bucket successful.
Data uploaded to DB successfully.
Data uploaded to Bucket successfully.


In [38]:
https://api.sunrisesunset.io/json?lat=47.050545&lng=8.305468&date=2025-04-29

https://api.sunrisesunset.io/json?lat=38.907192&lng=-77.036873&date_start=1990-05-01&date_end=1990-07-01

SyntaxError: leading zeros in decimal integer literals are not permitted; use an 0o prefix for octal integers (2908049823.py, line 1)