In [5]:
#### Query to get New Data based on Date above and get into DataFrame
# Computes the next date to load: the latest date present in weather_London_dt
# (restricted to dates that also exist in datehour) plus one day. The result is
# a one-row, one-column DataFrame named Max_Weather_Date (column Max_WeatherDate).
import duckdb

# Load source and target as Delta tables (replace paths with your lakehouse paths)
datehour = "/lakehouse/default/Tables/datehour/"
weather_London_dt = "/lakehouse/default/Tables/weather_London_dt/"

# Define the SQL query using lakehouse source tables
query = """
Select date_add(MAX(w.Date), INTERVAL 1 DAY) as Max_WeatherDate
from datehour as d
left join weather_London_dt as w
    on d.Date = w.date
"""

con = duckdb.connect()
try:
    # Register Delta tables as in-memory DuckDB tables for the query below
    con.execute(f"CREATE TABLE datehour AS SELECT * FROM delta_scan('{datehour}')")
    con.execute(f"CREATE TABLE weather_London_dt AS SELECT * FROM delta_scan('{weather_London_dt}')")

    # Execute the SQL query and fetch results as a DataFrame
    Max_Weather_Date = con.sql(query).fetchdf()
finally:
    # Always release the connection, even when a scan or the query fails;
    # otherwise the failed run leaves an open connection behind in the kernel.
    con.close()

In [6]:
# NOTE: this binds `datetime` to the module and nothing in this cell uses it.
# Later cells run `from datetime import datetime`, which rebinds the same name
# to the class.
import datetime

# Show the frame returned by the DuckDB query (column Max_WeatherDate: latest
# weather date plus one day) so the value can be checked before it is used.
display(Max_Weather_Date)



In [9]:
# Convert the single Max_WeatherDate value to a 'YYYY-MM-DD' string.
#
# The value is read straight from the frame instead of parsing the text layout
# produced by DataFrame.to_string(): that layout right-aligns the value under
# the column header, so whenever the rendered value is narrower than the header
# (e.g. a date-only rendering) the padding spaces made the old split-based
# parsing return an empty string.
import pandas as pd

max_weather_value = Max_Weather_Date.iloc[0, 0]

# MAX() over no matching rows is NULL, which arrives here as NaT/None.
# Fail with a clear message rather than passing the text 'NaT' downstream.
if pd.isna(max_weather_value):
    raise ValueError("Max_Weather_Date holds no date: nothing to derive the next load date from")

# pd.Timestamp accepts Timestamp, datetime, date and ISO strings alike, so the
# formatting below does not depend on the exact type DuckDB handed back.
str_Weather_Date = pd.Timestamp(max_weather_value).strftime("%Y-%m-%d")

display(str_Weather_Date)

'2025-04-02'

In [26]:
# Fetch hourly temperatures for `city` on the next date to load and build
# df_weather (columns: date, time, temperature).
#
# Inputs : Max_Weather_Date (one-cell DataFrame from the DuckDB query cell).
# Outputs: str_Weather_Date, formatted_date, lat, lon, weather_data, df_weather.
# Raises : ValueError if there is no date to load or the city is unknown;
#          RuntimeError if either Open-Meteo request does not return HTTP 200.
import requests
from datetime import datetime
import pandas as pd

# Seconds to wait on each HTTP call before giving up, so a stalled connection
# cannot hang the notebook indefinitely.
REQUEST_TIMEOUT_S = 30

# --- Date to load ------------------------------------------------------------
# Read the value directly rather than parsing DataFrame.to_string(): the text
# layout pads the value to the header width, which made split-based parsing
# return '' for narrow renderings and the literal 'NaT' for a NULL max.
max_weather_value = Max_Weather_Date.iloc[0, 0]
if pd.isna(max_weather_value):
    raise ValueError("Max_Weather_Date holds no date: nothing to derive the next load date from")
str_Weather_Date = pd.Timestamp(max_weather_value).strftime("%Y-%m-%d")

# Example usage
city = 'London'
date = str_Weather_Date

# Round-trip through strptime to validate the 'YYYY-MM-DD' shape the API expects
date_obj = datetime.strptime(date, "%Y-%m-%d")
formatted_date = date_obj.strftime("%Y-%m-%d")

# Open-Meteo forecast endpoint, queried here with explicit start/end dates.
# NOTE(review): this is the forecast API, not the archive API; it presumably
# only serves a limited window of past days - confirm it covers the dates this
# incremental load can ask for.
url = "https://api.open-meteo.com/v1/forecast"

# --- Geocoding ---------------------------------------------------------------
# The city goes through `params` so requests URL-encodes it (names with spaces
# or non-ASCII characters would otherwise produce a malformed URL).
geocode_url = "https://geocoding-api.open-meteo.com/v1/search"
response = requests.get(geocode_url, params={'name': city}, timeout=REQUEST_TIMEOUT_S)

# Fail loudly: continuing after a failed lookup would either hit a NameError on
# lat/lon or, on a re-run, silently reuse coordinates from an earlier execution.
if response.status_code != 200:
    raise RuntimeError(f"Error fetching location data (HTTP {response.status_code})")

location_data = response.json()
# .get() because the 'results' key may be missing entirely when nothing matches
matches = location_data.get('results') if location_data else None
if not matches:
    raise ValueError(f"City not found: {city}")

lat = matches[0]['latitude']
lon = matches[0]['longitude']

# --- Weather request ---------------------------------------------------------
# Parameters for the API request
params = {
    'latitude': lat,
    'longitude': lon,
    'start_date': formatted_date,
    'end_date': formatted_date,
    'hourly': 'temperature_2m',
    'timezone': 'auto'
}

# Make the API request
response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_S)

# Raise instead of printing so later cells never run against a missing or
# stale df_weather.
if response.status_code != 200:
    raise RuntimeError(f"Error fetching weather data (HTTP {response.status_code})")

weather_data = response.json()
hourly_data = weather_data['hourly']

# Create a DataFrame from the hourly data: one row per hourly reading, with the
# requested date repeated alongside each timestamp.
df_weather = pd.DataFrame({
    'date': [formatted_date] * len(hourly_data['time']),
    'time': hourly_data['time'],
    'temperature': hourly_data['temperature_2m']
})

print(f"Weather data for {city} on {date}:")
print(df_weather)


Weather data for London on 2025-04-02:
          date              time  temperature
0   2025-04-02  2025-04-02T00:00          8.2
1   2025-04-02  2025-04-02T01:00          8.1
2   2025-04-02  2025-04-02T02:00          8.0
3   2025-04-02  2025-04-02T03:00          8.0
4   2025-04-02  2025-04-02T04:00          8.0
5   2025-04-02  2025-04-02T05:00          7.8
6   2025-04-02  2025-04-02T06:00          7.6
7   2025-04-02  2025-04-02T07:00          7.1
8   2025-04-02  2025-04-02T08:00          7.5
9   2025-04-02  2025-04-02T09:00          9.1
10  2025-04-02  2025-04-02T10:00         10.9
11  2025-04-02  2025-04-02T11:00         12.4
12  2025-04-02  2025-04-02T12:00         13.4
13  2025-04-02  2025-04-02T13:00         14.4
14  2025-04-02  2025-04-02T14:00         14.9
15  2025-04-02  2025-04-02T15:00         15.3
16  2025-04-02  2025-04-02T16:00         16.0
17  2025-04-02  2025-04-02T17:00         15.5
18  2025-04-02  2025-04-02T18:00         14.5
19  2025-04-02  2025-04-02T19:00         

In [27]:
# Re-type the 'date' and 'time' string columns as UTC-aware datetimes so they
# are stored with a proper DateTime data type in the Lakehouse.
import pandas as pd
from datetime import datetime

# Plain assignment, not a copy: df_dt_fixed and df_weather are the same object,
# so the conversions below are visible through both names.
df_dt_fixed = df_weather

# NOTE(review): utc=True tags naive values as UTC without shifting them. The
# weather request asks for timezone='auto', so these are presumably local
# wall-clock times - confirm that labelling them UTC is intended.
for column_name in ("date", "time"):
    df_dt_fixed[column_name] = pd.to_datetime(df_dt_fixed[column_name], utc=True)

display(df_dt_fixed)
