In [None]:
#### Query to get New Data based on Date above and get into DataFrame
import duckdb

# Path to the `datehour` Delta table in the notebook's default lakehouse
# (replace with your own lakehouse path).
datehour = "/lakehouse/default/Tables/datehour/"

# Distinct dates inside the fixed window below; BETWEEN is inclusive on both ends.
query = """
SELECT DISTINCT d.Date
FROM datehour AS d
WHERE d.Date BETWEEN '2025-06-01' AND '2025-06-03'
"""

# In-memory DuckDB connection. The try/finally guarantees it is closed even
# when the Delta scan or the query raises.
con = duckdb.connect()
try:
    # Expose the Delta table as a view instead of copying it with
    # CREATE TABLE ... AS SELECT *: the query only needs a handful of distinct
    # dates, so there is no reason to materialise every row in memory first.
    con.execute(f"CREATE VIEW datehour AS SELECT * FROM delta_scan('{datehour}')")

    # Execute the SQL query and fetch the result as a pandas DataFrame.
    df_Weather_Date = con.sql(query).fetchdf()
finally:
    # Close the connection
    con.close()

In [None]:
import datetime

# Show the distinct dates returned by the DuckDB query.
display(df_Weather_Date)



In [None]:
import pandas as pd

# Normalise the `Date` column to plain `datetime.date` values.
#
# `unit='ms'` only makes sense for numeric epoch values, so it is applied only
# when the column is numeric; datetime-like, string or date-object columns are
# parsed as they are. This also keeps the cell re-runnable: after the first run
# the column holds `datetime.date` objects, and feeding those back through
# `pd.to_datetime(..., unit='ms')` is rejected by pandas.
_date_values = df_Weather_Date['Date']
if pd.api.types.is_numeric_dtype(_date_values):
    _parsed_dates = pd.to_datetime(_date_values, unit='ms')
else:
    _parsed_dates = pd.to_datetime(_date_values)
df_Weather_Date['Date'] = _parsed_dates.dt.date

# Display the result
print(df_Weather_Date)

In [None]:
import requests
from datetime import datetime
import pandas as pd

# Collect hourly temperatures for one city, one request per date in
# df_Weather_Date, and combine everything into a single DataFrame.

# City whose weather is collected.
city = 'London'

# Open-Meteo endpoints.
# NOTE(review): `url` is the *forecast* endpoint, queried here for specific
# calendar days via start_date/end_date. Confirm it serves the dates you need,
# or whether Open-Meteo's historical archive API is the intended source.
url = "https://api.open-meteo.com/v1/forecast"
geocode_url = "https://geocoding-api.open-meteo.com/v1/search"

# Upper bound (seconds) on every HTTP call so a stalled request cannot hang the cell.
request_timeout = 30


def _lookup_coordinates(city_name):
    """Resolve a city name to coordinates with the Open-Meteo geocoding API.

    Args:
        city_name: Name sent as the `name` query parameter (URL-encoded by requests).

    Returns:
        A `(latitude, longitude)` tuple taken from the first result, or `None`
        when the request does not return HTTP 200 or contains no results
        (a message is printed in either case).
    """
    response = requests.get(
        geocode_url, params={'name': city_name}, timeout=request_timeout
    )
    if response.status_code != 200:
        print("Error fetching location data")
        return None

    # `.get` instead of ['results']: a payload without that key must be
    # reported as "not found" rather than crash with a KeyError.
    results = (response.json() or {}).get('results')
    if not results:
        print("City not found")
        return None
    return results[0]['latitude'], results[0]['longitude']


def _fetch_hourly_temperature(latitude, longitude, day):
    """Fetch one day of hourly `temperature_2m` readings.

    Args:
        latitude: Latitude passed to the API.
        longitude: Longitude passed to the API.
        day: Date string in `YYYY-MM-DD` form, used as both start and end date.

    Returns:
        A DataFrame with columns `date`, `time` and `temperature`, or `None`
        when the request does not return HTTP 200 (a message is printed).
    """
    params = {
        'latitude': latitude,
        'longitude': longitude,
        'start_date': day,
        'end_date': day,
        'hourly': 'temperature_2m',
        'timezone': 'auto'
    }
    response = requests.get(url, params=params, timeout=request_timeout)
    if response.status_code != 200:
        print("Error fetching weather data")
        return None

    hourly_data = response.json()['hourly']
    return pd.DataFrame({
        'date': [day] * len(hourly_data['time']),
        'time': hourly_data['time'],
        'temperature': hourly_data['temperature_2m']
    })


# One DataFrame per successfully fetched date.
all_weather_dates_dfs = []

# The city never changes inside the loop, so geocode it once up front instead
# of once per date.
coordinates = _lookup_coordinates(city)

if coordinates is not None:
    lat, lon = coordinates

    # Loop through each row and collect data
    for row in df_Weather_Date.itertuples():
        var_Date = str(row.Date)

        # Round-trip through strptime so a malformed date fails here with a
        # clear error instead of being sent to the API.
        formatted_date = datetime.strptime(var_Date, "%Y-%m-%d").strftime("%Y-%m-%d")

        df_weather = _fetch_hourly_temperature(lat, lon, formatted_date)
        if df_weather is None:
            # Skip failed dates: appending here would either raise a NameError
            # or silently re-append the previous date's frame.
            continue

        print(f"Weather data for {city} on {var_Date}:")
        print(df_weather)

        # Append to list of DataFrames
        all_weather_dates_dfs.append(df_weather)

# Combine all DataFrames once, after the loop.
if all_weather_dates_dfs:
    combined_df_weather_Detail = pd.concat(all_weather_dates_dfs, ignore_index=True)
else:
    # Stop here rather than leave `combined_df_weather_Detail` undefined (or
    # stale from an earlier run) for the cells that consume it.
    raise RuntimeError(
        f"No weather data was collected for {city}; nothing to combine."
    )
        


In [None]:
# Converting Date or DateTime to UTC for Correct DateTime Data Type in Lakehouse
import pandas as pd
from datetime import datetime

# Build a new frame whose `date` and `time` columns are timezone-aware (UTC)
# datetimes. `.assign` returns a new DataFrame, so `combined_df_weather_Detail`
# keeps its original string columns instead of being rewritten through an alias.
#
# NOTE(review): `utc=True` only labels timestamps without an offset as UTC; it
# does not shift them. The weather request uses 'timezone': 'auto', so the
# `time` strings may be in the location's local time - confirm whether an
# explicit local-to-UTC conversion is needed.
df_dt_fixed = combined_df_weather_Detail.assign(
    date=lambda frame: pd.to_datetime(frame["date"], utc=True),
    time=lambda frame: pd.to_datetime(frame["time"], utc=True),
)

display(df_dt_fixed)


In [None]:
# Take existing Pandas Data Frame and Write to Lakehouse Table
#### NOTE: This includes writing the data with the valid Date Time Column

import duckdb
from deltalake import write_deltalake
import pandas as pd
import notebookutils  # Fabric-specific utility
from datetime import datetime

# Name of the Lakehouse table to write.
table_name = "weather_London_dt_loop"

# Name of the notebook variable holding the DataFrame to write. DuckDB
# resolves this name to the in-scope pandas DataFrame when it appears in SQL.
dataframe_name = "df_dt_fixed"

# Write mode: "overwrite" replaces the existing table; use "append" to add data.
table_mode = "overwrite"

# Workspace ID or Workspace GUID
workspace_id ="FILL ME IN"

# Lakehouse ID or Lakehouse GUID
lakehouse_id = "FILL ME IN"

# Fail fast with a clear message when the template placeholders were not
# replaced, instead of attempting a write against a meaningless abfss path.
_unfilled_placeholder = "FILL ME IN"
if _unfilled_placeholder in (workspace_id, lakehouse_id):
    raise ValueError(
        "Set workspace_id and lakehouse_id to your Fabric workspace and "
        "lakehouse GUIDs before writing the table."
    )

# Path to the Lakehouse table in OneLake (adjust to your workspace and lakehouse).
table_path = f"abfss://{workspace_id}@onelake.dfs.fabric.microsoft.com/{lakehouse_id}/Tables/{table_name}"

# Use DuckDB to query the DataFrame (optional SQL transformation step).
# Here all rows are selected unchanged, but more complex SQL could go here.
duckdb_result = duckdb.sql(f"SELECT * FROM {dataframe_name}").arrow()

# Write the result to a Delta table in the Lakehouse.
# NOTE(review): the `engine` argument is not accepted by every deltalake
# release - confirm it against the version installed in this environment.
write_deltalake(
    table_path,
    duckdb_result,
    mode=table_mode,
    engine="rust"
)

print(f"Data successfully written to Lakehouse table! {table_path}")