## Step 1: Define the Hopsworks API key and connect to Hopsworks

In [None]:

# Configuration for the Hopsworks connection.
# NOTE(review): neither `exclude` nor `hopsworks` is imported in any cell visible
# here — confirm both are in scope before this cell runs on a fresh kernel.
# The API key is read from `exclude.key` rather than being written inline.
HOPSWORKS_API_KEY = exclude.key.HOPSWORKS_API_KEY
# NOTE(review): these two constants are not referenced by the visible cells
# (the weather feature group is requested with name='weather', version=1) —
# confirm they are still needed.
FEATURE_GROUP_NAME = "bars_near_london_bridge"
FEATURE_GROUP_VERSION = 3

# Log in to Hopsworks with the API key, then obtain the project's feature store.
# `fs` is what the later cell uses to get or create the weather feature group.
project = hopsworks.login(api_key_value=HOPSWORKS_API_KEY)
fs = project.get_feature_store()

### Step 2: Import Libraries


In [None]:
import datetime
import time
import pandas as pd
import json

from functions.parse_weather import get_weather_data_from_open_meteo


### Step 3: Create Weather Feature Group


In [None]:

# Get the 'weather' feature group (version 1), creating it if it does not exist yet.
# Rows are keyed by (unix_time, city_name); unix_time is also the event-time feature.
weather_fg = fs.get_or_create_feature_group(
    name='weather',
    description='Weather characteristics of each day',
    version=1,
    primary_key=['unix_time', 'city_name'],
    # `event_time` is documented as the name of a single feature (a string),
    # so pass the name directly instead of wrapping it in a list.
    event_time="unix_time",
)


### Step 4: Backfill


In [None]:
# Load the historical (backfill) weather records from the hosted CSV file.
# NOTE(review): `df_weather` is not inserted into a feature group in the visible
# cells — confirm whether the backfill insert happens elsewhere.
df_weather = pd.read_csv(
    "https://repo.hops.works/dev/davit/air_quality/backfill_weather.csv"
)

# Parse the 'date' column to datetimes and pass each value through
# 'convert_date_to_unix' to populate the 'unix_time' column.
# NOTE(review): `convert_date_to_unix` is not imported in the visible cells —
# confirm it is in scope.
df_weather["unix_time"] = pd.to_datetime(df_weather["date"]).apply(convert_date_to_unix)

# Store the 'date' column as strings
df_weather["date"] = df_weather["date"].astype(str)


### Step 5: Retrieve latest weather data from Open Meteo


In [None]:
# Record when the cell starts so the total fetch time can be reported at the end
start_of_cell = time.time()

# Collect one DataFrame per city and concatenate once after the loop.
# Calling pd.concat inside the loop would re-copy every accumulated row on
# each iteration.
city_frames = []

# 'target_cities' is iterated as continent -> {city_name: coords}
for continent in target_cities:
    for city_name, coords in target_cities[continent].items():
        # Fetch weather data for this city from its last recorded date
        # ('last_dates_weather[city_name]') up to 'today', with the forecast flag set
        df_ = get_weather_data_from_open_meteo(
            city_name=city_name,
            coordinates=coords,
            start_date=last_dates_weather[city_name],
            end_date=str(today),
            forecast=True,
        )
        city_frames.append(df_)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no city was processed. ignore_index=True yields a fresh 0..n-1 index.
if city_frames:
    df_weather_update = pd.concat(city_frames, ignore_index=True)
else:
    df_weather_update = pd.DataFrame()

# Drop rows with missing values (rebinding instead of mutating in place)
df_weather_update = df_weather_update.dropna()

# Record when the cell finishes
end_of_cell = time.time()

# Report what was fetched and how long it took
print("-" * 64)
print(f"Parsed new weather data for ALL cities up to {str(today)}.")
print(f"Took {round(end_of_cell - start_of_cell, 2)} sec.\n")


### Step 6: Insert latest weather data

In [None]:
# Populate 'unix_time' by passing every 'date' value through 'convert_date_to_unix',
# then store 'date' itself as strings.
# NOTE(review): `convert_date_to_unix` is not imported in the visible cells —
# confirm it is in scope.
df_weather_update["unix_time"] = df_weather_update["date"].apply(convert_date_to_unix)
df_weather_update["date"] = df_weather_update["date"].astype(str)

# Write the newly fetched rows to the weather feature group
weather_fg.insert(df_weather_update)
