# 2. Update data daily

In [6]:
import pickle
import pandas as pd
import hopsworks
import os

from functions.TomTomAPI import get_traffic_map_from_grid
from functions.HolidaysnWeather import get_weather, is_holiday

In [7]:
coordinates = 59.34318, 18.05141 # Stockholm near Odenplan
zoom = 20

# Get the API key from GitHub Secrets
HOPSWORKS_API_KEY = os.getenv('HOPSWORKS_API_KEY')

# Get AQI API KEY from secrets of hopsworks
proj = hopsworks.login(project="ScalableMLandDeepLcourse")
conn = hopsworks.connection(host="c.app.hopsworks.ai", project=proj, api_key_value=os.environ.get('HOPSWORKS_API_KEY'))
secrets = conn.get_secrets_api()

TOMTOM_API_KEY = secrets.get_secret("TOMTOM_API_KEY").value
TOMTOM_API_KEY2 = secrets.get_secret("TOMTOM_API_KEY2").value
TOMTOM_API_KEY3 = secrets.get_secret("TOMTOM_API_KEY3").value
CALENDAR_API_KEY = secrets.get_secret("CALENDAR_API_KEY").value

TOMTOM_keys = [TOMTOM_API_KEY, TOMTOM_API_KEY2, TOMTOM_API_KEY3]

### 2.1. Collect traffic data

In [8]:
pickle_path = 'notebooks/variables/grid.pickle'

# Getting grid variable from memory
with open(pickle_path, 'rb') as file:
    # Deserialize and retrieve grid from pickle file
    grid = pickle.load(file)

In [9]:
# Collecting traffic flow from grid
success = False
i = 0
while i < len(TOMTOM_keys) and not success:
    try:
        traffic_map = get_traffic_map_from_grid(TOMTOM_keys[i],grid, zoom = zoom)
        success = True
    except:
        print('Failed with key:', i)
        i += 1

Failed with key: 0
Failed with key: 1
num of requests: 288


In [10]:
from shapely.geometry import LineString
import datetime

traffic_df = pd.DataFrame(traffic_map)
traffic_df['coordinates'] = traffic_df['coordinates'].apply(lambda x: LineString([(point['longitude'], point['latitude']) for point in x['coordinate']]))
traffic_df = traffic_df.drop_duplicates()
traffic_df['coordinates'] = traffic_df['coordinates'].apply(lambda x: str(x))

traffic_df['relativeSpeed'] = traffic_df.apply(lambda x: x['currentSpeed'] / x['freeFlowSpeed'], axis = 1)

today = datetime.datetime.now()
today = today.replace(second=0, microsecond=0, minute=0, hour=today.hour) + datetime.timedelta(hours=today.minute//30)
traffic_df['date']= [today for i in range(traffic_df.shape[0])]

traffic_df = traffic_df.drop(columns = ['@version'])

(177, 10)


Unnamed: 0,frc,currentSpeed,freeFlowSpeed,currentTravelTime,freeFlowTravelTime,confidence,roadClosure,coordinates,relativeSpeed,date
0,FRC4,11,11,28,28,1.0,False,LINESTRING (18.038114420612857 59.342615187279...,1.0,2024-12-18 20:00:00
1,FRC4,12,19,62,39,1.0,False,LINESTRING (18.036612383564147 59.342538726056...,0.631579,2024-12-18 20:00:00
2,FRC4,26,26,99,99,1.0,False,LINESTRING (18.060517571414948 59.338708520808...,1.0,2024-12-18 20:00:00
3,FRC7,19,19,75,75,1.0,False,LINESTRING (18.0545509974599 59.34064910896488...,1.0,2024-12-18 20:00:00
4,FRC4,13,21,153,95,0.99,False,LINESTRING (18.038586489399563 59.342495815378...,0.619048,2024-12-18 20:00:00


### 2.2. Collect weather data

In [11]:
weather_df = get_weather(coordinates)

Unnamed: 0,date,temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant
0,2024-12-18 20:00:00,6.126,-0.674,3.6,24.48,188.53627


### 2.3. Collect weather data

In [12]:
# CALENDAR_API_KEY = os.getenv('CALENDAR_API_KEY')
holiday_status = is_holiday(CALENDAR_API_KEY)
weather_df['holiday_status'] = holiday_status

Unnamed: 0,date,temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant,holiday_status
0,2024-12-18 20:00:00,6.126,-0.674,3.6,24.48,188.53627,0


### 2.4. Joining data and uploading to Hopsworks

In [16]:
# Initialize the feature store
fs = proj.get_feature_store()

# Define and insert the Forecast Weather Feature Group
feature_group_name = "stockholm_traffic"
version = 1
fg = fs.get_feature_group(name=feature_group_name, version=version)
fg.insert(traffic_df)

# Define and insert the Air Quality Feature Group
feature_group_name = "stockholm_weather_holiday"
version = 1
fg = fs.get_feature_group(name=feature_group_name, version=version)
fg.insert(weather_df)

Uploading Dataframe: 100.00% |██████████| Rows 177/177 | Elapsed Time: 00:01 | Remaining Time: 00:00


Use fg.materialization_job.run(args=-op offline_fg_materialization -path hdfs:///Projects/ScalableMLandDeepLcourse/Resources/jobs/stockholm_traffic_1_offline_fg_materialization/config_1734547087388) to trigger the materialization job again.



Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:00 | Remaining Time: 00:00


Use fg.materialization_job.run(args=-op offline_fg_materialization -path hdfs:///Projects/ScalableMLandDeepLcourse/Resources/jobs/stockholm_weather_holiday_1_offline_fg_materialization/config_1734547182650) to trigger the materialization job again.



(Job('stockholm_weather_holiday_1_offline_fg_materialization', 'SPARK'), None)