In [None]:
import os
import sys
sys.path.append("../src/")
sys.path.append("../")

import config
import data

In [None]:
from datetime import datetime, timedelta
import pandas as pd

#Current hour in UTC, kept timezone-naive (same shape of value as the former
#pd.to_datetime(datetime.utcnow()).floor("H"), without the deprecated
#datetime.utcnow() call and the deprecated uppercase "H" frequency alias)
CurrentDate = pd.Timestamp.now(tz = "UTC").tz_localize(None).floor("h")

#Fetching Data from 28 Days ago up to Now
#By doing this we are adding a lot of Redundancy to the Pipeline
#Lot of Redundancy -> Pipeline doesn't break if it misses a job
#NOTE: Start must be the EARLIER bound and End the LATER one, because they are
#passed as fromdate/todate to FetchBatchRawData
FetchDataStart = CurrentDate - timedelta(days = 28)
FetchDataEnd = CurrentDate

In [None]:
def FetchBatchRawData(fromdate:datetime, todate:datetime) -> pd.DataFrame:
    """
    Simulate a batch of recent raw rides by replaying historical data.

    We do not have access to Real Time Taxi Data, so the window
    [fromdate, todate] is moved 52 weeks into the past, the historical rides
    inside that window are loaded month by month with data.LoadRawData, and
    their "PickupDatetime" is moved 52 weeks forward again so the rides look
    recent.

    Args:
        fromdate: Earlier bound of the requested window (inclusive).
        todate: Later bound of the requested window (inclusive).

    Returns:
        The rides whose shifted "PickupDatetime" lies inside the window,
        sorted by "PickupLocationID" and then "PickupDatetime".

    Raises:
        ValueError: If fromdate is later than todate.
    """
    #A reversed window used to return unrelated rows silently; fail loudly instead
    if fromdate > todate:
        raise ValueError(
            f"fromdate ({fromdate}) must not be later than todate ({todate})"
        )

    #52 weeks (not 365 days) so the historical days keep the same weekday
    shift = timedelta(days = 7*52)
    from_ = fromdate - shift
    to_ = todate - shift

    #Every calendar month touched by [from_, to_], each listed exactly once
    months = []
    year, month = from_.year, from_.month
    while (year, month) <= (to_.year, to_.month):
        months.append((year, month))
        year, month = (year + 1, 1) if month == 12 else (year, month + 1)

    #Download one File per Month and keep only the rows inside the window.
    #Applying BOTH bounds to every file avoids duplicated / out-of-window rows
    #when the whole window sits inside a single month.
    batches = []
    for year, month in months:
        monthly = data.LoadRawData(year = year, months = month)
        in_window = (monthly["PickupDatetime"] >= from_) & (monthly["PickupDatetime"] <= to_)
        batches.append(monthly[in_window])

    rides = pd.concat(batches)

    #Shift Data to pretend it's recent
    rides["PickupDatetime"] += shift

    rides = rides.sort_values(by=["PickupLocationID", "PickupDatetime"])

    return rides

In [None]:
#Pull the batch of raw rides for the configured fetch window
rides = FetchBatchRawData(
    fromdate = FetchDataStart,
    todate = FetchDataEnd,
)

In [None]:
#Convert the raw rides into time-series data with the project helper
#(the columns "PickupHour", "NumOfRides" and "PickupLocationID" are renamed in the next cell)
TS_Data = data.TransformRawDataIntoTSData(rides)

In [None]:
#Switch to the snake_case column names that the Feature Group is keyed on
TS_Data.rename(
    columns = {
        "PickupHour": "pickup_hour",
        "NumOfRides": "numrides",
        "PickupLocationID": "pickup_location_id",
    },
    inplace = True,
)

In [None]:
import hopsworks

#Log in to the Hopsworks Project named in config
Project = hopsworks.login(
    project = config.HopsworksProjectName,
    api_key_value = config.HOPSWORKSAPIKEY,
)

#Get a handle on the Project's Feature Store
FeatureStore = Project.get_feature_store()

#Fetch the Feature Group (or create it if it does not exist yet)
FeatureGroup = FeatureStore.get_or_create_feature_group(
    name = config.FeatureGroupName,
    version = config.FeatureGroupVersion,
    description = "TimeSeries Data at Hourly Frequency",
    primary_key = ["pickup_location_id", "pickup_hour"],
    event_time = "pickup_hour",
)

In [None]:
#Send the time-series rows to the Feature Group; "wait_for_job" is switched off,
#so this call is not asked to wait for the insert job
FeatureGroup.insert(
    TS_Data,
    write_options = {"wait_for_job": False},
)