# GET WEATHER FORECAST DATA

This functions store forecast data of 14 days in the future for the chosen train stations into a pickle file to later use it as database for the GTBT app call to our API.

In [1]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import pickle

In [21]:
def special_characters(city):
    if city == 'Munchen':
        return 'München'
    elif city == 'Koln':
        return 'Köln'
    elif city == 'Wurzburg':
        return 'Würzburg'
    elif city == 'Nurnberg':
        return 'Nürnberg'
    elif city == 'Gottingen':
        return 'Göttingen'
    else:
        return city
    
def weather_forecast(start_city, end_city, user_date):
    # get weather forecast
    key = 'xxxx' #Add here your own key from https://www.visualcrossing.com

    start_city =  special_characters(start_city)
    end_city = special_characters(end_city)
    
    start_date = pd.Timestamp(user_date).round('H')
    start_date_00 = pd.Timestamp(start_date.year, start_date.month, start_date.day)
    end_date = datetime.now() + timedelta(14)
    end_date_00 = pd.Timestamp(end_date.year, end_date.month, end_date.day) + timedelta(1) - timedelta(hours=1)

    start_date_str = datetime.strftime(start_date, '%Y-%m-%d')
    end_date_str = datetime.strftime(end_date, '%Y-%m-%d')

    stations_lat_lon = pd.read_csv('api/data/Deutsche_Bahn_Haltestellen.csv', usecols=['X', 'Y', 'NAME'])
    stations_name = ['Köln Hbf',
                    'München Hbf',
                    'Mannheim Hbf',
                    'Stuttgart Hbf',
                    'Würzburg Hbf',
                    'Frankfurt(Main)Hbf',
                    'Nürnberg Hbf',
                    'Essen Hbf',
                    'Hamburg Hbf',
                    'Berlin Hbf',
                    'Hannover Hbf',
                    'Hagen Hbf',
                    'Erfurt Hbf',
                    'Göttingen'
                    ]

    station_sc = [station for station in stations_name if start_city in station]
    station_ec = [station for station in stations_name if end_city in station]
    stations = station_sc + station_ec

    nodes = ['origin_city', 'destination_city']
    trip = {node: station for node, station in zip(nodes, stations)}
    hours = [6, 12]
    c_variables = ['temp', 'prcp', 'snow', 'wspd', 'wpgt', 'coco']

    coco_forecast = pd.read_csv('api/data/weather_coco_forecast.csv', sep=';')
    coco_forecast.set_index('Code', inplace=True)
    coco_forecast = coco_forecast.to_dict()['Weather Condition']

    new_classes_forecast = {
        1: [29, 42, 43],
        2: [2, 8, 9, 19, 20, 21, 24, 27, 28, 30, 31, 32, 33, 36, 38, 39, 40, 41],
        3: [1, 4, 6, 11, 12, 14, 18, 23, 26, 35, 37],
        4: [3, 5, 7, 10, 13, 15, 16, 17, 22, 25, 34]
    }

    db_path = 'raw_data/weather_forecast.pickle'
    with open(db_path, 'rb') as handle:
        weather_response = pickle.load(handle)
    
    weather = {}
    weather_dict = {}
    df = pd.DataFrame()
    for n, s in trip.items():
        lat = stations_lat_lon[stations_lat_lon['NAME'] == s]['Y'].mean()
        lon = stations_lat_lon[stations_lat_lon['NAME'] == s]['X'].mean()

        url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{str(lat)},{str(lon)}/{start_date_str}/{end_date_str}"
        params = {'key': key,
                'unitGroup': 'metric'}
        response = requests.get(url, params=params).json()

        weather_response[s] = response
        
        response = weather_response[s]
        
        number_of_days = (end_date - start_date).days + 1
        variables = ['datetime', 'temp', 'precip', 'snow', 'windspeed', 'windgust', 'conditions']
        sw_f = pd.DataFrame(columns=variables)
        sw_f['datetime'] = pd.date_range(start_date_00, end_date_00, freq='H')
        sw_f.set_index('datetime', inplace=True)

        for d in range(number_of_days):
            for i, r in sw_f.iterrows():
                for v in variables[1:]:
                    sw_f.loc[i, v] = response['days'][d]['hours'][i.hour][v]

        sw_f.rename(columns={'precip': 'prcp',
                            'windspeed': 'wspd',
                            'windgust': 'wpgt',
                            'conditions': 'coco'}, inplace=True)

        sw_f.index.names = ['time']

        weather[s] = sw_f.copy()

        weather[s]['coco'] = weather[s]['coco'].apply(lambda x: coco_func(x, coco_forecast, new_classes_forecast))
        weather[s].reset_index(inplace=True)
        weather[s]['time']  = pd.to_datetime(weather[s]['time'])

        if n == 'origin_city':
            nn = 'oc'
        elif n == 'destination_city':
            nn = 'dc'
        for h in hours:
            weather_dict['time_' + nn + ' -' + str(h) + 'H'] = weather[s][weather[s]['time'] == start_date - timedelta(hours=h)].rename(
                columns={'temp': 'temp_' + nn + '_' + str(h),
                            'prcp': 'prcp_' + nn + '_' + str(h),
                            'snow': 'snow_' + nn + '_' + str(h),
                            'wspd': 'wspd_' + nn + '_' + str(h),
                            'wpgt': 'wpgt_' + nn + '_' + str(h),
                            'coco': 'coco_' + nn + '_' + str(h)})

    total_df = pd.DataFrame()
    for k, v in weather_dict.items():
        v.reset_index(inplace=True)
        total_df[list(v.columns)[1:]] = v[list(v.columns)[1:]]

    for v in c_variables:
        total_df[v + '_max_combined'] = total_df[[v + '_oc_6', v + '_oc_12', v + '_dc_6', v + '_dc_12']].max(axis=1)

    total_df['temp_min_combined'] = total_df[['temp_oc_6', 'temp_oc_12', 'temp_dc_6', 'temp_dc_12']].min(axis=1)

    return total_df

def coco_func(x, coco_forecast, new_classes_forecast):
    x_list = x.split(', ')
    for k, v in coco_forecast.items():
        if v in x_list:
            for c, i in new_classes_forecast.items():
                if k in i:
                    return c

def get_weather_data(cities, user_date):

    start_date = pd.Timestamp(user_date).round('H')
    end_date = datetime.now() + timedelta(14)

    start_date_str = datetime.strftime(start_date, '%Y-%m-%d')
    end_date_str = datetime.strftime(end_date, '%Y-%m-%d')

    weather_response = {}
    for city in cities:
        key = '7DYDYYY5GVYHQA52HXFQV5A5Y'
        stations_lat_lon = pd.read_csv('api/data/Deutsche_Bahn_Haltestellen.csv', usecols=['X', 'Y', 'NAME'])
        lat = stations_lat_lon[stations_lat_lon['NAME'] == city]['Y'].mean()
        lon = stations_lat_lon[stations_lat_lon['NAME'] == city]['X'].mean()

        url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{str(lat)},{str(lon)}/{start_date_str}/{end_date_str}"
        params = {'key': key,
                'unitGroup': 'metric'}
        response = requests.get(url, params=params).json()

        weather_response[city] = response
        output_file = 'raw_data/weather_forecast.pickle'
        with open(output_file, 'wb') as handle:
            pickle.dump(weather_response, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [22]:
start_city = 'Koln'
end_city = 'Mannheim'
user_date = '2022-06-10 16:23:00'

total_df = weather_forecast(start_city, end_city, user_date)
total_df

Unnamed: 0,time,temp_oc_6,prcp_oc_6,snow_oc_6,wspd_oc_6,wpgt_oc_6,coco_oc_6,temp_oc_12,prcp_oc_12,snow_oc_12,...,wspd_dc_12,wpgt_dc_12,coco_dc_12,temp_max_combined,prcp_max_combined,snow_max_combined,wspd_max_combined,wpgt_max_combined,coco_max_combined,temp_min_combined
0,2022-06-10 04:00:00,21.4,0.0,0.0,18.0,26.8,1,15.8,0.0,0.0,...,6.5,8.4,1,22.9,0.0,0.0,18.0,26.8,1,15.8


In [7]:
db_path = 'raw_data/weather_forecast.pickle'
with open(db_path, 'rb') as handle:
    weather_response = pickle.load(handle)
weather_response['Mannheim Hbf']

{'queryCost': 1,
 'latitude': 49.4793540000001,
 'longitude': 8.46892100000002,
 'resolvedAddress': '49.4793540000001,8.46892100000002',
 'address': '49.4793540000001,8.46892100000002',
 'timezone': 'Europe/Berlin',
 'tzoffset': 2.0,
 'description': 'Similar temperatures continuing with no rain expected.',
 'days': [{'datetime': '2022-06-10',
   'datetimeEpoch': 1654812000,
   'tempmax': 24.6,
   'tempmin': 7.6,
   'temp': 18.0,
   'feelslikemax': 24.6,
   'feelslikemin': 7.6,
   'feelslike': 18.0,
   'dew': 10.1,
   'humidity': 64.5,
   'precip': 0.0,
   'precipprob': 0.0,
   'precipcover': 0.0,
   'preciptype': None,
   'snow': 0.0,
   'snowdepth': 0.0,
   'windgust': 15.1,
   'windspeed': 7.5,
   'winddir': 199.2,
   'pressure': 1022.7,
   'cloudcover': 31.1,
   'visibility': 16.9,
   'solarradiation': 307.6,
   'solarenergy': 26.6,
   'uvindex': 9.0,
   'severerisk': 10.0,
   'sunrise': '05:20:01',
   'sunriseEpoch': 1654831201,
   'sunset': '21:31:33',
   'sunsetEpoch': 1654889493

In [10]:
weather_response.keys()

dict_keys(['Köln Hbf', 'München Hbf', 'Mannheim Hbf', 'Stuttgart Hbf', 'Würzburg Hbf', 'Frankfurt(Main)Hbf', 'Nürnberg Hbf', 'Essen Hbf', 'Hamburg Hbf', 'Berlin Hbf', 'Hannover Hbf', 'Hagen Hbf', 'Erfurt Hbf', 'Göttingen'])