# Forecast API Request


In [2]:
import pandas as pd
import numpy as np

import sys
import urllib.parse
import requests
from datetime import datetime, date, timezone, timedelta
from sklearn.preprocessing import OneHotEncoder
import datetime

In [3]:
BASE_URL = "https://weather.lewagon.com"
oslo_lat = 59.919602443955355
oslo_lon = 10.752152108688852
#appid = 'ed4479cb1d5ca4806089385ba8f6d277' - Fred's API key

In [4]:
def weather_forecast(lat, lon):
    '''Return a 5-day weather forecast for the city, given its latitude and longitude.'''
    url = urllib.parse.urljoin(BASE_URL, "/data/2.5/forecast")
    forecasts = requests.get(url, params={'lat': lat, 'lon': lon, 'units': 'metric'}).json()['list']
    return forecasts

In [5]:
df_tmp = weather_forecast(oslo_lat, oslo_lon)

In [6]:
df_tmp

[{'dt': 1670155200,
  'main': {'temp': -1.35,
   'feels_like': -1.35,
   'temp_min': -3.87,
   'temp_max': -1.35,
   'pressure': 1035,
   'sea_level': 1035,
   'grnd_level': 1033,
   'humidity': 59,
   'temp_kf': 2.52},
  'weather': [{'id': 802,
    'main': 'Clouds',
    'description': 'scattered clouds',
    'icon': '03d'}],
  'clouds': {'all': 47},
  'wind': {'speed': 1.3, 'deg': 21, 'gust': 1.96},
  'visibility': 10000,
  'pop': 0,
  'sys': {'pod': 'd'},
  'dt_txt': '2022-12-04 12:00:00'},
 {'dt': 1670166000,
  'main': {'temp': -2.88,
   'feels_like': -4.91,
   'temp_min': -5.94,
   'temp_max': -2.88,
   'pressure': 1035,
   'sea_level': 1035,
   'grnd_level': 1033,
   'humidity': 70,
   'temp_kf': 3.06},
  'weather': [{'id': 802,
    'main': 'Clouds',
    'description': 'scattered clouds',
    'icon': '03n'}],
  'clouds': {'all': 34},
  'wind': {'speed': 1.43, 'deg': 14, 'gust': 2.03},
  'visibility': 10000,
  'pop': 0,
  'sys': {'pod': 'n'},
  'dt_txt': '2022-12-04 15:00:00'},
 {'

# Transforming forecast

In [7]:
def check_float(column):
    for index, row in enumerate(column):
        if type(row)==float:
            column[index] = 0
        else:
            column[index] = row['3h']
    return column

In [8]:
def transform_forecast():
    uncleaned_forecast = pd.DataFrame(weather_forecast(oslo_lat, oslo_lon))
    uncleaned_forecast = uncleaned_forecast.drop(columns=['dt','weather','clouds','visibility','pop','sys'])
    
    uncleaned_forecast['temp_min'] = uncleaned_forecast['main'].apply(lambda x: x['temp_min'])
    uncleaned_forecast['temp_max'] = uncleaned_forecast['main'].apply(lambda x: x['temp_max'])
    uncleaned_forecast['wind_speed'] = uncleaned_forecast['wind'].apply(lambda x: x['speed'])
    
    if 'snow' in uncleaned_forecast.columns:
        check_float(uncleaned_forecast['snow'])
    if 'rain' in uncleaned_forecast.columns:
        check_float(uncleaned_forecast['rain'])
    
    uncleaned_forecast = uncleaned_forecast.drop(columns=['main','wind',])
    uncleaned_forecast['dt_txt'] = uncleaned_forecast['dt_txt'].apply(lambda x: x.split(' ')[0])
    
    
    day_grouped_forecast = uncleaned_forecast.groupby('dt_txt').agg({'temp_min':'min',
                                                                    'temp_max':'max', 
                                                                    'wind_speed':'mean'})
    
    if 'snow' in uncleaned_forecast.columns:
        day_grouped_forecast['snow_total'] = uncleaned_forecast.groupby('dt_txt').agg({'snow':'sum'})['snow']
    else:
        day_grouped_forecast['snow_total'] = 0
    if 'rain' in uncleaned_forecast.columns:
        day_grouped_forecast['rainfall_total'] = uncleaned_forecast.groupby('dt_txt').agg({'rain':'sum'})['rain']
    else:
        day_grouped_forecast['rainfall_total'] = 0
    
    cleaned_forecast = day_grouped_forecast.reset_index()
    cleaned_forecast = cleaned_forecast.rename({'dt_txt':'date', 
                                                'wind_speed':'wind_speed_avg'}, axis=1)
    
    cleaned_forecast['date'] = pd.to_datetime(cleaned_forecast['date'])
    cleaned_forecast['day_of_week'] = cleaned_forecast['date'].dt.dayofweek
    cleaned_forecast['Month'] = cleaned_forecast['date'].dt.month
    
    
    return cleaned_forecast

In [6]:
def encode_day_month(X, col_list):
    ohe = OneHotEncoder(sparse = False) 
    ohe.fit(X[col_list]) 
    X[ohe.get_feature_names_out()] = ohe.transform(X[col_list])
    return X.drop(columns = col_list)

In [7]:
cleaned_forecast = transform_forecast()

In [8]:
cleaned_forecast = encode_day_month(cleaned_forecast, ['day_of_week', 'Month'])

In [9]:
cleaned_forecast

Unnamed: 0,date,temp_min,temp_max,wind_speed_avg,snow_total,rainfall_total,day_of_week_0,day_of_week_1,day_of_week_2,day_of_week_3,day_of_week_4,day_of_week_6,Month_12
0,2022-12-04,-7.09,-1.35,1.3725,0,0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
1,2022-12-05,-8.17,-5.37,0.90625,0,0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
2,2022-12-06,-6.96,-2.32,1.2825,0,0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
3,2022-12-07,-8.34,-1.85,1.7125,0,0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
4,2022-12-08,-9.9,-6.98,1.04875,0,0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
5,2022-12-09,-10.11,-9.01,0.9875,0,0,0.0,0.0,0.0,0.0,1.0,0.0,1.0


# Test for day selection

In [10]:
def date_selector(user_date):
    transform_forecast()[transform_forecast().date == user_date]

# Continue processing

In [24]:
type(cleaned_forecast['date'][0])

pandas._libs.tslibs.timestamps.Timestamp

In [22]:
today=datetime.today()
#td = timedelta(days=5)
max_date = today + timedelta(days=2)

In [23]:
max_date

datetime.datetime(2022, 12, 6, 13, 1, 37, 442782)

# experiments

In [28]:
import schedule, time
import webbrowser
from datetime import datetime, timedelta
import threading
from pytz import timezone

In [42]:
def open_video():
    webbrowser.open('http://youtube.com')

In [43]:
schedule.every(.2).minutes.do(open_video)
while True:
    schedule.run_pending()
    time.sleep(1)

hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello


KeyboardInterrupt: 

In [65]:
listo = [5,3,5,4]

In [68]:
date = datetime.today()
for row in forecast_df:
    td = timedelta(days=1)
    name = f"{str(date).split(' ')[0]}.csv"
    date += timedelta(days=1)
    print(name)
    

2022-12-05.csv
2022-12-06.csv
2022-12-07.csv
2022-12-08.csv
