In [1014]:
# imports

import datetime
from datetime import datetime as dttm
from datetime import date
from datetime import timedelta
import matplotlib.pyplot as plt
from meteostat import Point, Daily
import pandas as pd
import numpy as np
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize']=20,10
from geopy.geocoders import Nominatim
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error

In [1015]:
# location name to coordinates

def coord(location):
    geolocator = Nominatim(user_agent = "predictor")
    return geolocator.geocode(location)

In [1016]:
# set time period

today = date.today()
de = int((today + timedelta(1)).strftime("%d"))
me = int(today.strftime("%m"))
ye = int(today.strftime("%Y"))
start = dttm(2021, 1, 2)
end = dttm(ye, me, de)

# get place

city = input('Enter a city - ')
x = coord(city)
lat = int(x.latitude)
lon = int(x.longitude)
place = Point(lat, lon)
#place = Point(21.2120677, 81.3732849)

# get data

data = Daily(place, start, end)
data = data.fetch()

# put data in a dataframe

weather = pd.DataFrame(data)
weather.drop(['tavg', 'snow', 'wspd', 'wdir', 'wpgt', 'pres', 'tsun'], axis = 1, inplace = True)
#print(weather)

Enter a city - dharamshala


In [1017]:
# remove NULL values

weather.apply(pd.isnull).sum()
weather['prcp'] = weather['prcp'].fillna(0)
weather[pd.isnull(weather['tmax'])]
weather = weather.fillna(method = 'ffill')
#weather.apply(pd.isnull).sum()

In [1018]:
# specify target data for prediction

weather['target_max'] = weather.shift(-1)['tmax']
weather['target_min'] = weather.shift(-1)['tmin']
m1 = weather.target_max[-2:-9:-1].mean()
m2 = weather.target_min[-2:-9:-1].mean()
weather.target_max[-1] = m1
weather.target_min[-1] = m2

In [1019]:
# set ridge regression model

reg_max = Ridge(alpha = 0.1)
predictors = ['tmax', 'tmin']
train = weather.loc[:'2022-10-31']
test = weather.loc['2022-11-01':]
reg_max.fit(train[predictors], train['target_max'])
predictions = reg_max.predict(test[predictors])

reg_min = Ridge(alpha = 0.1)
predictors = ['tmax', 'tmin']
train = weather.loc[:'2022-10-31']
test = weather.loc['2022-11-01':]
reg_min.fit(train[predictors], train['target_min'])
predictions = reg_min.predict(test[predictors])

In [1020]:
# generalizing the predictor model

def predictor_max(predictors, weather, reg):
    train = weather.loc[:'2022-10-31']
    test = weather.loc['2022-11-01':]
    reg_max.fit(train[predictors], train['target_max'])
    predictions = reg_max.predict(test[predictors])
    error_max = mean_absolute_error(test['target_max'], predictions)
    combined_max = pd.concat([test['target_max'], pd.Series(predictions, index = test.index)], axis = 1)
    combined_max.columns = ['previous_max', 'predictions_max']
    return error_max, combined_max

def predictor_min(predictors, weather, reg):
    train = weather.loc[:'2022-10-31']
    test = weather.loc['2022-11-01':]
    reg_min.fit(train[predictors], train['target_min'])
    predictions = reg_min.predict(test[predictors])
    error_min = mean_absolute_error(test['target_min'], predictions)
    combined_min = pd.concat([test['target_min'], pd.Series(predictions, index = test.index)], axis = 1)
    combined_min.columns = ['previous_min', 'predictions_min']
    return error_min, combined_min

In [1021]:
# calculate error and predictions based on the parameters

weather['month_max_max'] = weather['tmax'].rolling(30).mean()
weather['month_day_max_max'] = weather['month_max_max'] / weather['tmax']
weather['max_min_max'] = weather['tmax'] / weather['tmin']
weather = weather.iloc[30:,:].copy()
weather['monthly_avg_max'] = weather['tmax'].groupby(weather.index.month, group_keys=False).apply(lambda x : x.expanding(1).mean())
weather['day_of_year_avg_max'] = weather['tmax'].groupby(weather.index.day_of_year, group_keys=False).apply(lambda x : x.expanding(1).mean())
predictors_max = ['tmax', 'tmin', 'month_max_max', 'month_day_max_max', 'max_min_max', 'day_of_year_avg_max', 'monthly_avg_max']

weather['month_max_min'] = weather['tmin'].rolling(30).mean()
weather['month_day_max_min'] = weather['month_max_min'] / weather['tmin']
weather['max_min_min'] = weather['tmax'] / weather['tmin']
weather = weather.iloc[30:,:].copy()
weather['monthly_avg_min'] = weather['tmin'].groupby(weather.index.month, group_keys=False).apply(lambda x : x.expanding(1).mean())
weather['day_of_year_avg_min'] = weather['tmin'].groupby(weather.index.day_of_year, group_keys=False).apply(lambda x : x.expanding(1).mean())
predictors_min = ['tmax', 'tmin', 'month_max_min', 'month_day_max_min', 'max_min_min', 'day_of_year_avg_min', 'monthly_avg_min']

error_max, combined_max = predictor_max(predictors_max, weather, reg)
error_min, combined_min = predictor_min(predictors_min, weather, reg)

In [1022]:
print("Tomorrow's maximum temperature - ", combined_max.predictions_max[-1], chr(176), 'C', sep = '')
print("Tomorrow's minimum temperature - ", combined_min.predictions_min[-1], chr(176), 'C', sep = '')
print("Error in maximum temperature   - ", error_max, chr(176), 'C', sep = '')
print("Error in minimum temperature   - ", error_min, chr(176), 'C', sep = '')

Tomorrow's maximum temperature - 22.91227218556851°C
Tomorrow's minimum temperature - 14.902488388050458°C
Error in maximum temperature   - 1.7659691375486832°C
Error in minimum temperature   - 1.9493488621388804°C
