In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing

In [2]:
# Load the raw weather table. The path is relative, so it is resolved against
# the kernel's current working directory.
weather_csv_path = 'weatherHistory.csv'
weather = pd.read_csv(weather_csv_path)

In [3]:
# One-hot encode the precipitation type into 0/1 indicator columns.
# The dtype is pinned explicitly: pandas >= 2.0 defaults to bool, which would
# make describe() skip these columns and head() show True/False instead of the
# 1/0 values this notebook was recorded with. uint8 is the pre-2.0 default.
# get_dummies ignores NaN by default, so a row with a missing precip type gets
# 0 in both indicator columns.
dummy_precip = pd.get_dummies(weather['Precip Type'], dtype=np.uint8)
weather['rain'] = dummy_precip['rain']
weather['snow'] = dummy_precip['snow']

In [4]:
# Strip everything that is not a model input or the regression target.
# 'Loud Cover' is spelled the way the data file spells it (drop() raises
# KeyError on an unknown label), so do not "correct" it.
# NOTE(review): 'Temperature (C)' is presumably removed because it is close to
# a proxy for the target 'Apparent Temperature (C)' - confirm.
unused_columns = [
    'Formatted Date',
    'Summary',
    'Daily Summary',
    'Loud Cover',
    'Precip Type',
    'Temperature (C)',
]
# reset_index(drop=True) renumbers the rows from 0 and discards the old index.
weather = weather.drop(columns=unused_columns).reset_index(drop=True)

In [5]:
# Rescale each continuous feature to the [0, 1] range, one column at a time.
# NOTE(review): in the describe() output further down, scaled pressure has a
# minimum of 0 but a 25th percentile of about 0.967, which suggests a few
# zero readings in the raw data are compressing the useful range - confirm
# whether those are missing-value placeholders.
min_max_scaler = preprocessing.MinMaxScaler()
columns_to_scale = [
    'Humidity',
    'Wind Speed (km/h)',
    'Wind Bearing (degrees)',
    'Visibility (km)',
    'Pressure (millibars)',
]
for column in columns_to_scale:
    # fit_transform wants 2-D input, hence the single-column frame. The scaler
    # is refit on every pass, so afterwards it holds the fit of the last column.
    weather[column] = min_max_scaler.fit_transform(weather[[column]])

In [6]:
# Sanity check after scaling: every min-max-scaled column should report
# min 0 and max 1, and the rain/snow indicators should stay within 0..1.
weather.describe()

Unnamed: 0,Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars),rain,snow
count,96453.0,96453.0,96453.0,96453.0,96453.0,96453.0,96453.0,96453.0
mean,10.855029,0.734899,0.169306,0.52231,0.642691,0.958768,0.883581,0.111059
std,10.696847,0.195473,0.108274,0.299118,0.26038,0.111785,0.320729,0.314207
min,-27.716667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.311111,0.6,0.091276,0.32312,0.518,0.967048,1.0,0.0
50%,12.0,0.78,0.156077,0.501393,0.624,0.971397,1.0,0.0
75%,18.838889,0.89,0.221382,0.807799,0.92,0.975831,1.0,0.0
max,39.344444,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [7]:
# Peek at the first ten rows of the prepared frame.
weather.head(10)

Unnamed: 0,Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars),rain,snow
0,7.388889,0.89,0.22113,0.699164,0.983,0.970135,1,0
1,7.227778,0.86,0.223399,0.721448,0.983,0.970613,1,0
2,9.377778,0.89,0.061523,0.568245,0.929,0.970909,1,0
3,5.944444,0.83,0.220877,0.749304,0.983,0.971358,1,0
4,6.977778,0.83,0.17297,0.721448,0.983,0.971454,1,0
5,7.111111,0.85,0.218608,0.718663,0.929,0.971597,1,0
6,5.522222,0.95,0.193646,0.721448,0.62,0.971655,1,0
7,6.527778,0.89,0.221634,0.724234,0.62,0.971769,1,0
8,10.822222,0.82,0.177257,0.721448,0.62,0.972276,1,0
9,13.772222,0.72,0.196167,0.777159,0.62,0.972132,1,0


In [8]:
# Split the frame into the regression target and the feature matrix
# (every remaining column except the target).
target_column = 'Apparent Temperature (C)'
weather_y = weather[target_column]
weather_x = weather.drop(columns=[target_column])

In [9]:
# Lasso (L1-penalised linear regression) with a light penalty, alpha=0.01.
reg_lasso = linear_model.Lasso(alpha=0.01)
reg_lasso.fit(weather_x, weather_y)

# Print each feature's learned weight; coef_ follows the column order of weather_x.
for feature_name, weight in zip(weather_x.columns, reg_lasso.coef_):
    print("{}: {}".format(feature_name, weight))

# Euclidean (L2) norm of the weight vector; being the last expression of the
# cell, it is what the notebook displays as the cell's result.
squared_weights = np.power(reg_lasso.coef_, 2)
np.sum(squared_weights) ** 0.5

Humidity: -28.289863567870828
Wind Speed (km/h): -20.090686340260042
Wind Bearing (degrees): 0.9241858431930028
Visibility (km): 2.8885198487158306
Pressure (millibars): -0.0
rain: 1.9476939378492482
snow: -12.803954400915929


37.16025409865909

In [10]:
# Lasso again, now with a much stronger L1 penalty (alpha=1) to compare
# against the alpha=0.01 fit.
reg_lasso = linear_model.Lasso(alpha=1)
reg_lasso.fit(weather_x, weather_y)

# Print each feature's learned weight; coef_ follows the column order of weather_x.
for feature_name, weight in zip(weather_x.columns, reg_lasso.coef_):
    print("{}: {}".format(feature_name, weight))

# Euclidean (L2) norm of the weight vector; being the last expression of the
# cell, it is what the notebook displays as the cell's result.
squared_weights = np.power(reg_lasso.coef_, 2)
np.sum(squared_weights) ** 0.5

Humidity: -3.587261564816748
Wind Speed (km/h): -0.0
Wind Bearing (degrees): 0.0
Visibility (km): 0.0
Pressure (millibars): 0.0
rain: 2.333379482316954
snow: -6.275414864934318


7.595652511137398

In [11]:
# Ridge (L2-penalised linear regression) with a light penalty, alpha=0.01.
reg_ridge = linear_model.Ridge(alpha=0.01)
reg_ridge.fit(weather_x, weather_y)

# Print each feature's learned weight; coef_ follows the column order of weather_x.
for feature_name, weight in zip(weather_x.columns, reg_ridge.coef_):
    print("{}: {}".format(feature_name, weight))

# Euclidean (L2) norm of the weight vector; being the last expression of the
# cell, it is what the notebook displays as the cell's result.
squared_weights = np.power(reg_ridge.coef_, 2)
np.sum(squared_weights) ** 0.5

Humidity: -28.62183195789525
Wind Speed (km/h): -21.19131148511231
Wind Bearing (degrees): 1.0681890081477872
Visibility (km): 2.9860112058543464
Pressure (millibars): -0.7551233830541437
rain: 1.8504377886401344
snow: -12.946275584189197


38.07805956976666

In [12]:
# Ridge again, now with a very strong L2 penalty (alpha=1000) to compare
# against the alpha=0.01 fit.
reg_ridge = linear_model.Ridge(alpha=1000)
reg_ridge.fit(weather_x, weather_y)

# Print each feature's learned weight; coef_ follows the column order of weather_x.
for feature_name, weight in zip(weather_x.columns, reg_ridge.coef_):
    print("{}: {}".format(feature_name, weight))

# Euclidean (L2) norm of the weight vector; being the last expression of the
# cell, it is what the notebook displays as the cell's result.
squared_weights = np.power(reg_ridge.coef_, 2)
np.sum(squared_weights) ** 0.5

Humidity: -21.04647172645577
Wind Speed (km/h): -9.607470325045963
Wind Bearing (degrees): 0.5365715482748343
Visibility (km): 4.043566383064146
Pressure (millibars): -0.22814976826714897
rain: 6.166753446203872
snow: -8.350533187801373


25.68478345642426