In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error
import pickle
import numpy as np

In [2]:
weather = pd.read_csv("local_weather_adis.csv", index_col="DATE")

In [3]:
#PREPARING THE DATA FOR MACHINE LEARNING
#CHECKINg NULL VALUE IN THE DATA SET
weather.apply(pd.isnull).sum() 

STATION        0
NAME           0
PRCP        6185
SNWD       18801
TAVG        6520
TMAX        4716
TMIN        3579
dtype: int64

In [4]:
weather.apply(pd.isnull).sum()/weather.shape[0]

STATION    0.000000
NAME       0.000000
PRCP       0.328919
SNWD       0.999840
TAVG       0.346735
TMAX       0.250798
TMIN       0.190332
dtype: float64

In [5]:
#SELECT CORE VALUE
core_weather = weather[["PRCP","SNWD","TMAX","TMIN"]].copy()

In [6]:
#RENAME THE ATRIBUTE FOR CORE VALUE
core_weather.columns = ["precip","snow_depth","temp_max","temp_min"]

In [7]:
#FILLING IN MISSING VALUES
core_weather.apply(pd.isnull).sum()/core_weather.shape[0]

precip        0.328919
snow_depth    0.999840
temp_max      0.250798
temp_min      0.190332
dtype: float64

In [8]:
#delate snow_depth
del core_weather["snow_depth"]

In [9]:
#REPLACE NULL VALUE BY 0
core_weather["precip"] = core_weather["precip"].fillna(0)

In [10]:
#FIND THE PRIVIES VALUE THAT IS NOT NULL replace the null value
core_weather = core_weather.fillna(method="ffill")

In [11]:
#CHECKING THE NULL VALUE
core_weather.apply(pd.isnull).sum()/core_weather.shape[0]

precip      0.0
temp_max    0.0
temp_min    0.0
dtype: float64

In [12]:
#VERIFIYING WE HAVE THE CORRECT DATA TYPES
core_weather.dtypes

precip      float64
temp_max    float64
temp_min    float64
dtype: object

In [13]:
#convert to date time index
core_weather.index = pd.to_datetime(core_weather.index)

In [14]:
#9999 INDICATE MISSING DATA OR DATA THAT HAS NOT BEEN RECEIVED
core_weather.apply(lambda x: (x==9999).sum())

precip      0
temp_max    0
temp_min    0
dtype: int64

In [15]:
#TRAINING OUR FIRST MACHIN LEARNING MODEL
#THE TARGET IS TOMOROW TEMP MAX
core_weather["target"] = core_weather.shift(-1)["temp_max"]

In [16]:
#REMOVING THE LAST ROW
core_weather = core_weather.iloc[:-1,:].copy()

In [17]:
predictors = ["precip","temp_max","temp_min"]

In [18]:
x = core_weather[predictors]

In [19]:
y = core_weather["target"]

In [20]:
X_train,X_test,Y_train,Y_test = train_test_split(x,y,test_size=0.2,random_state=10)

In [21]:

reg = Ridge(alpha=.1)

In [22]:
reg.fit(X_train, Y_train)

Ridge(alpha=0.1)

In [23]:
predictions = reg.predict(X_test)

In [24]:
mean_absolute_error(Y_test,predictions)

1.5804781633860372

In [29]:
pickle.dump(reg,open('weather_model.pkl','wb'))
model=pickle.load(open('weather_model.pkl','rb'))