In [2]:
#import modules
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import matplotlib.pylab as plt
import seaborn as sns
import credentials
import datetime
from datetime import date
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
#access database
URI = "dublinbikeappdb.cxaxe40vwlui.us-east-1.rds.amazonaws.com"
DB = "dbikes1"
name = credentials.name
pw = credentials.password

"""Initate connection"""
engine = create_engine("mysql+mysqlconnector://{}:{}@{}:3306/{}".format(name,pw,URI,DB),echo=False)

In [4]:
#get infor from db (testing with station number 4)
n = 4
availability = '''SELECT *
FROM availability
WHERE number = {}
'''.format(n)

weather = '''SELECT *
FROM weather
'''

select = """SELECT s.number
FROM stations s"""

In [5]:
#convert db tables to pandas dataframes
dfs = pd.read_sql_query(select, engine)
dfa = pd.read_sql_query(availability, engine)
dfw = pd.read_sql_query(weather, engine)

In [6]:
#merge weather and occupancy tables together
df = pd.merge_asof(dfa, dfw, left_on="last_update", right_on="last_update")

In [7]:
#transfor features
df["time"] = df["last_update"]
df["day"] = df["last_update"]

for i in range(df.shape[0]):
    df["time"].loc[i]= df.iloc[i]["last_update"].hour
    df["day"].loc[i] = df.iloc[i]["last_update"].dayofweek
    x= df.iloc[i]["last_update"].dayofweek

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [8]:
# implement weather data
#df["good_weather"] = np.logical_or(df["description"] == "Clouds", df["description"] == "Clear")
#df.good_weather.replace({True:1, False:0},inplace=True)

In [9]:
# create test and train sets
X = df[["time","day","humidity","temp"]]
y = df.available_bikes

X_train, X_test, y_train , y_test = train_test_split(X,y, test_size=0.3, random_state=0)

In [10]:
#linear model
model = LinearRegression()
model.fit(X_train, y_train)

print(model.coef_)
print(model.intercept_)

[-0.06256574 -0.79257852 -0.01801592 -0.15573098]
13.089413038329344


In [11]:
#run test set
model_predicitons = model.predict(X_test)
actual_vs_predicted = pd.concat([y_test, pd.DataFrame(model_predicitons,columns=["Predicted"],index=y_test.index)],axis=1)

In [12]:
#compute error metrics for test set
actual_vs_predicted["squared_error"] = (actual_vs_predicted["available_bikes"] - actual_vs_predicted["Predicted"])**2
rmse_test = ((actual_vs_predicted["squared_error"].sum())/actual_vs_predicted.shape[0])**0.5
mae_test = ((actual_vs_predicted["squared_error"])**0.5).sum()/actual_vs_predicted.shape[0]

print("RMSE",rmse_test)
print("MAE",mae_test)

RMSE 3.7619375677049525
MAE 2.9196224306852954


In [31]:
#colate this for single function
def get_model(n):
    """returns a model with error scores using the test set"""
    #get infor from db
    availability = '''SELECT *
    FROM availability
    WHERE number = {}
    '''.format(n)

    weather = '''SELECT *
    FROM weather
    '''

    select = """SELECT s.number
    FROM stations s"""
    
    #convert db tables to pandas dataframes
    dfs = pd.read_sql_query(select, engine)
    dfa = pd.read_sql_query(availability, engine)
    dfw = pd.read_sql_query(weather, engine)
    
    #merge weather and occupancy tables together
    df = pd.merge_asof(dfa, dfw, left_on="last_update", right_on="last_update")
    
    #transform existing features
    df["time"] = df["last_update"]
    df["day"] = df["last_update"]

    for i in range(df.shape[0]):
        df["time"].loc[i]= df.iloc[i]["last_update"].hour
        df["day"].loc[i] = df.iloc[i]["last_update"].dayofweek
    
    # create test and train sets
    X = df[["time","day","humidity","temp"]]
    y = df.available_bikes

    X_train, X_test, y_train , y_test = train_test_split(X,y, test_size=0.3, random_state=0)
    
    #linear model
    model = LinearRegression()
    model.fit(X_train, y_train)
    
    #run test set
    model_predicitons = model.predict(X_test)
    actual_vs_predicted = pd.concat([y_test, pd.DataFrame(model_predicitons,columns=["Predicted"],index=y_test.index)],axis=1)
    
    actual_vs_predicted["squared_error"] = (actual_vs_predicted["available_bikes"] - actual_vs_predicted["Predicted"])**2
    rmse_test = ((actual_vs_predicted["squared_error"].sum())/actual_vs_predicted.shape[0])**0.5
    mae_test = ((actual_vs_predicted["squared_error"])**0.5).sum()/actual_vs_predicted.shape[0]
    
    #get model using full dataset
    model = LinearRegression()
    model.fit(X, y)
    results = [model.coef_,model.intercept_,{"rmse":rmse_test, "mae":mae_test}]
    
    return results
                

In [32]:
stations = dfs.number.tolist()
stations[:3]

[2, 3, 4]

In [33]:
#trains all models, and returns error scores
#WARNING. THIS FUNCTION WILL TAKE A LONG TIME AS YOU ARE RUNNING ALL CODE FEATURED IN THIS NOTEBOOK FOR
#EVERY MODE IN THE DATASET. However long it took you to run all code thus far, this code will repeat those task
#for over 100 stations instead of just 1.
models = {}
for s in stations[:3]:
    models[s] = get_model(s)
    print("Station number :",s," Complete")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Station number : 2  Complete


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Station number : 3  Complete


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Station number : 4  Complete


In [34]:
import pickle

In [35]:
pickle_out = open("flaskapp/models.pkl", "wb")
pickle.dump(models, pickle_out)
pickle_out.close()

In [36]:
pickle_in = open("flaskapp/models.pkl", "rb")
models = pickle.load(pickle_in)

print(models[2])

[array([ 0.04597102, -0.48784152,  0.07280049, -0.19591035]), 3.0878538524809, {'rmse': 3.9455992006821177, 'mae': 3.3978339441300944}]


In [33]:
inter = models[2][1]
inter

3.0481319286561304

In [37]:
coef = models[2][0]
coef

array([ 0.04597102, -0.48784152,  0.07280049, -0.19591035])

Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError

Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError

Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError

Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError

Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError

Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError

In [35]:
hour = 1
dic = {'t': 2, 'humid': 2, 'd': 5}

In [38]:
dic['d']

5

In [43]:
coef["rmse"]

3.9418967949007646

In [47]:
models[3]

[array([ 0.01039905,  0.01896137, -0.01265127, -0.14213996]),
 6.692399979934559,
 {'rmse': 4.055352064805854, 'mae': 3.3204256199709232}]

In [45]:
inter + (hour * coef["rmse"]) + (dic['d'] * coef["mae"]) + (dic['humid'] * coef["mae"]) + (dic['t'] * coef["mae"])

37.34809495742671

In [57]:
models[2][0][0]

0.04603646478678425

In [58]:
coef = models[2][0]

In [59]:
coef[0]

0.04603646478678425

In [63]:
def model():
    pickle_in = open("models.pickle", "rb")
    models = pickle.load(pickle_in)

    hour = 1
    dic = {'t': 2, 'humid': 2, 'd': 5}
    inter = models[2][1]
    coef = models[2][0]
    result = inter + (hour * coef[0]) + (dic['d'] * coef[1]) + (dic['humid'] * coef[2]) + (dic['t'] * coef[3])
    print(result)

    return result  # A float which we display

In [64]:
model()

0.35361422869163195


0.35361422869163195

Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\hassa\miniconda3\envs\dbikes\lib\asyncio\selector_events.py", line 119, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError