In [3]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
from joblib import dump, load
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [2]:
# Clean the raw export: rachtest.csv -> leteamcleaned.csv
df = pd.read_csv("rachtest.csv")

# Drop the columns that are not used as model features or targets.
# `columns=` is used instead of the positional axis (`drop(labels, 1)`),
# which pandas 2.0 and later reject with a TypeError.
df = df.drop(columns=['weather_id', 'main_feels_like', 'main_temp_max', 'main_temp_min', 'main_pressure', 'main_humidity', 'test1'])
df = df.rename(columns={"number": "station_number", "main_temp": "temp"})

# categorical -> numeric codes (Clouds and Mist share code 0).
# NOTE(review): any weather label not listed here is left unchanged as a
# string by `replace` - confirm the raw data only contains these five values.
cleanup_nums = {"weather_main": {"Clouds": 0, "Clear": 3, "Rain": 2, "Mist": 0, "Drizzle": 1}}
df = df.replace(cleanup_nums)

# Split the timestamp into hour of day and day of week (weekday: Monday=0 ... Sunday=6).
# The parsed timestamps are kept in a local Series rather than a temporary column.
timestamps = df['dt'].astype('datetime64[ns]')
df['hour'] = timestamps.dt.hour
df['day'] = timestamps.dt.weekday

df = df.drop(columns=['dt'])

# The DataFrame index is written as the first (unnamed) column.
df.to_csv('leteamcleaned.csv') # , index=False

In [None]:
	station_number	available_bike_stands	available_bikes	weather_main	temp	wind_speed	hour	day
569	    2	            11	                    9	             0	        280.77	   4.63	    4	    1
1034	2	             14	                    5	             2	        285.47	    6.69	13	    2
348310	26	            5	                    15	             0	        281.42	   2.57	     5	     2
430183	67	            2	                    37	             3	         279.94	    5.14	20	     3
440510	73	            27	                     3	              0	        283.5	    5.14	  3	     1
531852	117	            39	                     1 	               0	    286.65	    9.26	  15	 6

In [4]:
# Load the data used to train the models.
# NOTE(review): presumably leteamcleaned.csv with the spot-check records
# removed; the removal step is not shown in this notebook - confirm.
df = pd.read_csv("leteamcleanedremoved.csv")

In [7]:
def create_models(df, model_dir='models', test_size=0.3, random_state=0):
    """Train and save two random-forest regressors for every station in ``df``.

    For each distinct ``station_number`` the station's rows are split into a
    training part and a held-out part, and one model per target
    (``available_bikes`` and ``available_bike_stands``) is fitted on the
    training part and written to
    ``<model_dir>/<target>_station_<station_number>.joblib``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the feature columns ``station_number``, ``day``,
        ``hour``, ``weather_main``, ``temp``, ``wind_speed`` and both target
        columns.
    model_dir : str, default 'models'
        Directory the ``.joblib`` files are written to; created if missing.
    test_size : float, default 0.3
        Fraction of each station's rows held out of training. The held-out
        part is not evaluated by this function.
    random_state : int, default 0
        Seed for the train/test split, so repeated runs on the same data
        produce the same models.

    Returns
    -------
    None
    """
    features_training = ["station_number", "day", "hour", 'weather_main', 'temp', 'wind_speed']
    # target column -> max_depth of the forest trained for that target
    target_depths = {'available_bikes': 28, 'available_bike_stands': 27}

    # Make sure the output directory exists before any model is dumped.
    os.makedirs(model_dir, exist_ok=True)

    for station_number in df.station_number.unique():
        # select the records for that station number only
        data = df.loc[df['station_number'] == station_number]

        for target, max_depth in target_depths.items():
            # Seeded split: without random_state every run trained on a
            # different random 70% and produced different models.
            train_features, _, train_labels, _ = train_test_split(
                data[features_training], data[target],
                test_size=test_size, random_state=random_state)

            rfr = RandomForestRegressor(n_estimators=10, random_state=0, max_depth=max_depth, max_features=4)
            rfr.fit(train_features, train_labels)

            dump(rfr, os.path.join(model_dir, target + '_station_' + str(station_number) + '.joblib'))

In [8]:
# Fit the per-station models and save them as .joblib files under models/
create_models(df)

In [1]:
def printRes(arr):
    """Print the predicted bike and bike-stand counts for one observation.

    Parameters
    ----------
    arr : list
        One feature row. ``arr[0]`` is the station number and selects which
        saved models are loaded from ``models/``; the whole list is passed to
        the models as a single sample.

    Prints two lines: the ``available_bikes`` prediction, then the
    ``available_bike_stands`` prediction, each truncated to an int.
    """
    # predict() expects a 2-D input: one inner list per sample.
    features = [arr]

    for target in ('available_bikes', 'available_bike_stands'):
        model = load('models/' + target + '_station_' + str(arr[0]) + '.joblib')
        prediction = model.predict(features)
        # predict() returns a 1-element array; take the element explicitly,
        # because int() on a non-0-d array is deprecated since NumPy 1.25.
        print(int(prediction[0]))

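Spot checks: records removed from the CSV (actual values are in each cell's first comment; the model's predictions are printed below it)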

In [10]:
# Sample row 569 (station 2): actual available_bikes = 9, available_bike_stands = 11
# arr = [station_number, day, hour, weather_main, temp, wind_speed]
arr = [2, 1, 4, 0, 280.77, 4.63]
printRes(arr)

9
11


In [11]:
# Sample row 1034 (station 2): actual available_bikes = 5, available_bike_stands = 14
# arr = [station_number, day, hour, weather_main, temp, wind_speed]
arr = [2, 2, 13, 2, 285.47, 6.69]
printRes(arr)

5
13


In [12]:
# Sample row 348310 (station 26): actual available_bikes = 15, available_bike_stands = 5
# arr = [station_number, day, hour, weather_main, temp, wind_speed]
arr = [26, 2, 5, 0, 281.42, 2.57]
printRes(arr)

15
5


In [13]:
# Sample row 430183 (station 67): actual available_bikes = 37, available_bike_stands = 2
# arr = [station_number, day, hour, weather_main, temp, wind_speed]
arr = [67, 3, 20, 3, 279.94, 5.14]
printRes(arr)

36
2


In [14]:
# Sample row 440510 (station 73): actual available_bikes = 3, available_bike_stands = 27
# arr = [station_number, day, hour, weather_main, temp, wind_speed]
arr = [73, 1, 3, 0,  283.5, 5.14]
printRes(arr)

3
27


In [15]:
# Sample row 531852 (station 117): actual available_bikes = 1, available_bike_stands = 39
# arr = [station_number, day, hour, weather_main, temp, wind_speed]
arr = [117, 6, 15, 0, 286.65, 9.26]
printRes(arr)

1
38


Live check (06/04/2021, about 6pm): comparing predictions with the values shown in the app at that moment

In [17]:
# TALBOT STREET (station 38) - actual available_bikes = 17, available_bike_stands = 23
# Feature order: ["station_number", "day", "hour", 'weather_main', 'temp', 'wind_speed']
# Observed:      ["TALBOT STREET", "Tuesday", "5:50pm", 'Clouds', 'temp', 'wind_speed']
# NOTE(review): the cleaning cell derives `day` from dt.weekday (Monday=0) and
# `hour` from dt.hour, which would encode Tuesday 5:50pm as day=1, hour=17,
# not the day=2, hour=18 used here - confirm the intended encoding.
arr = [38, 2, 18, 0, 280.17, 9.26]
printRes(arr)

24
23


In [18]:
# COLLINS BARRACKS MUSEUM (station 87)
# Actual available_bikes = 8, available_bike_stands = 30
# arr = [station_number, day, hour, weather_main, temp, wind_speed]
arr = [87, 2, 18, 0, 280.17, 9.26]
printRes(arr)

10
27


In [None]:
# COLLINS BARRACKS MUSEUM (station 87)
# Actual available_bikes = 8, available_bike_stands = 30
# arr = [station_number, day, hour, weather_main, temp, wind_speed]
# NOTE(review): this cell has no execution count and repeats the inputs of the
# preceding Collins Barracks cell - candidate for deletion.
arr = [87, 2, 18, 0, 280.17, 9.26]
printRes(arr)

In [19]:
# GRANGEGORMAN LOWER (CENTRAL) (station 104)
# Actual available_bikes = 11, available_bike_stands = 29
# arr = [station_number, day, hour, weather_main, temp, wind_speed]
arr = [104, 2, 18, 0, 280.17, 9.26]
printRes(arr)

11
27


End-to-end testing

In [26]:
# Feature order: ["station_number", "day", "hour", 'weather_main', 'temp', 'wind_speed']
# Timestamp under test: 2021-04-13 17:41:00  -> day = 1, hour = 17
# Weather "Rain" is encoded as 2
# temp_eve = '282.84'
# Request path under test: /predict/67/1/17/Rain/282.84/4.73
arr = [67, 1, 17, 2, 282.84, 4.73]
printRes(arr)

20
22
