In [10]:
#!/usr/bin/env python
# coding: utf-8
import sklearn
import dateutil
import sqlite3
from sqlite3 import Error
from sklearn import preprocessing
import pickle
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import ElasticNetCV, LassoLarsCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import PolynomialFeatures
from tpot.builtins import StackingEstimator
from sklearn.externals import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
import platform
import os
from tpot import TPOTRegressor
from sklearn.model_selection import train_test_split


class DemandTrain:
    def __init__(self):
        # try:
        #     if platform.system() == 'Windows':
        self.Sql_connection1 = self.create_connection('/opt/apps/scripts/jaarvis_demand_supply/evo.db')

        #     elif platform.system() == 'Linux':
        #         self.Sql_connection1 = self.create_connection('/opt/apps/scripts/jaarvis_demand_supply/evo.db')
        # except Error as e:
        #     print(e)

        self.df = pd.read_sql_query("select * from zones;", self.Sql_connection1)

    def create_connection(self, db_file):
        """ create a database connection to the SQLite database
            specified by the db_file
        :param db_file: database file
        :return: Connection object or None
        """
        try:
            conn = sqlite3.connect(db_file)
            return conn
        except Error as e:
            print(e)
        return None

    def data_cleansing(self):
        self.df = self.df.dropna(how='any')
        self.df.time = self.df.time.astype(str).str[:2].astype(int)
        self.df.time = self.df.time.replace(regex=True, inplace=True, to_replace=r'\D', value=r'')
        self.df.time = [(x or -1) for x in self.df.time]
        self.df.zone = [(x or 'unknown') for x in self.df.zone]
        self.df.year = [(x or 0) for x in self.df.year]
        self.df.month = [(x or 0) for x in self.df.month]
        self.df.week = [(x or 0) for x in self.df.week]
        self.df.day = [(x or 0) for x in self.df.day]
        self.df.booked_vehicles = [(x or 0) for x in self.df.booked_vehicles]
        self.df.free_vehicles = [(x or 0) for x in self.df.free_vehicles]

        self.df = self.df.mask(self.df.eq('None')).dropna()
        self.df = self.df.fillna(0)

        return self.df

    def feature_addition(self):
        # no feature added
        pass

    def feature_elimination(self):
        # no feature deleted
        pass

    def training_data(self):
        self.data_cleansing()
        df=self.df
        le = preprocessing.LabelEncoder()
        le = le.fit(df.zone)
        numpy.save('classes.npy', le.classes_)
        pd.DataFrame({'zone': df['zone'].unique()}).to_csv("zones_list.csv")
        df['zone'] = le.transform(df.zone.get_values())
        X = df[['zone', 'year','month', 'week','day', 'time']]       
#         X = df[['zone', 'day', 'time']]
        X = X.values
        sc_X = StandardScaler()
        X = sc_X.fit_transform(X)
        joblib.dump(sc_X,"scaler.save") 
        demand = df.booked_vehicles.values
        free_vehicles = df.free_vehicles.values
        return X, demand, free_vehicles

    def trainRF(self):
        X, demand, free_vehicles = self.training_data()
        demand_Xtrain, demand_Xtest, demand_ytrain, demand_ytest = train_test_split(X, demand, train_size=0.9, test_size=0.1)
        demand_pipeline = make_pipeline(
        StackingEstimator(estimator=ElasticNetCV(l1_ratio=0.75, tol=0.01, cv=100)),
        StackingEstimator(estimator=LassoLarsCV(normalize=True, cv=100)),
        StackingEstimator(estimator=GradientBoostingRegressor(alpha=0.99, learning_rate=0.01, loss="lad", max_depth=6, max_features=0.7500000000000001, min_samples_leaf=18, min_samples_split=13, n_estimators=100, subsample=0.05)),
        PolynomialFeatures(degree=2, include_bias=False, interaction_only=False),
        RandomForestRegressor(bootstrap=True, max_features=0.8500000000000001, min_samples_leaf=18, min_samples_split=11, n_estimators=100)
        )

        demand_pipeline.fit(demand_Xtrain, demand_ytrain)
        
        available_Xtrain, available_Xtest, available_ytrain, available_ytest = train_test_split(X, free_vehicles, train_size=0.9, test_size=0.1)
        
        available_pipeline = make_pipeline(
        StackingEstimator(estimator=ElasticNetCV(l1_ratio=0.75, tol=0.01, cv=100)),
        StackingEstimator(estimator=LassoLarsCV(normalize=True,cv=100)),
        StackingEstimator(estimator=GradientBoostingRegressor(alpha=0.99, learning_rate=0.01, loss="lad", max_depth=6, max_features=0.7500000000000001, min_samples_leaf=18, min_samples_split=13, n_estimators=100, subsample=0.05)),
        PolynomialFeatures(degree=2, include_bias=False, interaction_only=False),
        RandomForestRegressor(bootstrap=True, max_features=0.8500000000000001, min_samples_leaf=18, min_samples_split=11, n_estimators=100)
        )

        available_pipeline.fit(available_Xtrain, available_ytrain)
        
        
        print(demand_pipeline.score(demand_Xtest, demand_ytest))
        print(available_pipeline.score(available_Xtest,available_ytest))
        
        pickle.dump(demand_pipeline, open("demand_RF_model.sav", 'wb'))
        pickle.dump(available_pipeline, open("free_vehicles_RF_model.sav", 'wb'))


TrainingObj = DemandTrain()
TrainingObj.trainRF()



0.5018367687875771
0.6822356182148347


In [3]:
import numpy
import sklearn
import dateutil
import sqlite3
from sqlite3 import Error
from sklearn import preprocessing
import pandas as pd
import pickle
from sklearn.externals import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import platform
import os
from tpot import TPOTRegressor
from sklearn.model_selection import train_test_split


In [4]:
def create_connection( db_file):
        """ create a database connection to the SQLite database
            specified by the db_file
        :param db_file: database file
        :return: Connection object or None
        """
        try:
            conn = sqlite3.connect(db_file)
            return conn
        except Error as e:
            print(e)
        return None

In [46]:
Sql_connection1 = create_connection('/opt/apps/scripts/jaarvis_demand_supply/evo.db')

In [47]:
train_df = pd.read_sql_query("select * from zones order by id", Sql_connection1)

In [155]:
def data_cleansing():
        global train_df
        train_df = train_df.dropna(how='any')
        train_df.time = train_df.time.astype(str).str[:2].astype(int)
        train_df.time = train_df.time.replace(regex=True, inplace=True, to_replace=r'\D', value=r'')
        train_df.time = [(x or -1) for x in train_df.time]
        train_df.zone = [(x or 'unknown') for x in train_df.zone]
        train_df.year = [(x or 0) for x in train_df.year]
        train_df.month = [(x or 0) for x in train_df.month]
        train_df.week = [(x or 0) for x in train_df.week]
        train_df.day = [(x or 0) for x in train_df.day]
        train_df.booked_vehicles = [(x or 0) for x in train_df.booked_vehicles]
        train_df.free_vehicles = [(x or 0) for x in train_df.free_vehicles]

        train_df = train_df.mask(train_df.eq('None')).dropna()
        train_df = train_df.fillna(0)

        return train_df

In [156]:
 def training_data():

        global train_df
        data_cleansing()
        le = preprocessing.LabelEncoder()
        le = le.fit(train_df.zone)
        numpy.save('classes.npy', le.classes_)
        pd.DataFrame(train_df['zone'].unique(), columns=['zones']).to_csv("zones_list.csv")
        train_df['zone'] = le.transform(train_df.zone.get_values())

        X = train_df[['zone', 'year', 'month', 'week', 'day', 'time']]
        X = X.values
        demand = train_df.booked_vehicles.values
        free_vehicles = train_df.free_vehicles.values

        return X, demand, free_vehicles
training_data()

(array([[1233, '2018', 11, 45, 5, -1],
        [234, '2018', 11, 45, 5, -1],
        [1059, '2018', 11, 45, 5, -1],
        ...,
        [116, '2018', 11, 48, 28, -1],
        [750, '2018', 11, 48, 28, -1],
        [126, '2018', 11, 48, 28, -1]], dtype=object),
 array([0., 0., 0., ..., 1., 1., 1.]),
 array([1, 1, 1, ..., 0, 1, 1]))

In [None]:
import flask
import pickle
import pandas as pd
import json
from flask import Flask, request
from flask_restful import Resource, Api, fields, marshal_with

app = Flask(__name__)
api = Api(app)

class DemandAPI(Resource):

    def __init__(self):
        self.demand_model = pickle.load(open('demand_ML_model.sav', 'rb'))
        self.free_vehicles_model = pickle.load(open('free_vehicles_ML_model.sav', 'rb'))
        # self.year=0
        # self.month=0
        # self.day=0
        # self.hour=0
        # self.zone=0

# @app.route('/<string:name>', methods=['POST', 'GET'])
    def get(self, year, month, day, hour, zone, avg_veh_7_days):
        pass
        # return str(self.Pred_obj.demand_predict(year, month, day, hour, zone))


    def post(self):
        try:
            data = request.get_json(silent=True)
            year = data['year']
            month = data['month']
            week = data['week']
            day = data['day']
            hour = data['hour']
            zone = data['zone']

        except:
            print("put request expected json, other format given ")

        try:
            year = request.args.get('year')
            month = request.args.get('month')
            week = request.args.get('week')
            day = request.args.get('day')
            hour = request.args.get('hour')
            zone = request.args.get('zone')

        except:
            print("put request expected string query, other format given")

        df = pd.read_csv('zones_list.csv')
        df.year = year
        df.month = month
        df.week = week
        df.day = day
        df.hour = hour
        df.zone = df.zone
        df= df[['zone', 'year', 'month', 'week', 'day', 'time']]


        X_test = df.values
        pred_demand = self.demand_model.predict(X_test)
        pred_free_vehicles = self.free_vehicles_model.predict(X_test)
        df['demand'] = pred_demand
        df['free_vehicles'] = pred_free_vehicles
        return flask.jsonify({'demand': json.dumps(dict(zip(df['zone'], df['booked_vehicles']))),
                              'available_cars': json.dumps(dict(zip(df['zone'], df['free_vehicles'])))})

api.add_resource(DemandAPI, '/demand')

app.run(port=5000)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


In [11]:
demand_model = pickle.load(open('demand_RF_model.sav', 'rb'))
free_vehicles_model = pickle.load(open('free_vehicles_RF_model.sav', 'rb'))

In [29]:
df = pd.read_csv('zones_list.csv')

In [30]:
df.head()

Unnamed: 0.1,Unnamed: 0,zone
0,0,V6S 1C2
1,1,V5L 4A2
2,2,V6K 3E5
3,3,V5X 1E4
4,4,V6A 1V9


In [31]:
df['year']=2018
df['month']=12
df['week']=50
df['day']=13
df['hour']=12
encoder = preprocessing.LabelEncoder()
encoder.classes_ = numpy.load('classes.npy')
df['zone'] = encoder.transform(df.zone.get_values())
df=df[['zone','year','month','week','day','hour']]

In [32]:
df.head()

Unnamed: 0,zone,year,month,week,day,hour
0,1331,2018,12,50,13,12
1,242,2018,12,50,13,12
2,1132,2018,12,50,13,12
3,645,2018,12,50,13,12
4,771,2018,12,50,13,12


In [33]:
X_test = df.values
scaler = joblib.load("scaler.save")
X_test=scaler.transform(X_test)
pred_demand = demand_model.predict(X_test)
pred_free_vehicles = free_vehicles_model.predict(X_test)



In [34]:
df['demand'] = pred_demand
df['free_vehicles'] = pred_free_vehicles

In [11]:
# df.demand=df['demand'].apply(numpy.ceil)
# df.free_vehicles = df.free_vehicles.apply(numpy.ceil)
df.demand=df['demand']
df.free_vehicles = df.free_vehicles


In [20]:
X_test

array([[ 1.30337434,  0.        ,  0.98140977,  1.69627847, -0.08004961,
        16.        ],
       [-1.15420854,  0.        ,  0.98140977,  1.69627847, -0.08004961,
        16.        ],
       [ 0.85571895,  0.        ,  0.98140977,  1.69627847, -0.08004961,
        16.        ],
       ...,
       [ 1.23328688,  0.        ,  0.98140977,  1.69627847, -0.08004961,
        16.        ],
       [ 1.00493741,  0.        ,  0.98140977,  1.69627847, -0.08004961,
        16.        ],
       [ 0.06214803,  0.        ,  0.98140977,  1.69627847, -0.08004961,
        16.        ]])

In [18]:
df[df.demand>20]

Unnamed: 0,zone,year,month,week,day,hour,demand,free_vehicles
574,488,2018,12,50,13,5,24.830215,15.421959
606,484,2018,12,50,13,5,24.686199,15.594415
698,485,2018,12,50,13,5,24.772909,15.594415
822,486,2018,12,50,13,5,24.772909,15.455887
911,487,2018,12,50,13,5,24.772909,15.421959


In [25]:
df[df.demand>20]

Unnamed: 0,zone,year,month,week,day,hour,demand,free_vehicles
574,488,2018,12,50,13,6,24.830215,15.421959
606,484,2018,12,50,13,6,24.686199,15.594415
698,485,2018,12,50,13,6,24.772909,15.594415
822,486,2018,12,50,13,6,24.772909,15.455887
911,487,2018,12,50,13,6,24.772909,15.421959


In [36]:
df[df.demand>20]

Unnamed: 0,zone,year,month,week,day,hour,demand,free_vehicles
574,488,2018,12,50,13,12,24.830215,15.421959
606,484,2018,12,50,13,12,24.686199,15.594415
698,485,2018,12,50,13,12,24.772909,15.594415
822,486,2018,12,50,13,12,24.772909,15.455887
911,487,2018,12,50,13,12,24.772909,15.421959


In [35]:
df[df.demand>20]

Unnamed: 0,zone,year,month,week,day,hour,demand,free_vehicles
574,488,2018,12,50,13,12,24.830215,15.421959
606,484,2018,12,50,13,12,24.686199,15.594415
698,485,2018,12,50,13,12,24.772909,15.594415
822,486,2018,12,50,13,12,24.772909,15.455887
911,487,2018,12,50,13,12,24.772909,15.421959


In [172]:
train_df = train_df.dropna(how='any')
train_df.time.unique()

array([13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 12,  0,  1,  2,  3,  4,
        5,  6,  7,  8,  9, 10, 11])

In [171]:
train_df = train_df.dropna(how='any')
train_df.time = train_df.time.astype(str).str[:2].astype(int)

In [170]:
train_df.time = train_df.time.replace(regex=True, to_replace=r'\D', value=r'')

In [169]:
train_df = pd.read_sql_query("select * from zones;", Sql_connection1)

In [1]:
def create_connection( db_file):
        """ create a database connection to the SQLite database
            specified by the db_file
        :param db_file: database file
        :return: Connection object or None
        """
        try:
            conn = sqlite3.connect(db_file)
            return conn
        except Error as e:
            print(e)
        return None

In [3]:
import numpy
import sklearn
import dateutil
import sqlite3
from sqlite3 import Error
from sklearn import preprocessing
import pandas as pd
import pickle
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import platform
import os


In [32]:
Sql_connection2 = create_connection('/opt/apps/scripts/jaarvis_demand_supply/zone_travel.db')

In [33]:
supply_df = pd.read_sql_query("select * from zone_travel_detail;", Sql_connection2)

In [34]:
cursor = Sql_connection2.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(cursor.fetchall())

[('zone_travel_detail',)]


In [35]:
supply_df.to_zone.nunique()

923

In [23]:
Sql_connection1 = create_connection('/opt/apps/scripts/jaarvis_demand_supply/evo.db')

In [24]:
cursor = Sql_connection1.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(cursor.fetchall())

[('vehicles',), ('sqlite_sequence',), ('zones',), ('supply',)]


0

In [29]:
supply_df1 = pd.read_sql_query("select * from supply;", Sql_connection1)

In [48]:
train_df

Unnamed: 0,id,time,zone,day,week,month,booked_vehicles,free_vehicles,year
0,1,13:00,V6S 1C2,5,45,11,0.0,1,2018
1,2,13:00,V5L 4A2,5,45,11,0.0,1,2018
2,3,13:00,V6K 3E5,5,45,11,0.0,1,2018
3,4,13:00,V5X 1E4,5,45,11,0.0,1,2018
4,5,13:00,,5,45,11,0.0,4,2018
5,6,13:00,V6A 1V9,5,45,11,0.0,1,2018
6,7,13:00,V3L 3Z7,5,45,11,0.0,1,2018
7,8,13:00,V5V 2V5,5,45,11,0.0,1,2018
8,9,13:00,V3L 5M1,5,45,11,0.0,1,2018
9,10,13:00,V5T 3Z5,5,45,11,0.0,2,2018


In [69]:
train_df[(train_df.zone=='V7B 0A1')].sort_values(['zone','id'])

Unnamed: 0,id,time,zone,day,week,month,booked_vehicles,free_vehicles,year
55395,56285,13,V7B 0A1,10,50,12,1.0,1,2018
