In [None]:
# !pip install -U "git+ssh://git@github.com/MindSetLib/MS-InsuranceScoring.git@InsolverPackage#egg=insolver"

In [2]:
import os

import pandas as pd
import matplotlib.pyplot as plt
from hyperopt import hp

In [3]:
from insolver.InsolverDataFrame import InsolverDataFrame
from insolver.InsolverTransforms import (
    TransformExp,
    InsolverTransformMain,
    InsolverTransforms,
    TransformAge,
    TransformMapValues,
    TransformPolynomizer,
    TransformAgeGender,
)
from insolver.InsolverWrapperGBM import InsolverGradientBoostingWrapper
from insolver.InsolverPlotsGBM import PredictionPlots, ShapPlots
from insolver.InsolverWrapperGLM import InsolverGLMWrapper
from insolver.InsolverUtils import train_val_test_split

In [4]:
# Location of the US Accidents (June 2020) CSV.
# Dataset source: https://smoosavi.org/datasets/us_accidents
# The path can be overridden with the US_ACCIDENTS_CSV environment variable so
# the notebook is not tied to one machine; the original location stays the default.
file_path = os.environ.get(
    'US_ACCIDENTS_CSV',
    '/home/andrey/PycharmProjects/MS-InsuranceScoring/US_Accidents_June20.csv',
)

In [5]:
# Load the full accidents CSV into a DataFrame.
# low_memory=False: per the pandas docs this parses the file in one pass instead
# of internal chunks, avoiding mixed-dtype inference at the cost of more memory.
df = pd.read_csv(file_path, low_memory=False)
# Show (rows, columns) of the raw frame.
df.shape

(3513740, 49)

In [6]:
# Keep a random 1% of the rows to make the demo fast.
# The sample size is derived from the frame itself (rather than a hard-coded row
# count) and the seed is fixed so a fresh "Restart & Run All" draws the same rows.
# NOTE: run this cell once per load -- re-running it shrinks `df` again.
df = df.sample(n=len(df) // 100, random_state=0)

In [7]:
# Remove the ID, Source, End_Lat and End_Lng columns, then discard every
# column that contains no values at all.
unused_columns = ['ID', 'Source', 'End_Lat', 'End_Lng']
df = df.drop(columns=unused_columns).dropna(axis=1, how='all')

In [8]:
# Instantiate the insolver GLM wrapper. Per the cell output, construction checks
# for / connects to an H2O instance (here one running at http://localhost:54321).
iglm = InsolverGLMWrapper()

Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O_cluster_uptime:,16 mins 01 secs
H2O_cluster_timezone:,Europe/Moscow
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.30.1.2
H2O_cluster_version_age:,13 days
H2O_cluster_name:,H2O_from_python_andrey_ueby22
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.836 Gb
H2O_cluster_total_cores:,4
H2O_cluster_allowed_cores:,4


In [9]:
# Wrap the frame in an InsolverTransforms pipeline. The list of transforms is
# empty, so no transformation steps are configured here.
InsTransforms = InsolverTransforms(df, [])
InsTransforms.transform()
# Save to 'transforms.pkl' (presumably the pipeline definition -- confirm); the
# same file name is passed to insolver_serving via -transforms in the serving cell.
InsTransforms.save('transforms.pkl')

In [10]:
# Rebind `df` to the data held by the transforms object (presumably the frame
# after transform() has been applied -- confirm against InsolverTransforms).
df = InsTransforms.get_data()

In [11]:
# Split into train / validation / test sets: 15% validation and 15% test per the
# arguments (presumably leaving the remaining 70% for training), shuffled with a
# fixed seed.
train, valid, test = InsTransforms.split_frame(val_size=0.15, test_size=0.15, random_state=0, shuffle=True)

In [12]:
# Response column the GLM is fitted against.
target = 'Severity'

# Predictor columns passed to the grid search, in the order they are used.
features = [
    'Start_Time', 'End_Time',
    'Start_Lat', 'Start_Lng', 'Distance(mi)',
    'Weather_Timestamp', 'Temperature(F)',
    'Astronomical_Twilight',
]

# Columns that were commented out of the feature list (not used by this model):
#   Number, Street, Side, City, County, State, Zipcode, Country, Timezone,
#   Airport_Code, Wind_Chill(F), Humidity(%), Pressure(in), Visibility(mi),
#   Wind_Direction, Wind_Speed(mph), Precipitation(in), Weather_Condition,
#   Amenity, Bump, Crossing, Give_Way, Junction, No_Exit, Railway, Roundabout,
#   Station, Stop, Traffic_Calming, Traffic_Signal, Turning_Loop,
#   Sunrise_Sunset, Civil_Twilight, Nautical_Twilight

In [13]:
# Initialise the GLM with the training and validation frames, using a gamma
# family with a log link. The "Parse progress" output suggests both frames are
# uploaded to H2O at this point -- TODO confirm.
# NOTE(review): the target (Severity) looks like a small positive integer in the
# request log; confirm that a gamma family is the intended choice.
iglm.model_init(train, valid, family='gamma', link='log')

Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%


In [14]:
# Hyperparameter grid for the GLM search (presumably H2O GLM's regularisation
# strength `lambda` and elastic-net mixing `alpha` -- confirm against the wrapper).
lambda_grid = [1, 0.5, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0]
# Eleven values 0.0, 0.1, ..., 1.0, generated by multiplication exactly as before.
alpha_grid = [step * 0.1 for step in range(11)]
params = {'lambda': lambda_grid, 'alpha': alpha_grid}

In [15]:
# Run the grid search over `params` for the chosen features and target.
# With the "Cartesian" strategy this presumably evaluates every combination
# (8 lambda values x 11 alpha values) -- confirm against the wrapper / H2O docs.
iglm.grid_search_cv(features, target, params, search_criteria={'strategy': "Cartesian"})

glm Grid Build progress: |████████████████████████████████████████████████| 100%


In [16]:
# Save the model under 'glm'; the serving cell later loads a model file from the
# glm/ directory.
iglm.save_model('glm')

In [17]:
# Score the held-out test split with the fitted GLM.
predict_glm = iglm.predict(test)

Parse progress: |█████████████████████████████████████████████████████████| 100%
glm prediction progress: |████████████████████████████████████████████████| 100%


In [18]:
# Display the predictions; the output shows a single-column 2-D array, which the
# array repr abbreviates with "...".
predict_glm

array([[2.34311124],
       [2.38576238],
       [2.35420108],
       ...,
       [2.27897115],
       [2.26447496],
       [2.55389683]])

In [19]:
# Export one row of the test split as JSON to use as a sample request payload
# for the scoring service. The fixed random_state (matching the seed used for
# the split) makes the exported example reproducible across runs.
test.sample(1, random_state=0).to_json('request_example.json')

In [None]:
# Start the scoring service with the saved model and transforms, using the flask
# service option; the log shows gunicorn listening on http://127.0.0.1:8000.
# NOTE(review): the model path embeds what looks like a run-specific grid/model
# key, so it presumably has to be updated after every re-training -- confirm.
!insolver_serving -model glm/Grid_GLM_Key_Frame__upload_bba72b272657febf2d840057309e4f4e.hex_model_python_1600347516868_1_model_71 -transforms transforms.pkl -service flask

[2020-09-17 17:44:25 +0300] [170209] [INFO] Starting gunicorn 20.0.4
[2020-09-17 17:44:25 +0300] [170209] [INFO] Listening at: http://127.0.0.1:8000 (170209)
[2020-09-17 17:44:25 +0300] [170209] [INFO] Using worker: sync
[2020-09-17 17:44:25 +0300] [170211] [INFO] Booting worker with pid: 170211
INFO:flask-app:[2020-Sep-17 17:44:30] request from 127.0.0.1: {'TMC': {'2584554': None}, 'Severity': {'2584554': 2}, 'Start_Time': {'2584554': '2016-04-26 19:31:05'}, 'End_Time': {'2584554': '2016-04-27 01:31:05'}, 'Start_Lat': {'2584554': 40.74006}, 'Start_Lng': {'2584554': -73.78995}, 'Distance(mi)': {'2584554': 0.62}, 'Description': {'2584554': 'At Utopia Pky/Exit 25 - Accident.'}, 'Number': {'2584554': None}, 'Street': {'2584554': 'Long Island Expy W'}, 'Side': {'2584554': 'R'}, 'City': {'2584554': 'Fresh Meadows'}, 'County': {'2584554': 'Queens'}, 'State': {'2584554': 'NY'}, 'Zipcode': {'2584554': '11365'}, 'Country': {'2584554': 'US'}, 'Timezone': {'2584554': 'US/Eastern'}, 'Airport_Code'