In [106]:
import pandas as pd
import numpy as np

In [107]:
from sklearn.externals import joblib



In [108]:
model = joblib.load('model.joblib')

In [109]:
df = pd.read_csv('../data/df_final.csv')

In [110]:
df = df.drop(columns='Unnamed: 0')

In [113]:
df.head()

Unnamed: 0,STATE,COUNTY,DAY,MONTH,YEAR,DAY_WEEK,LATITUDE,LONGITUD,HOUR,WEATHER,ROUTE,TWAY_ID,TYP_INT,ACCIDENT
0,1,73,19,2,2017,1,33.335661,-87.007094,23,1,1,I-459,1,1
1,1,89,14,2,2017,3,34.661528,-86.786853,14,1,1,I-565,1,1
2,1,101,31,1,2017,3,32.366519,-86.145281,20,1,1,I-85,1,1
3,1,73,1,1,2017,1,33.510175,-86.894003,16,2,6,20TH ST ENSLEY,2,1
4,1,13,1,1,2017,1,31.947236,-86.556778,20,2,1,I-65,1,1


In [None]:
df = df.drop(columns=['ACCIDENT', 'STATE', 'COUNTY'])

In [9]:
from sklearn.model_selection import train_test_split
trainval, test = train_test_split(df, test_size=0.2, random_state=42)

In [10]:
target = 'ACCIDENT'
train_features = trainval.drop(columns=[target, 'STATE', 'COUNTY'])
numeric_features = train_features.select_dtypes(include='number').columns.tolist()
categorical_features = train_features.select_dtypes(exclude='number').columns.tolist()
cardinality = train_features.select_dtypes(exclude='number').nunique()
high_cardinality_features = cardinality[cardinality > 100].index.tolist()
low_cardinality_features = cardinality[cardinality <= 100].index.tolist()
features = numeric_features + categorical_features

X_trainval = trainval[features]
y_trainval = trainval[target]
X_test = test[features]
y_test = test[target]

In [11]:
y_proba = model.predict_proba(X_test)[:, 1]

In [12]:
from sklearn.metrics import roc_auc_score, roc_curve

roc_auc_score(y_test, y_proba)

0.8550888319588322

In [19]:
from sklearn.metrics import classification_report
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.85      1.00      0.92     80235
           1       0.94      0.29      0.44     20064

    accuracy                           0.85    100299
   macro avg       0.90      0.64      0.68    100299
weighted avg       0.87      0.85      0.82    100299



In [20]:
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

def plot_confusion_matrix(y_true, y_pred):
    labels = unique_labels(y_true)
    columns = [f'Predicted {label}' for label in labels]
    index = [f'Actual {label}' for label in labels]
    table = pd.DataFrame(confusion_matrix(y_true, y_pred), 
                         columns=columns, index=index)
    return sns.heatmap(table, annot=True, fmt='d', cmap='viridis')

In [21]:
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact, fixed
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.utils.multiclass import unique_labels

def set_threshold(y_true, y_pred_proba, threshold=0.1):
    """
    For binary classification problems. 
    y_pred_proba : predicted probability of class 1
    """
    
    # Apply threshold to predicted probabilities
    # to get discrete predictions
    class_0, class_1 = unique_labels(y_true)
    y_pred = np.full_like(y_true, fill_value=class_0)
    y_pred[y_pred_proba > threshold] = class_1
    
    # Plot distribution of predicted probabilities
    ax = sns.distplot(y_pred_proba)
    ax.axvline(threshold, color='red')
    plt.title('Distribution of predicted probabilities')
    plt.show()

    # Calculate true positive rate and false positive rate
    true_positives = (y_pred==y_true) & (y_pred==class_1)
    false_positives = (y_pred!=y_true) & (y_pred==class_1)
    actual_positives = (y_true==class_1)
    actual_negatives = (y_true==class_0)
    true_positive_rate = true_positives.sum() / actual_positives.sum()
    false_positive_rate = false_positives.sum() / actual_negatives.sum()
    print('False Positive Rate', false_positive_rate)
    print('True Positive Rate', true_positive_rate)
    
    # Plot ROC curve
    fpr, tpr, thresholds = roc_curve(y_true==class_1, y_pred_proba)
    plt.plot(fpr, tpr)
    plt.title('ROC curve')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    
    # Plot point on ROC curve for the current threshold
    plt.scatter(false_positive_rate, true_positive_rate)
    plt.show()
    
    # Show ROC AUC score
    print('Area under the Receiver Operating Characteristic curve:', 
          roc_auc_score(y_true, y_pred_proba))
    
    # Show confusion matrix & classification report
    plot_confusion_matrix(y_true, y_pred)
    print(classification_report(y_true, y_pred))

interact(set_threshold, 
         y_true=fixed(y_test), 
         y_pred_proba=fixed(y_proba), 
         threshold=(0,1,0.05));

interactive(children=(FloatSlider(value=0.1, description='threshold', max=1.0, step=0.05), Output()), _dom_cla…

## Flask API

In [34]:
def make_dataframe(input):
    column_headers = ['DAY', 'MONTH', 'YEAR', 'DAY_WEEK', 'LATITUDE', 'LONGITUD', 'HOUR',
                       'WEATHER', 'ROUTE', 'TYP_INT', 'TWAY_ID']
    X = pd.DataFrame(input, columns=column_headers)
    y = model.predict(X)
    y_pred_proba = model.predict_proba(X)[:, 1].tolist()
    return y_pred_proba

In [4]:
import json
import requests

In [610]:
input = json.dumps(
[{
        "LATITUDE": 30.748005970539843,
        "LONGITUD": -60.11728659260366,
        "WEATHER": 2,
        "ROUTE": 6,
        "TYP_INT": 1,
        "TWAY_ID": "WOLF RIDGE RD"
    },
    {
        "LATITUDE": 32.54497821745906,
        "LONGITUD": -53.63684929388019,
        "WEATHER": 1,
        "ROUTE": 2,
        "TYP_INT": 1,
        "TWAY_ID": "US-80 E TEXAS STREET"
    }
])

In [405]:
input = json.dumps(
    [{"DAY": 25, 
     "MONTH": 5, 
     "YEAR": 2016, 
     "DAY_WEEK": 2, 
     "LATITUDE": 30.748005970539843, 
     "LONGITUD": -88.11728659260366, 
     "HOUR": 3, 
     "WEATHER": 2,
     "ROUTE": 6,
     "TYP_INT": 1,
     "TWAY_ID": "WOLF RIDGE RD"}])

In [483]:
print(input)

[{"LATITUDE": 30.748005970539843, "LONGITUD": -60.11728659260366, "WEATHER": 2, "ROUTE": 6, "TYP_INT": 1, "TWAY_ID": "WOLF RIDGE RD"}, {"LATITUDE": 32.54497821745906, "LONGITUD": -53.63684929388019, "WEATHER": 1, "ROUTE": 2, "TYP_INT": 1, "TWAY_ID": "US-80 E TEXAS STREET"}]


In [619]:
input = json.loads(input)

In [314]:
type(input)

str

In [400]:
print(input)

[{'DAY': 25, 'MONTH': 5, 'YEAR': 2016, 'DAY_WEEK': 2, 'LATITUDE': 30.748005970539843, 'LONGITUD': -88.11728659260366, 'HOUR': 3, 'WEATHER': 2, 'ROUTE': 6, 'TYP_INT': 1, 'TWAY_ID': 'WOLF RIDGE RD'}]


In [488]:
data = []
for i in range(len(input)):
    for keys, values in input[i].items():
        observation = [today.day, today.month, today.year, 
                              today.weekday(), input[i]['LATITUDE'], 
                              input[i]['LONGITUD'], input[i]['HOUR'], 
                              input[i]['WEATHER'], input[i]['ROUTE'], 
                              input[i]['TYP_INT'], input[i]['TWAY_ID']]
    data.append(observation)

KeyError: 'HOUR'

In [402]:
data

[[25,
  5,
  2016,
  2,
  30.748005970539843,
  -88.11728659260366,
  3,
  2,
  6,
  1,
  'WOLF RIDGE RD']]

In [403]:
X = pd.DataFrame(data, columns=column_headers)

In [404]:
make_dataframe(data)

[0.20049859583377838]

In [168]:
from datetime import datetime
today = datetime.today()

In [603]:
today.hour

20

In [486]:
observation = [[today.day, today.month, today.year, today.weekday(), float(input['LATITUDE']), 
                float(input['LONGITUD']), int(input['HOUR']), int(input['WEATHER']), int(input['ROUTE']), 
                int(input['TYP_INT']), input['TWAY_ID']]]

TypeError: list indices must be integers or slices, not str

In [198]:
observation

[[24,
  9,
  2019,
  1,
  30.748005970539843,
  -88.11728659260366,
  3,
  2,
  6,
  1,
  'WOLF RIDGE RD']]

In [303]:
result = make_dataframe(data)

In [305]:
result[0]

0.20049859583377838

In [None]:
prediction_[i] = {
    "LATITUDE": input['LONGITUD'],
    "LONGITUDE": input['LATITUDE'],
    "PROBABILITY OF ACCIDENT": result[i]
}

In [327]:
def prediction(input):
    data = []
    pred = {}
    for i in range(len(input)):
        for keys, values in input[i].items():
            observation = [today.day, today.month, today.year, today.weekday(), float(input[i]['LATITUDE']), 
                        float(input[i]['LONGITUD']), int(input[i]['HOUR']), int(input[i]['WEATHER']), 
                        int(input[i]['ROUTE']), int(input[i]['TYP_INT']), input[i]['TWAY_ID']]
        data.append(observation)
        pred[i] = {
        "LATITUDE": input[i]['LONGITUD'],
        "LONGITUDE": input[i]['LATITUDE'],
        "PROBABILITY OF ACCIDENT": result[i]
        }
    return pred

In [347]:
prediction(input)

AttributeError: 'str' object has no attribute 'items'

In [197]:
# url = "http://localhost:5000/api"
url = "http://saferoads.herokuapp.com/api"

In [201]:
input = json.dumps({"LATITUDE": 30, "LONGITUD": -90})

In [202]:
send = requests.post(url, input)

In [203]:
print(send.json())

{'data': [{'accidents_2015': 10, 'accidents_2016': 11, 'accidents_2017': 12, 'current_weather': 'Clear', 'fatalities_2015': 10, 'fatalities_2016': 11, 'fatalities_2017': 16, 'hour_most_accidents': '10 PM', 'latitude': 30.02476389, 'longitude': -90.01293333, 'month_most_accidents': 'Oct', 'most_common_type_collision': 'Not collision with motor vehicle', 'probability_accident': '21.45%', 'risk_level': 'Moderate to High', 'weekday_most_accidents': 'Wednesday'}, {'accidents_2015': 10, 'accidents_2016': 11, 'accidents_2017': 12, 'current_weather': 'Clear', 'fatalities_2015': 10, 'fatalities_2016': 11, 'fatalities_2017': 16, 'hour_most_accidents': '10 PM', 'latitude': 30.02054444, 'longitude': -89.95975556, 'month_most_accidents': 'Oct', 'most_common_type_collision': 'Not collision with motor vehicle', 'probability_accident': '24.79%', 'risk_level': 'Moderate to High', 'weekday_most_accidents': 'Wednesday'}, {'accidents_2015': 10, 'accidents_2016': 11, 'accidents_2017': 12, 'current_weather'

In [481]:
No Additional Atmospheric Conditions 0
Clear 1
Cloudy 10
Rain 2
Sleet or Hail 3
Freezing Rain or Drizzle 12
Snow 4
Blowing Snow 11
Fog, Smog, Smoke 5
Severe Crosswinds 6
Blowing Sand, Soil, Dirt 7
Other 8
Not Reported 98
Unknown 99

SyntaxError: invalid syntax (<ipython-input-481-6ea6fd4d7020>, line 1)

In [None]:
if main =
thunderstorm, then 2
if main = Drizzle, then 12
if main = Rain, then 2
if main = Snow, then 4
if main = Dust, then 7
if main = Sand, then 7
if main = Smoke, then 5
if main = Clear, then 1
if main = Clouds, then 10
if main = Fog, then 5


In [602]:
import requests
    
def get_weather(lat, lon):
    URL = f'http://api.openweathermap.org/data/2.5/weather?lat={lat}&lon={lon}&APPID=ae2fff8ef560467a665dab34e9784d4c'
    r = requests.get(URL)
    data = r.json()
    return data

print(get_weather(56.95, -11.83))

{'coord': {'lon': -11.83, 'lat': 56.95}, 'weather': [{'id': 803, 'main': 'Clouds', 'description': 'broken clouds', 'icon': '04n'}], 'base': 'stations', 'main': {'temp': 285.691, 'pressure': 987.62, 'humidity': 84, 'temp_min': 285.691, 'temp_max': 285.691, 'sea_level': 987.62, 'grnd_level': 988.18}, 'wind': {'speed': 3.42, 'deg': 193.403}, 'clouds': {'all': 67}, 'dt': 1569443738, 'sys': {'message': 0.011, 'sunrise': 1569393460, 'sunset': 1569436835}, 'timezone': 3600, 'id': 0, 'name': '', 'cod': 200}


In [604]:
weather = get_weather(input[i]['LATITUDE'], input[i]['LONGITUD'])

TypeError: string indices must be integers

In [642]:
input = json.dumps(
[{
        "LATITUDE": 20.748005970539843,
        "LONGITUD": -60.11728659260366,
        "ROUTE": 6,
        "TYP_INT": 1,
        "TWAY_ID": "WOLF RIDGE RD"
    },
    {
        "LATITUDE": 22.54497821745906,
        "LONGITUD": -53.63684929388019,
        "ROUTE": 2,
        "TYP_INT": 1,
        "TWAY_ID": "US-80 E TEXAS STREET"
    }
])

In [638]:
input = json.loads(input)

In [639]:
def prediction(input):
    data = []
    pred = {}
    for i in range(len(input)):
            for keys, values in input[i].items():
                weather = get_weather(input[i]['LATITUDE'], input[i]['LONGITUD'])
                observation = [today.day, today.month, today.year, 
                              today.weekday(), input[i]['LATITUDE'], 
                              input[i]['LONGITUD'], today.hour, 
                              weather, input[i]['ROUTE'], 
                                  input[i]['TYP_INT'], input[i]['TWAY_ID']]
            data.append(observation)
            pred[i] = {
            "LATITUDE": input[i]['LONGITUD'],
            "LONGITUDE": input[i]['LATITUDE'],
            "PROBABILITY OF ACCIDENT": result[i]
            }
    return data

In [635]:
get_weather(56.95, -11.83)['weather'][0]

{'id': 803, 'main': 'Clouds', 'description': 'broken clouds', 'icon': '04n'}

In [694]:
def get_weather(lat, lon):
    URL = f'http://api.openweathermap.org/data/2.5/weather?lat={lat}&lon={lon}&APPID=ae2fff8ef560467a665dab34e9784d4c'
    r = requests.get(URL)
    data = r.json()
    return r

In [695]:
r.url

'http://api.openweathermap.org/data/2.5/weather?lat=34&lon=-50.34&APPID=ae2fff8ef560467a665dab34e9784d4c'

In [692]:
data = get_weather(56.95, -11.83)

In [693]:
data['weather'][0]['main']

KeyError: 'weather'

In [None]:
if main =
thunderstorm, then 2
if main = Drizzle, then 12
if main = Rain, then 2
if main = Snow, then 4
if main = Dust, then 7
if main = Sand, then 7
if main = Smoke, then 5
if main = Clear, then 1
if main = Clouds, then 10
if main = Fog, then 5


In [742]:
weather_dict = {
    'Clear': 1,
    'Clouds': 10,
    'Rain': 2,
    'Thunderstorm': 2,
    'Snow': 4,
    'Sand': 7,
    'Dust': 7,
    'Smoke': 5,
    'Fog': 5,
    'Drizzle': 12
}

def get_weather():
#     URL = f'http://api.openweathermap.org/data/2.5/weather?lat={lat}&lon={lon}&APPID=ae2fff8ef560467a665dab34e9784d4c'
#     r = requests.get(URL)
#     data = r.json()
#     data = {"coord":{"lon":30,"lat":50},"weather":[{"id":800,"main":"Smoke","description":"clear sky","icon":"01n"}],"base":"stations","main":{"temp":286.02,"pressure":1014,"humidity":62,"temp_min":285.93,"temp_max":286.15},"visibility":10000,"wind":{"speed":3,"deg":130},"clouds":{"all":0},"dt":1569436540,"sys":{"type":1,"id":8903,"message":0.0076,"country":"UA","sunrise":1569383403,"sunset":1569426818},"timezone":10800,"id":709248,"name":"Fastiv","cod":200}
    data = {"cod":429, "message": "Your account is temporary blocked due to exceeding of requests limitation of your subscription type. Please choose the proper subscription http://openweathermap.org/price"}
    if 'weather' not in data:
        weather = 99
        description = 'Unknown'
    else:
        description = data['weather'][0]['main']
        if description in weather_dict.keys():
            weather = weather_dict[description]
        else:
            weather = 99
    return weather, description

In [743]:
#     data = {"coord":{"lon":30,"lat":50},"weather":[{"id":800,"main":"Smoke","description":"clear sky","icon":"01n"}],"base":"stations","main":{"temp":286.02,"pressure":1014,"humidity":62,"temp_min":285.93,"temp_max":286.15},"visibility":10000,"wind":{"speed":3,"deg":130},"clouds":{"all":0},"dt":1569436540,"sys":{"type":1,"id":8903,"message":0.0076,"country":"UA","sunrise":1569383403,"sunset":1569426818},"timezone":10800,"id":709248,"name":"Fastiv","cod":200}
#     data = {"cod":429, "message": "Your account is temporary blocked due to exceeding of requests limitation of your subscription type. Please choose the proper subscription http://openweathermap.org/price"}

In [744]:
t = get_weather()

In [746]:
t[0]

99

In [70]:
example = {
	"4": {
		"LATITUDE": 30.748005970539843,
		"LONGITUD": -60.11728659260366,
		"PROBABILITY OF ACCIDENT": "24.93%",
		"RISK LEVEL": "Moderate to High"
	},
	"5": {
		"LATITUDE": 32.54497821745906,
		"LONGITUD": -53.63684929388019,
		"PROBABILITY OF ACCIDENT": "20.06%",
		"RISK LEVEL": "Moderate to High"
	}
}

In [71]:
example

{'4': {'LATITUDE': 30.748005970539843,
  'LONGITUD': -60.11728659260366,
  'PROBABILITY OF ACCIDENT': '24.93%',
  'RISK LEVEL': 'Moderate to High'},
 '5': {'LATITUDE': 32.54497821745906,
  'LONGITUD': -53.63684929388019,
  'PROBABILITY OF ACCIDENT': '20.06%',
  'RISK LEVEL': 'Moderate to High'}}

In [26]:
example['4']

{'LATITUDE': 30.748005970539843,
 'LONGITUD': -60.11728659260366,
 'PROBABILITY OF ACCIDENT': '24.93%',
 'RISK LEVEL': 'Moderate to High'}

In [80]:
def get_array(example):
    arr = [example[item] for index, item in enumerate(example)]
    dict = {"data": arr}
    return dict

In [82]:
r = get_array(example)

In [84]:
r['data'][0]

{'LATITUDE': 30.748005970539843,
 'LONGITUD': -60.11728659260366,
 'PROBABILITY OF ACCIDENT': '24.93%',
 'RISK LEVEL': 'Moderate to High'}

In [119]:
df_db = pd.read_csv('/data/df_db.csv')

FileNotFoundError: [Errno 2] File b'/data/df_db.csv' does not exist: b'/data/df_db.csv'

In [126]:
input = json.dumps({"LATITUDE": 22.54497821745906, "LONGITUD": -53.63684929388019})

In [127]:
input = json.loads(input)

In [128]:
input['LATITUDE']

22.54497821745906