# Usage Example
1. Load Accidents and Clusters Data
2. Train Models
3. Load Trained Models and Configuration

In [16]:
import utilities
import graphs_templates
import model_training
import pandas as pd
import xgboost as xgb
from sklearn.metrics import roc_curve
import json


## 1. Load Accidents and Clusters Data

In [17]:
# Load the project configuration via the project's `utilities` helper.
config = utilities.load_config()
# Load the accidents data described by that configuration.
data = utilities.load_data(config)
# Read the clusters table from the CSV path stored under CLUSTERS_DATA_PATH.
# NOTE(review): `clusters_data` is not referenced by the cells visible below.
clusters_data = pd.read_csv(config["CLUSTERS_DATA_PATH"])

In [18]:
# Draw the "Accidents by Severity per Year" chart: the severity and year
# column names come from the configuration, and show=True asks the helper
# to display the figure.
fig = graphs_templates.plot_accidents_by_attr_per_year(
    df=data,
    attribute=config["SEVERE_FEATURE"],
    attribute_year_name=config["YEAR_FEATURE"],
    title=f'Accidents by Severity per Year {config["COUNTRY"]}',
    show=True,
)

## 2. Train Models

In [19]:
# Train on the loaded data using the loaded configuration and save the model
# to `save_path`.
# NOTE(review): presumably this also writes the matching
# `models/example_israel_model_configuration.json` that section 3 reads --
# confirm in `model_training.train_model`.
model_training.train_model(
    data,
    config,
    save_path="models/example_israel_model.json",
)

1 2008
2 2009
3 2010
4 2011
5 2012
6 2013


## 3. Load Trained Models and Configuration

In [20]:
# Locations of the saved model and of its configuration file.
model_path = 'models/example_israel_model.json'
config_path = 'models/example_israel_model_configuration.json'

# JSON is UTF-8 by specification; pass the encoding explicitly so the file is
# decoded the same way regardless of the platform's locale default.
with open(config_path, encoding="utf-8") as f:
    model_config = json.load(f)

# Restore the trained classifier from the saved model file.
xgb_classifier = xgb.XGBClassifier()
xgb_classifier.load_model(model_path)

# Split the clusters table referenced by the model's own configuration.
# Only the test part is used in this cell.
model_clusters = pd.read_csv(model_config["CLUSTERS_DATA_PATH"])
X_train, y_train, X_test, y_test = utilities.split_train_test(model_clusters, model_config)

# NOTE(review): the meaning of the positional `0` is defined by
# `graphs_templates.plot_top_prediction_labels` -- confirm and consider
# passing it by keyword.
pred_fig = graphs_templates.plot_top_prediction_labels(
    X_test,
    y_test,
    xgb_classifier,
    0,
    show=True,
    title=f'Top Predictions {model_config["COUNTRY"]}',
)

# ROC curve computed from column 1 of the predicted class probabilities.
y_prob = xgb_classifier.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_test, y_prob[:, 1])

# Collect the ROC points into a frame for plotting; the scalar country value
# is broadcast to every row and used as the colour feature.
# (No AUC value is computed in this cell.)
df = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds, "country": model_config["COUNTRY"]})
roc_fig = graphs_templates.plot_roc_curve(
    df,
    title=f'ROC Curve {model_config["COUNTRY"]}',
    color_feature='country',
    show=True,
)