# Predicting Traffic

We will predict how many vehicles are on a stretch of road near a baseball stadium, given the weekday, the time of day, the away team, and whether a game is in progress.

In [22]:
import numpy
from matplotlib import pyplot
from sklearn.metrics import classification_report, mean_absolute_error
from sklearn.ensemble import ExtraTreesRegressor
from sklearn import model_selection, preprocessing
import ipywidgets as widgets
from IPython.display import display

## Loading Data

In [33]:
input_file = "traffic_data.txt"
records = []

# Each non-blank line of the file is one comma-separated record.
with open(input_file, 'r') as file:
    for line in file:
        # strip() rather than line[:-1]: slicing blindly removes the last
        # character, which corrupts the final field when the file has no
        # trailing newline and leaves a stray "\r" behind on CRLF files.
        line = line.strip()
        if line:
            records.append(line.split(","))

if not records:
    raise ValueError("No records found in {}".format(input_file))

data = numpy.array(records)

# Convert every column to numbers. A column is treated as numeric when its
# value in the first record consists only of digits; every other column gets
# its own LabelEncoder, appended to label_encoders in column order.
label_encoders = []
data_encoded = numpy.empty(data.shape)
for i, item in enumerate(records[0]):
    if item.isdigit():
        data_encoded[:, i] = data[:, i]
    else:
        label_encoders.append(preprocessing.LabelEncoder())
        data_encoded[:, i] = label_encoders[-1].fit_transform(data[:, i])

# The last column is the prediction target; all preceding columns are inputs.
features = data_encoded[:, :-1].astype(int)
classes = data_encoded[:, -1].astype(int)

## Training the Regressor

In [34]:
# Hold out 25% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run.
split = model_selection.train_test_split(
    features,
    classes,
    test_size=0.25,
    random_state=5,
)
feature_train, feature_test, class_train, class_test = split

# fit() returns the estimator itself, so construction and training can chain.
regressor = ExtraTreesRegressor(
    n_estimators=100,
    max_depth=4,
    random_state=0,
).fit(feature_train, class_train)

# Score the model on the held-out rows.
predictions = regressor.predict(feature_test)
mad = mean_absolute_error(class_test, predictions)

print("Mean Absolute Error: {:.2f}".format(mad))

Mean Absolute Error: 7.42


## Setting Up the Widgets

In [49]:
def dropdown(encoder, description):
    """Build a Dropdown listing the encoder's known labels in sorted order.

    encoder: a fitted object exposing its labels as ``classes_``.
    description: caption passed through to the widget.
    """
    choices = sorted(encoder.classes_)
    return widgets.Dropdown(options=choices, description=description)

# One dropdown per input column, paired with label_encoders by position.
date, time, away_team, active_game = (
    dropdown(label_encoders[position], caption)
    for position, caption in enumerate(
        ("Weekday", "Time of Day", "Away Team", "Active Game?")
    )
)

## Predicting Traffic

In [50]:
def predict(date, time, away_team, active_game):
    """Encode one observation and print the regressor's traffic prediction.

    Each argument is a string. A value made up only of digits is converted
    with int(); any other value is encoded with the next unused entry of the
    module-level ``label_encoders`` list, consumed in argument order.

    Prints "Predicted traffic: N" with the prediction formatted to zero
    decimal places; returns None.
    """
    test_datapoint = [date, time, away_team, active_game]
    test_datapoint_encoded = []

    count = 0  # index of the next encoder to use for a non-numeric value
    for item in test_datapoint:
        if item.isdigit():
            test_datapoint_encoded.append(int(item))
        else:
            # transform() returns a one-element array. Take element [0]
            # before int(): calling int() on an array with ndim > 0 is
            # deprecated since NumPy 1.25.
            encoded = label_encoders[count].transform([item])[0]
            test_datapoint_encoded.append(int(encoded))
            count += 1

    print("Predicted traffic: {0:.0f}".format(regressor.predict([test_datapoint_encoded])[0]))

# Bind the four dropdowns to predict's keyword arguments. Left as the cell's
# final expression so the notebook renders the resulting control.
widgets.interactive(predict, date=date, time=time, away_team=away_team, active_game=active_game)

interactive(children=(Dropdown(description='Weekday', options=('Friday', 'Monday', 'Saturday', 'Sunday', 'Thur…