In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import folium
import seaborn as sns


In [7]:

# Load traffic data
df = pd.read_csv("dft_rawcount_local_authority_id_63.csv")
# Select columns
df = df[['count_point_id', 'direction_of_travel', 'year', 'count_date', 'hour',
       'road_type', 'latitude', 'longitude',
       'pedal_cycles', 'two_wheeled_motor_vehicles',
       'cars_and_taxis', 'buses_and_coaches', 'lgvs', 'hgvs_2_rigid_axle',
        'all_hgvs', 'all_motor_vehicles']]

# Create day of the week feature
df["day"] = pd.to_datetime(df["count_date"]).apply(lambda x: x.weekday())

def custom_round(x, base=5):
    return int(base * round(float(x)/base)) or 5

# round numbers
df["all_motor_vehicles"] = df["all_motor_vehicles"].apply(custom_round)

# Load weather data
weather = pd.read_csv("Jan00 - Jan19 hourly.csv")

# Drop empty rows
weather = weather.drop(index=weather.index[:7])
weather["#"] = pd.to_datetime(weather["#"])
weather = weather.rename(columns={"#": "date"})

df["count_date"] = pd.to_datetime(df["count_date"])
df = df.rename(columns={"count_date": "date"})
df2 = df.merge(weather, on="date", how="inner")

df2.isna().sum()
df2 = df2.drop(["Glob Rad W/m2","W Dir","W Spd m/s", "STD W Dir ", "Temp 2m"],axis=1)
df2 = df2.dropna(axis=0)


In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.compose import TransformedTargetRegressor
from sklearn import preprocessing
import datetime

# Set aside 10% for final evaluation
eval_set = df2.sample(frac=0.1,random_state=0)
df3 = df2.drop(index=eval_set.index)


train_params = ["hour", "road_type","latitude", "longitude", "day","Temp 8m", "Rel Hum %"]

# Two models, 1 for minor roads with low traffic, another for major roads with high traffic
def train(road_type):

    df_train = df3

    encoder = preprocessing.LabelEncoder()

    # Str to ints labels
    df_train = df_train[df_train["road_type"] == road_type]


    X = df_train[train_params]
    X["road_type"] = encoder.fit_transform(X["road_type"])
    y = df_train["all_motor_vehicles"]


    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)


    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Transform targets using log to spread them out.
    reg = TransformedTargetRegressor(regressor=GradientBoostingRegressor(n_estimators=350,learning_rate=0.5, random_state=0),
                                        func=np.log1p,
                                        inverse_func=np.expm1)

    reg.fit(X_train,y_train)
    print(reg.score(X_train, y_train), reg.score(X_test,y_test))
    return reg, scaler

X = eval_set[train_params]

encoder = preprocessing.LabelEncoder()

X["road_type"] = encoder.fit_transform(X["road_type"])


# Make predictions on eval set
minor_reg, minor_scaler = train("Minor")

eval_set["minor_pred"] = minor_reg.predict(minor_scaler.transform(X))

major_reg, major_scaler = train("Major")

eval_set["major_pred"] = major_reg.predict(major_scaler.transform(X))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["road_type"] = encoder.fit_transform(X["road_type"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["road_type"] = encoder.fit_transform(X["road_type"])
0.9063987487397113 0.8828881602997249
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["road_type"] = encoder.fit_transform(X["road_type"])
0.

In [10]:
df3 = eval_set

# Get average predictions, vehicle counts for each count point. Get lat/longs abd road type for each count point
df3 =  df3.groupby("count_point_id")[["minor_pred","major_pred", "all_motor_vehicles"]].mean().merge(df3[["road_type", "count_point_id", "latitude", "longitude"]], on="count_point_id", how="inner").drop_duplicates(["count_point_id"])

#Create and position the map
m = folium.Map(location=[53.79870087, -1.54812911])

# Add each counter and its data to the map
for index, x in df3.iterrows():

    txt  = f"Type: {x['road_type']}\n Average_traffic: {np.ceil(x['all_motor_vehicles'])}\nPredicted: {np.ceil(x['major_pred']) if x['road_type'] == 'Major' else np.ceil(x['minor_pred'])}"


    folium.CircleMarker(
    radius=10,
    location=[x["latitude"], x["longitude"]],
    popup=txt,
    color=("blue" if x["road_type"] == "Minor" else "red"),
    fill=True).add_to(m)

m