In [1]:
import pandas as pd

traffic_df = pd.read_csv('trafikkdata.csv', sep=';')

In [2]:
traffic_df.describe(include='all')

Unnamed: 0,Dato,Fra tidspunkt,Til tidspunkt,Trafikkmengde
count,8784,8784,8784,8784
unique,366,24,24,2108
top,2022-10-30,00:00,01:00,68
freq,25,366,366,37


In [3]:
traffic_df['Date'] = pd.to_datetime(traffic_df['Dato'], format='%Y-%m-%d')
traffic_df['Hour'] = pd.to_datetime(traffic_df['Fra tidspunkt'], format='%H:%M').dt.hour

traffic_df['year'] = traffic_df['Date'].dt.year
traffic_df['month'] = traffic_df['Date'].dt.month
traffic_df['day'] = traffic_df['Date'].dt.day

traffic_df = traffic_df.drop(columns=['Dato', 'Fra tidspunkt', 'Til tidspunkt', 'Date'])
traffic_df['Trafikkmengde'] = pd.to_numeric(traffic_df['Trafikkmengde'], errors='coerce')


In [4]:
from sklearn.impute import KNNImputer

imputer = KNNImputer(n_neighbors=5)

traffic_df = pd.DataFrame(imputer.fit_transform(traffic_df), columns=traffic_df.columns)

In [5]:
traffic_df.describe(include='all')

Unnamed: 0,Trafikkmengde,Hour,year,month,day
count,8784.0,8784.0,8784.0,8784.0,8784.0
mean,929.319148,11.5,2022.745788,6.533584,15.760018
std,639.366297,6.922581,0.435442,3.445747,8.816924
min,27.0,0.0,2022.0,1.0,1.0
25%,306.0,5.75,2022.0,4.0,8.0
50%,894.0,11.5,2023.0,7.0,16.0
75%,1504.0,17.25,2023.0,10.0,23.0
max,2438.0,23.0,2023.0,12.0,31.0


In [6]:
# Divided the data into two dataframes randomly 80% and 20%

train = traffic_df.sample(frac=0.8, random_state=42)
test = traffic_df.drop(train.index)

X_train = train.drop(columns=['Trafikkmengde'])
y_train = train['Trafikkmengde']

X_test = test.drop(columns=['Trafikkmengde'])
y_test = test['Trafikkmengde']


In [7]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error,mean_absolute_error, r2_score

model = RandomForestRegressor( random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
print('Mean Absolute Error:', mean_absolute_error(y_test, y_pred))
print('R2 Score:', r2_score(y_test, y_pred))




Mean Squared Error: 56616.20765917814
Mean Absolute Error: 171.29296983494592
R2 Score: 0.8643761213580394


In [8]:
from xgboost import XGBRegressor

model = XGBRegressor( random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print('Mean Squared Error:', mean_squared_error(y_test, y_pred))
print('Mean Absolute Error:', mean_absolute_error(y_test, y_pred))
print('R2 Score:', r2_score(y_test, y_pred))


model.save_model('models/traffic_model.json')

Mean Squared Error: 40202.64514917007
Mean Absolute Error: 136.92877053762606
R2 Score: 0.9036947387995362
