In [1]:
!pip install Khayyam

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
!pip install -r requirements_2.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import numpy as np
import pandas as pd 
from khayyam import JalaliDatetime
from datetime import timedelta

from flaml import AutoML
from sklearn.metrics import r2_score

In [4]:
# Load the traffic records; the DateTime column is parsed to datetimes on read.
df = pd.read_csv(
    'traffic.csv',
    parse_dates=['DateTime'],
)

In [5]:
# Display the freshly loaded frame (columns: DateTime, Junction, Car).
df

Unnamed: 0,DateTime,Junction,Car
0,2020-05-02 00:00:00,1,25
1,2020-05-02 01:00:00,1,23
2,2020-05-02 02:00:00,1,20
3,2020-05-02 03:00:00,1,12
4,2020-05-02 04:00:00,1,19
...,...,...,...
48115,2021-12-30 19:00:00,4,26
48116,2021-12-30 20:00:00,4,55
48117,2021-12-30 21:00:00,4,31
48118,2021-12-30 22:00:00,4,37


In [6]:
# Fit FLAML AutoML on the raw columns (DateTime, Junction), i.e. before any
# engineered feature has been added to `df`.
model = AutoML(task='regression', time_budget=60, verbose=0)
# Split by position: `.loc` slicing is label-based and inclusive on both ends,
# so `.loc[:48000]` / `.loc[48000:]` put row 48000 in BOTH train and test.
train = df.iloc[:48000]
test = df.iloc[48000:]
model.fit(train.drop('Car', axis=1), train.Car)
y_pred = model.predict(test.drop('Car', axis=1))
r2score = round(r2_score(test.Car, y_pred), 2) * 100
# Format at print time: `round(x, 2) * 100` can expose float artifacts
# (e.g. 0.29 * 100 == 28.999999999999996).
print(f'performance of model is {r2score:.1f}%')

performance of model is 20.0%


In [7]:
from khayyam import JalaliDatetime

In [8]:
# Convert every Gregorian timestamp to its Jalali counterpart and keep it in a
# new `JalaliDateTime` column.
gregorian_stamps = df['DateTime']
df['JalaliDateTime'] = gregorian_stamps.apply(JalaliDatetime)
df

Unnamed: 0,DateTime,Junction,Car,JalaliDateTime
0,2020-05-02 00:00:00,1,25,1399-02-13 00:00:00.000000
1,2020-05-02 01:00:00,1,23,1399-02-13 01:00:00.000000
2,2020-05-02 02:00:00,1,20,1399-02-13 02:00:00.000000
3,2020-05-02 03:00:00,1,12,1399-02-13 03:00:00.000000
4,2020-05-02 04:00:00,1,19,1399-02-13 04:00:00.000000
...,...,...,...,...
48115,2021-12-30 19:00:00,4,26,1400-10-09 19:00:00.000000
48116,2021-12-30 20:00:00,4,55,1400-10-09 20:00:00.000000
48117,2021-12-30 21:00:00,4,31,1400-10-09 21:00:00.000000
48118,2021-12-30 22:00:00,4,37,1400-10-09 22:00:00.000000


In [9]:
def categorize_hour (hour):
    """Map an hour of the day to an ordinal time-of-day bin.

    Bins are half-open on the right:
    [.., 6) -> 0, [6, 12) -> 1, [12, 15) -> 2, [15, 18) -> 3,
    [18, 22) -> 4, and everything else -> 5.

    Parameters
    ----------
    hour : number
        Hour value to bucket; it is only compared with ``<``.

    Returns
    -------
    int
        Bin index in the range 0..5.
    """
    # Exclusive upper bound of each bin, in ascending order.
    upper_bounds = (6, 12, 15, 18, 22)
    for bin_index, bound in enumerate(upper_bounds):
        if hour < bound:
            return bin_index
    # Nothing matched: the value falls into the last bin.
    return len(upper_bounds)

In [10]:
# Add an `hour` column holding the time-of-day bin (see categorize_hour) of
# each record's hour.
hour_of_day = df['DateTime'].dt.hour
df['hour'] = hour_of_day.map(categorize_hour)
df

Unnamed: 0,DateTime,Junction,Car,JalaliDateTime,hour
0,2020-05-02 00:00:00,1,25,1399-02-13 00:00:00.000000,0
1,2020-05-02 01:00:00,1,23,1399-02-13 01:00:00.000000,0
2,2020-05-02 02:00:00,1,20,1399-02-13 02:00:00.000000,0
3,2020-05-02 03:00:00,1,12,1399-02-13 03:00:00.000000,0
4,2020-05-02 04:00:00,1,19,1399-02-13 04:00:00.000000,0
...,...,...,...,...,...
48115,2021-12-30 19:00:00,4,26,1400-10-09 19:00:00.000000,4
48116,2021-12-30 20:00:00,4,55,1400-10-09 20:00:00.000000,4
48117,2021-12-30 21:00:00,4,31,1400-10-09 21:00:00.000000,4
48118,2021-12-30 22:00:00,4,37,1400-10-09 22:00:00.000000,5


In [11]:
def categorize_day (day):
  """Return 1 when `day` equals the string "Friday", otherwise 0.

  The caller uses the result as the `IsHoliday` flag, so only Fridays are
  treated as holidays.
  """
  return 1 if day == "Friday" else 0

In [12]:
# Flag holidays: take the weekday name of every record and let categorize_day
# turn it into the 0/1 `IsHoliday` column.
x = df['DateTime'].dt.day_name()
df['IsHoliday'] = x.map(categorize_day)
df

Unnamed: 0,DateTime,Junction,Car,JalaliDateTime,hour,IsHoliday
0,2020-05-02 00:00:00,1,25,1399-02-13 00:00:00.000000,0,0
1,2020-05-02 01:00:00,1,23,1399-02-13 01:00:00.000000,0,0
2,2020-05-02 02:00:00,1,20,1399-02-13 02:00:00.000000,0,0
3,2020-05-02 03:00:00,1,12,1399-02-13 03:00:00.000000,0,0
4,2020-05-02 04:00:00,1,19,1399-02-13 04:00:00.000000,0,0
...,...,...,...,...,...,...
48115,2021-12-30 19:00:00,4,26,1400-10-09 19:00:00.000000,4,0
48116,2021-12-30 20:00:00,4,55,1400-10-09 20:00:00.000000,4,0
48117,2021-12-30 21:00:00,4,31,1400-10-09 21:00:00.000000,4,0
48118,2021-12-30 22:00:00,4,37,1400-10-09 22:00:00.000000,5,0


In [13]:
def categorize_month (mn):
  """Return 0 when `mn.month` is 6 or lower, otherwise 1.

  `mn` can be any object exposing a `.month` attribute; the notebook passes
  Jalali date values and stores the result as the `IsCold` flag.
  """
  return 0 if mn.month <= 6 else 1

In [14]:
# `IsCold` is 1 for records whose Jalali month is in the second half of the
# year (month > 6) and 0 otherwise.
df['IsCold'] = df['JalaliDateTime'].map(categorize_month)

In [15]:
# Display the frame to inspect the new `IsCold` column.
df

Unnamed: 0,DateTime,Junction,Car,JalaliDateTime,hour,IsHoliday,IsCold
0,2020-05-02 00:00:00,1,25,1399-02-13 00:00:00.000000,0,0,0
1,2020-05-02 01:00:00,1,23,1399-02-13 01:00:00.000000,0,0,0
2,2020-05-02 02:00:00,1,20,1399-02-13 02:00:00.000000,0,0,0
3,2020-05-02 03:00:00,1,12,1399-02-13 03:00:00.000000,0,0,0
4,2020-05-02 04:00:00,1,19,1399-02-13 04:00:00.000000,0,0,0
...,...,...,...,...,...,...,...
48115,2021-12-30 19:00:00,4,26,1400-10-09 19:00:00.000000,4,0,1
48116,2021-12-30 20:00:00,4,55,1400-10-09 20:00:00.000000,4,0,1
48117,2021-12-30 21:00:00,4,31,1400-10-09 21:00:00.000000,4,0,1
48118,2021-12-30 22:00:00,4,37,1400-10-09 22:00:00.000000,5,0,1


In [16]:
# One-hot encode the junction id into Junc_<id> indicator columns.
# dtype=int keeps the indicators as 0/1 integers on every pandas version
# (pandas >= 2.0 would otherwise return boolean columns).
addtodf = pd.get_dummies(df['Junction'], prefix='Junc', dtype=int)
addtodf

Unnamed: 0,Junc_1,Junc_2,Junc_3,Junc_4
0,1,0,0,0
1,1,0,0,0
2,1,0,0,0
3,1,0,0,0
4,1,0,0,0
...,...,...,...,...
48115,0,0,0,1
48116,0,0,0,1
48117,0,0,0,1
48118,0,0,0,1


In [17]:
# Append the one-hot encoded `Junction` columns (prefix `Junc`) to `df`.
# Any Junc_* columns already present are dropped first, so re-running this cell
# does not silently duplicate them.
df = pd.concat(
    [df.drop(columns=addtodf.columns, errors='ignore'), addtodf],
    axis=1,
)
df.head()

Unnamed: 0,DateTime,Junction,Car,JalaliDateTime,hour,IsHoliday,IsCold,Junc_1,Junc_2,Junc_3,Junc_4
0,2020-05-02 00:00:00,1,25,1399-02-13 00:00:00.000000,0,0,0,1,0,0,0
1,2020-05-02 01:00:00,1,23,1399-02-13 01:00:00.000000,0,0,0,1,0,0,0
2,2020-05-02 02:00:00,1,20,1399-02-13 02:00:00.000000,0,0,0,1,0,0,0
3,2020-05-02 03:00:00,1,12,1399-02-13 03:00:00.000000,0,0,0,1,0,0,0
4,2020-05-02 04:00:00,1,19,1399-02-13 04:00:00.000000,0,0,0,1,0,0,0


In [18]:
# Keep only the calendar-date part of each Jalali value (the time of day is
# dropped), overwriting the column in place.
df['JalaliDateTime'] = df['JalaliDateTime'].map(lambda jalali_dt: jalali_dt.date())
df

Unnamed: 0,DateTime,Junction,Car,JalaliDateTime,hour,IsHoliday,IsCold,Junc_1,Junc_2,Junc_3,Junc_4
0,2020-05-02 00:00:00,1,25,1399-02-13,0,0,0,1,0,0,0
1,2020-05-02 01:00:00,1,23,1399-02-13,0,0,0,1,0,0,0
2,2020-05-02 02:00:00,1,20,1399-02-13,0,0,0,1,0,0,0
3,2020-05-02 03:00:00,1,12,1399-02-13,0,0,0,1,0,0,0
4,2020-05-02 04:00:00,1,19,1399-02-13,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
48115,2021-12-30 19:00:00,4,26,1400-10-09,4,0,1,0,0,0,1
48116,2021-12-30 20:00:00,4,55,1400-10-09,4,0,1,0,0,0,1
48117,2021-12-30 21:00:00,4,31,1400-10-09,4,0,1,0,0,0,1
48118,2021-12-30 22:00:00,4,37,1400-10-09,5,0,1,0,0,0,1


In [19]:
# Extract the Jalali year and day-of-month as integer features.
# Read them from the date attributes (as categorize_month does with `.month`)
# instead of slicing the string form, which depends on how the date happens to
# be printed.
df['JalaliDateTime_y'] = df['JalaliDateTime'].apply(lambda jalali_date: int(jalali_date.year))

df['JalaliDateTime_d'] = df['JalaliDateTime'].apply(lambda jalali_date: int(jalali_date.day))
df

Unnamed: 0,DateTime,Junction,Car,JalaliDateTime,hour,IsHoliday,IsCold,Junc_1,Junc_2,Junc_3,Junc_4,JalaliDateTime_y,JalaliDateTime_d
0,2020-05-02 00:00:00,1,25,1399-02-13,0,0,0,1,0,0,0,1399,13
1,2020-05-02 01:00:00,1,23,1399-02-13,0,0,0,1,0,0,0,1399,13
2,2020-05-02 02:00:00,1,20,1399-02-13,0,0,0,1,0,0,0,1399,13
3,2020-05-02 03:00:00,1,12,1399-02-13,0,0,0,1,0,0,0,1399,13
4,2020-05-02 04:00:00,1,19,1399-02-13,0,0,0,1,0,0,0,1399,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...
48115,2021-12-30 19:00:00,4,26,1400-10-09,4,0,1,0,0,0,1,1400,9
48116,2021-12-30 20:00:00,4,55,1400-10-09,4,0,1,0,0,0,1,1400,9
48117,2021-12-30 21:00:00,4,31,1400-10-09,4,0,1,0,0,0,1,1400,9
48118,2021-12-30 22:00:00,4,37,1400-10-09,5,0,1,0,0,0,1,1400,9


In [20]:
# Remove the raw columns that have been replaced by engineered features.
raw_columns = ['DateTime', 'Junction', 'JalaliDateTime']
df = df.drop(columns=raw_columns)
df

Unnamed: 0,Car,hour,IsHoliday,IsCold,Junc_1,Junc_2,Junc_3,Junc_4,JalaliDateTime_y,JalaliDateTime_d
0,25,0,0,0,1,0,0,0,1399,13
1,23,0,0,0,1,0,0,0,1399,13
2,20,0,0,0,1,0,0,0,1399,13
3,12,0,0,0,1,0,0,0,1399,13
4,19,0,0,0,1,0,0,0,1399,13
...,...,...,...,...,...,...,...,...,...,...
48115,26,4,0,1,0,0,0,1,1400,9
48116,55,4,0,1,0,0,0,1,1400,9
48117,31,4,0,1,0,0,0,1,1400,9
48118,37,5,0,1,0,0,0,1,1400,9


**model**

In [21]:
# Target: the `Car` count, kept as a one-column DataFrame.
y = df[['Car']]
# Features: every remaining column (drop returns a new frame, `df` is untouched).
x = df.drop(columns=['Car'])

In [22]:
from sklearn.model_selection import train_test_split

# Fixed seed so the shuffled splits -- and therefore the reported scores -- are
# reproducible across runs.
SEED = 42

# 80/20 train+val/test split, then 75/25 of the remainder: 60/20/20 overall.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, shuffle=True, random_state=SEED
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, shuffle=True, random_state=SEED
)

In [23]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split (fit returns the estimator),
# then predict the validation split.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_val)

In [24]:
# R^2 of the validation-split predictions.
from sklearn.metrics import r2_score
r2_score(y_true=y_val, y_pred=y_pred)

0.7229812060142917

In [26]:
# Predict the held-out test split; this overwrites the validation predictions
# previously stored in `y_pred`.
y_pred = model.predict(X_test)

In [27]:
# R^2 of the test-split predictions.
r2_score(y_true=y_test, y_pred=y_pred)

0.7330714914183845