In [1]:
import pandas as pd
import random
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Colab-only step: mount Google Drive at /content/gdrive so that the CSV path
# under /content/gdrive/MyDrive/... used later in this notebook resolves.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
class dataproc:
  """Load detector records from a CSV and carve out subsets for analysis.

  The CSV must contain a ``datetime`` column. ``pick_random`` additionally
  needs a ``detector_id`` column and ``visualize`` an ``occupancy`` column.

  Attributes:
    path: location of the CSV file handed to ``pd.read_csv``.
    n:    how many distinct detectors ``pick_random`` samples.
    days: the two day-of-month values ``split_data`` separates the data into.
  """

  def __init__(self, path, n=1, days=(2, 3)):
    """Store the configuration; no file is read here.

    Args:
      path: CSV file to load.
      n: number of distinct detectors to sample in ``pick_random``.
      days: exactly two day-of-month values (first split, second split).

    Raises:
      ValueError: if ``days`` does not hold exactly two values.
    """
    self.path = path
    self.n = n
    self.days = list(days)
    if len(self.days) != 2:
      raise ValueError("days must contain exactly two day-of-month values")

  def read_csv_format(self):
    """Read ``self.path`` and return it with ``datetime`` parsed to timestamps."""
    df = pd.read_csv(self.path)
    df['datetime'] = pd.to_datetime(df['datetime'])

    return df

  def pick_random(self, df=None):
    """Return the rows belonging to ``self.n`` randomly chosen detectors.

    Args:
      df: an already-loaded frame to sample from. When omitted the CSV is
        read via ``read_csv_format`` (the original behaviour); passing a frame
        avoids re-reading the file on every call.

    Returns:
      The subset of ``df`` whose ``detector_id`` is one of the sampled ids.

    Raises:
      ValueError: if ``self.n`` exceeds the number of distinct detectors.
    """
    if df is None:
      df = self.read_csv_format()
    unique_detectors = df['detector_id'].unique().tolist()
    # random.sample would raise ValueError here as well; fail with a message
    # that names the actual numbers instead.
    if self.n > len(unique_detectors):
      raise ValueError(
          "cannot sample %d detectors from %d available"
          % (self.n, len(unique_detectors)))
    select_detectors = random.sample(unique_detectors, self.n)
    return df[df['detector_id'].isin(select_detectors)]

  def visualize(self, df):
    """Plot ``occupancy`` as a histogram and against ``datetime``.

    Returns:
      A 2-tuple with the results of the two pandas plotting calls
      (histogram first, time series second).
    """
    return df['occupancy'].plot.hist(), df.plot(x='datetime',y='occupancy')

  def split_data(self, df, start_hour=6, end_hour=20):
    """Split daytime records into one frame per configured day.

    Rows are kept when their hour lies in ``[start_hour, end_hour]``; both ends
    are inclusive, so the default window runs from 06:00 through 20:59.

    The input frame is left untouched: the ``day`` and ``hour`` helper columns
    are added to a new frame rather than written into ``df``, which also avoids
    pandas' SettingWithCopyWarning when ``df`` is itself a filtered slice.

    Args:
      df: frame with a datetime-typed ``datetime`` column.
      start_hour: first hour of day to keep (inclusive).
      end_hour: last hour of day to keep (inclusive).

    Returns:
      ``(first_day, second_day)`` - the daytime rows whose day of month equals
      ``self.days[0]`` and ``self.days[1]`` respectively. Both frames carry the
      ``day`` and ``hour`` columns.
    """
    stamps = df['datetime']
    tagged = df.assign(day=stamps.dt.day, hour=stamps.dt.hour)
    daytime = tagged[(tagged['hour'] >= start_hour) & (tagged['hour'] <= end_hour)]
    first_day = daytime[daytime['day'] == self.days[0]]
    second_day = daytime[daytime['day'] == self.days[1]]
    return first_day, second_day


In [4]:
# CSV location on the mounted Drive.
path = '/content/gdrive/MyDrive/CV_ENG_8160/Data/delay_bottleneck.csv'
# Constructing the processor only stores the path; the file is read when a
# loading method is called.
dp = dataproc(path)

In [5]:
# Load the whole CSV; read_csv_format also converts 'datetime' to timestamps.
df = dp.read_csv_format()

In [6]:
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0,detector_id,travelway,direction,loc,datetime,speed,volume,occupancy,congested,long,lat
0,MI064E009.1D,64,East,9.1,2017-09-01 02:09:00,62.0,1.0,1.0,0.0,-90.7043,38.7141
1,MI064E009.1D,64,East,9.6,2017-09-01 02:09:00,61.285714,1.47619,1.238095,0.0,-90.7043,38.7141
2,MI064E009.1D,64,East,10.1,2017-09-01 02:09:00,61.214286,1.52381,1.261905,0.0,-90.7043,38.7141
3,MI064E009.1D,64,East,9.1,2017-09-01 02:44:00,59.25,1.25,1.0,0.0,-90.7043,38.7141
4,MI064E009.1D,64,East,9.6,2017-09-01 02:44:00,63.833333,1.071429,1.0,0.0,-90.7043,38.7141


In [7]:
# Number of distinct detector ids (a missing id, if any, counts as one value).
print(df['detector_id'].nunique(dropna=False))

194


In [10]:
# Give every detector id a 1-based integer code, numbered in order of first
# appearance in the frame, and store it in the 'Mapped' column.
unique_values = df['detector_id'].unique()
value_to_number = dict(zip(unique_values, range(1, len(unique_values) + 1)))
df['Mapped'] = df['detector_id'].map(value_to_number)



In [11]:
# Re-display the first rows to confirm the 'Mapped' column is present.
df.head()

Unnamed: 0,detector_id,travelway,direction,loc,datetime,speed,volume,occupancy,congested,long,lat,Mapped
0,MI064E009.1D,64,East,9.1,2017-09-01 02:09:00,62.0,1.0,1.0,0.0,-90.7043,38.7141,1
1,MI064E009.1D,64,East,9.6,2017-09-01 02:09:00,61.285714,1.47619,1.238095,0.0,-90.7043,38.7141,1
2,MI064E009.1D,64,East,10.1,2017-09-01 02:09:00,61.214286,1.52381,1.261905,0.0,-90.7043,38.7141,1
3,MI064E009.1D,64,East,9.1,2017-09-01 02:44:00,59.25,1.25,1.0,0.0,-90.7043,38.7141,1
4,MI064E009.1D,64,East,9.6,2017-09-01 02:44:00,63.833333,1.071429,1.0,0.0,-90.7043,38.7141,1


In [12]:
# Inspect the last rows, including the 'Mapped' code of the final detector.
df.tail()

Unnamed: 0,detector_id,travelway,direction,loc,datetime,speed,volume,occupancy,congested,long,lat,Mapped
894820,MR367S001.7D,367,South,2.5,2017-09-07 23:29:00,39.925926,1.296296,1.222222,0.083,-90.231247,38.819611,194
894821,MR367S001.7D,367,South,2.5,2017-09-07 23:34:00,40.355556,1.192593,1.17037,0.083,-90.231247,38.819611,194
894822,MR367S001.7D,367,South,2.5,2017-09-07 23:39:00,41.511111,1.392593,1.281481,0.083,-90.231247,38.819611,194
894823,MR367S001.7D,367,South,2.0,2017-09-07 23:59:00,56.0,1.0,1.0,0.0,-90.231247,38.819611,194
894824,MR367S001.7D,367,South,2.5,2017-09-07 23:59:00,41.0,1.0,1.0,0.083,-90.231247,38.819611,194
