Part 1: Retrieve traffic image metadata for a chosen period from the Data.gov.sg traffic-images API

In [None]:
# Import libraries
import requests
import time
import datetime
import pandas as pd
import concurrent.futures

In [None]:
# Mount your Google Drive at /content/drive; the CSV and image paths used in
# this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Define a function to retrieve json data, especially image URLs from API of Gov.sg
def retrieveCameraJSON(date_time, timeout=30):
  """Fetch the traffic-images payload for one point in time.

  Args:
    date_time: Any object with an ``isoformat()`` method (for example a
      ``datetime`` or a pandas ``Timestamp``). Its ISO string is sent as the
      ``date_time`` query parameter.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout a stalled connection would block its worker thread forever.

  Returns:
    The decoded JSON body of the response.

  Raises:
    requests.exceptions.RequestException: On connection errors, timeouts or
      an HTTP error status.
    ValueError: If the response body is not valid JSON.
  """
  url = 'https://api.data.gov.sg/v1/transport/traffic-images'
  # Let requests build and escape the query string instead of concatenating it.
  response = requests.get(url, params={'date_time': date_time.isoformat()}, timeout=timeout)
  # Fail here on 4xx/5xx rather than handing an error payload to later cells.
  response.raise_for_status()
  return response.json()

In [None]:
# Create a list of datetime to define the period you intend to retrieve data from
# One timestamp per minute. date_range includes both endpoints, so each range
# also contains 00:00 of 1 February.
# 'min' is the current spelling of the minute frequency; the older 'T' alias is deprecated.
jan2019 = pd.date_range(start='2019-01-01', end='2019-02-01', freq='min')
jan2020 = pd.date_range(start='2020-01-01', end='2020-02-01', freq='min')

In [None]:
# Make concurrent API calls for Jan 2019 data
completed_2019 = []          # JSON payloads of the requests that succeeded, in completion order
failed_2019 = []             # (timestamp, exception) for every request that raised
future_list_2019 = []
requested_time_2019 = {}     # future -> timestamp it was submitted for
with concurrent.futures.ThreadPoolExecutor(max_workers=150) as executor:
  for date_time in jan2019:
    future = executor.submit(retrieveCameraJSON, date_time)
    future_list_2019.append(future)
    requested_time_2019[future] = date_time
  for future in concurrent.futures.as_completed(future_list_2019):
    try:
      completed_2019.append(future.result())
    except Exception as e:
      # One failed request must not throw away the payloads already collected;
      # remember it so it can be inspected or retried.
      failed_2019.append((requested_time_2019[future], e))

print(len(failed_2019), 'of', len(future_list_2019), 'requests failed')


In [None]:
# Get 2019 Dataframe
# Build one DataFrame per API response (one row per entry of its 'cameras'
# lists), then stack them. Progress is not printed per response: with one
# response per requested minute that would flood the cell output.
list_of_dfs = [
    pd.json_normalize(response['items'], record_path=['cameras'], meta_prefix='.', errors='ignore')
    for response in completed_2019
]

new_df = pd.concat(list_of_dfs)
new_df

In [None]:
# Keep only camera 1709, one row per distinct image URL, ordered by the timestamp column
is_target_camera = new_df['camera_id'] == '1709'
new_df_2 = (
    new_df[is_target_camera]
    .drop_duplicates(subset=['image'])
    .sort_values(by=['timestamp'])
)
new_df_2.head(5)

In [None]:
# Export 2019 DataFrame to CSV
# index=False leaves the DataFrame index out of the file. When the CSV is read
# back with the default settings the rows get a fresh 0..n-1 index, and that
# index is what the image file names (<index>.jpg) are later built from.
new_df_2.to_csv('/content/drive/MyDrive/Colab Notebooks/Uplevel traffic/jan2019.csv', index=False)

In [None]:
# Make concurrent API calls for Jan 2020 data
completed_2020 = []          # JSON payloads of the requests that succeeded, in completion order
failed_2020 = []             # (timestamp, exception) for every request that raised
future_list = []
requested_time_2020 = {}     # future -> timestamp it was submitted for
with concurrent.futures.ThreadPoolExecutor(max_workers=150) as executor:
  for date_time in jan2020:
    future = executor.submit(retrieveCameraJSON, date_time)
    future_list.append(future)
    requested_time_2020[future] = date_time
  for future in concurrent.futures.as_completed(future_list):
    try:
      completed_2020.append(future.result())
    except Exception as e:
      # One failed request must not throw away the payloads already collected;
      # remember it so it can be inspected or retried.
      failed_2020.append((requested_time_2020[future], e))

print(len(failed_2020), 'of', len(future_list), 'requests failed')

In [None]:
# Get the 2020 Jan DataFrame: one frame per API response (rows come from its
# 'cameras' lists), stacked into a single frame.
list_of_dfs = [
    pd.json_normalize(response['items'], record_path=['cameras'], meta_prefix='.', errors='ignore')
    for response in completed_2020
]

new_df = pd.concat(list_of_dfs)
new_df

In [None]:
# Keep only camera 1709, one row per distinct image URL, ordered by the timestamp column
is_target_camera = new_df['camera_id'] == '1709'
new_df_2 = (
    new_df[is_target_camera]
    .drop_duplicates(subset=['image'])
    .sort_values(by=['timestamp'])
)
new_df_2.head(5)

In [None]:
# Export 2020 DataFrame to CSV
# index=False leaves the DataFrame index out of the file. When the CSV is read
# back with the default settings the rows get a fresh 0..n-1 index, and that
# index is what the image file names (<index>.jpg) are later built from.
new_df_2.to_csv('/content/drive/MyDrive/Colab Notebooks/Uplevel traffic/jan2020.csv', index=False)

Part 2: Download the original images from every URL recorded in the `image` column of the two metadata DataFrames generated in Part 1


---



In [None]:
import pandas as pd
import os
import requests
from io import BytesIO
from PIL import Image
import concurrent.futures

In [None]:
# Load the per-camera metadata CSVs. The default integer row index of each
# frame is used below as the image file name (<index>.jpg).
df2019 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Uplevel traffic/jan2019.csv')
df2020 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Uplevel traffic/jan2020.csv')

In [None]:
# define function to loop through dataframe later to get images
def getImages(index, row, destination_url):
  """Download the image at ``row['image']`` and save it as ``<index>.jpg``.

  Args:
    index: Row label; used as the file name of the saved image.
    row: Mapping-like row (e.g. a pandas Series) with an ``'image'`` entry
      holding the image URL.
    destination_url: Name of the sub-folder (inside the project folder on
      Drive) that receives the file. It is created if it does not exist.

  Returns:
    None. A failure for this row is printed and otherwise ignored so that a
    single bad URL does not stop a bulk download.
  """
  target_dir = os.path.join('/content/drive/MyDrive/Colab Notebooks/Uplevel traffic', destination_url)
  target_path = os.path.join(target_dir, str(index) + '.jpg')
  try:
    # Saving into a missing folder would fail for every single image.
    os.makedirs(target_dir, exist_ok=True)
    # The request is inside the try so network errors are reported per row
    # instead of propagating; the timeout keeps a stalled download from hanging.
    temp_res = requests.get(row['image'], timeout=30)
    temp_res.raise_for_status()
    img = Image.open(BytesIO(temp_res.content))
    img.save(target_path)
  except Exception as e:
    print('getImages: row ' + str(index) + ' failed: ' + repr(e))

In [None]:
# Download every Jan 2019 image concurrently: one getImages task per metadata row
destination_url = 'car_image_2019_Jan'
completed, future_list = [], []
with concurrent.futures.ThreadPoolExecutor(max_workers=150) as executor:
  future_list.extend(
      executor.submit(getImages, index, row, destination_url)
      for index, row in df2019.iterrows()
  )
  # Wait for each task and collect its return value in completion order.
  completed.extend(task.result() for task in concurrent.futures.as_completed(future_list))

In [None]:
# Count the number of images in car_image_2019_Jan
# Relies on destination_url still holding the folder name set by the most
# recently run download cell.
len(os.listdir('/content/drive/MyDrive/Colab Notebooks/Uplevel traffic/' + destination_url +'/'))

In [None]:
# Download every Jan 2020 image concurrently: one getImages task per metadata row
destination_url = 'car_image_2020_Jan'
completed, future_list = [], []
with concurrent.futures.ThreadPoolExecutor(max_workers=150) as executor:
  future_list.extend(
      executor.submit(getImages, index, row, destination_url)
      for index, row in df2020.iterrows()
  )
  # Wait for each task and collect its return value in completion order.
  completed.extend(task.result() for task in concurrent.futures.as_completed(future_list))

In [None]:
# Count the number of images in car_image_2020_Jan
# Relies on destination_url still holding the folder name set by the most
# recently run download cell.
len(os.listdir('/content/drive/MyDrive/Colab Notebooks/Uplevel traffic/' + destination_url +'/'))

Before Part 3: Prepare for OpenCV in Colab

In [None]:
# Build OpenCV from source with the CUDA-enabled DNN module switched on.
%cd /content
# Fetch the main OpenCV sources and the extra (contrib) modules.
!git clone https://github.com/opencv/opencv
!git clone https://github.com/opencv/opencv_contrib
# Configure in a separate build directory.
!mkdir /content/build
%cd /content/build
# Static libraries, no tests/perf tests/examples, OpenEXR off; CUDA, cuBLAS, cuDNN and the CUDA DNN backend on.
!cmake -DOPENCV_EXTRA_MODULES_PATH=/content/opencv_contrib/modules  -DBUILD_SHARED_LIBS=OFF  -DBUILD_TESTS=OFF  -DBUILD_PERF_TESTS=OFF -DBUILD_EXAMPLES=OFF -DWITH_OPENEXR=OFF -DWITH_CUDA=ON -DWITH_CUBLAS=ON -DWITH_CUDNN=ON -DOPENCV_DNN_CUDA=ON /content/opencv
# Compile with 8 parallel jobs and install the result.
!make -j8 install

Part 3: Car counting

In [None]:
# import and check cv2 version
import os
import cv2
cv2.__version__

In [None]:
# Install cvlib, which provides detect_common_objects and draw_bbox used below.
# NOTE(review): the version is not pinned — consider pinning it for reproducibility.
!pip install cvlib

In [None]:
# import other libraries for this part
import cvlib as cv
import pandas as pd
import cv2
import numpy as np
import matplotlib.pyplot as plt
from cvlib.object_detection import draw_bbox

In [None]:
# Import dataframes again
# Reload the metadata CSVs; each row index i is later matched to the image
# file <i>.jpg in the corresponding image folder.
df2019 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Uplevel traffic/jan2019.csv')
df2020 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Uplevel traffic/jan2020.csv')

In [None]:
# Define a function for looping through the dataframes later and detect and count the cars in the image
def getLabelCount(index, folder_path, target_label=None):
  """Run object detection on ``<folder_path>/<index>.jpg`` and count the detections.

  Args:
    index: Row index; the image file is expected to be named ``<index>.jpg``.
    folder_path: Folder that holds the images (with or without a trailing
      slash).
    target_label: If given, only detections with exactly this label (for
      example ``'car'``) are counted. With the default ``None`` every
      detected object is counted, whatever its label.
      NOTE(review): callers store the result as ``num_cars``; with the
      default that number also includes non-car labels — confirm whether
      ``target_label='car'`` is what is wanted.

  Returns:
    int: Number of counted detections. 0 is also returned when the image
    cannot be read or detection fails; that case is reported on stdout so it
    is not mistaken for an image with nothing in it.
  """
  temp_path = os.path.join(folder_path, str(index) + '.jpg')
  try:
    temp_img = cv2.imread(temp_path)
    if temp_img is None:
      # cv2.imread does not raise for a missing or unreadable file; it returns None.
      print('getLabelCount: could not read ' + temp_path)
      return 0
    bbox, label, conf = cv.detect_common_objects(temp_img, model='yolov4', enable_gpu=True)
  except Exception as e:
    # Keep the batch running, but make the failure visible.
    print('getLabelCount: detection failed for ' + temp_path + ': ' + repr(e))
    return 0
  if target_label is None:
    return len(label)
  return sum(1 for name in label if name == target_label)

In [None]:
# Run getLabelCount on every 2019 image; label_list holds one integer count
# per entry of df2019.index, in the same order.
label_list = [
    getLabelCount(index = i, folder_path = '/content/drive/MyDrive/Colab Notebooks/Uplevel traffic/car_image_2019_Jan/')
    for i in df2019.index
]

In [None]:
# Determine length of list
# Sanity check: this must equal the number of rows in df2019 for the column
# assignment that follows to succeed.
len(label_list)

In [None]:
# Attach the per-image counts to the 2019 metadata as column num_cars
df2019 = df2019.assign(num_cars=label_list)
df2019

In [None]:
# Save the 2019 metadata, now including num_cars, to a separate CSV (the
# original jan2019.csv is left untouched).
df2019.to_csv('/content/drive/MyDrive/Colab Notebooks/Uplevel traffic/jan2019_2.csv', index=False)

In [None]:
# Likewise for 2020: one integer count per entry of df2020.index, in the same order.
label_list = [
    getLabelCount(index = i, folder_path = '/content/drive/MyDrive/Colab Notebooks/Uplevel traffic/car_image_2020_Jan/')
    for i in df2020.index
]

In [None]:
# Sanity check: this must equal the number of rows in df2020 for the column
# assignment that follows to succeed.
len(label_list)

29939

In [None]:
# Attach the per-image counts to the 2020 metadata as column num_cars and save
# the result to a separate CSV (the original jan2020.csv is left untouched).
df2020['num_cars'] = label_list
df2020.to_csv('/content/drive/MyDrive/Colab Notebooks/Uplevel traffic/jan2020_2.csv', index=False)
# Only the last expression of a cell is displayed, so show the frame after saving.
df2020