In [10]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import geopandas
import random
import torch
import torchvision
import cv2
import seaborn
from torchvision.io import read_image
from torchvision.utils import make_grid
%matplotlib inline
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F
from torchvision.transforms import Resize
import fiona
from fiona import Feature, Geometry
from shapely.geometry import mapping, shape, Point
import math

plt.rcParams["savefig.bbox"] = 'tight'

def show(imgs):
    """Display one image tensor, or a list of them, side by side in a single row.

    Each tensor is detached from autograd, converted to a PIL image and drawn
    on its own axes with all ticks and tick labels hidden.
    """
    images = imgs if isinstance(imgs, list) else [imgs]
    fig, axs = plt.subplots(ncols=len(images), squeeze=False, figsize=(12,12))
    # squeeze=False keeps `axs` two-dimensional, so row 0 holds one axes per image.
    for ax, tensor in zip(axs[0], images):
        pil_image = F.to_pil_image(tensor.detach())
        ax.imshow(np.asarray(pil_image))
        ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])

In [2]:
import glob

CSV_PATH = "../resources/processed/processed/part*.csv"
REQUIRED_COLUMNS = ["ticket_id", "coords", "photo", "timestamp", "type"]

# pandas.read_csv opens exactly one path and does not expand shell wildcards,
# so resolve the pattern ourselves and stitch the part files together.
csv_files = sorted(glob.glob(CSV_PATH))
if not csv_files:
    raise FileNotFoundError(f"No CSV files match {CSV_PATH!r}")
df = pd.concat((pd.read_csv(path) for path in csv_files), ignore_index=True)

# Keep only complete tickets and index them by ticket id.
df = df.dropna(axis=0, how="any", subset=REQUIRED_COLUMNS)
df = df.set_index("ticket_id")
df = df.convert_dtypes()
df["timestamp"] = pd.to_datetime(df["timestamp"])

# Keep tickets whose type set mentions traffic ("จราจร"). The explicit copy makes
# the column assignments below write to an independent frame, not a slice.
df = df.loc[df["type"].str.contains("จราจร"), :].copy()

# "coords" is a "<longitude>,<latitude>" string: part 0 is the longitude, part 1 the latitude.
coord_df = df["coords"].str.split(pat=",", n=2, expand=True)
df["lat"] = coord_df[1].astype(float)
df["long"] = coord_df[0].astype(float)

df = df[["photo", "lat", "long", "timestamp", "type"]]

print(df.dtypes)
print(df.shape[0])
df.head()

photo                     string
lat                      float64
long                     float64
timestamp    datetime64[ns, UTC]
type                      string
dtype: object
12386


Unnamed: 0_level_0,photo,lat,long,timestamp,type
ticket_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-C8DAVL,https://storage.googleapis.com/traffy_public_b...,13.74059,100.51974,2021-12-10 05:01:04.955289+00:00,{จราจร}
2021-DDFNVZ,https://storage.googleapis.com/traffy_public_b...,13.76308,100.54699,2021-12-10 06:41:33.611765+00:00,"{ความสะอาด,จราจร}"
2021-CJ2F7F,https://storage.googleapis.com/traffy_public_b...,13.75868,100.53467,2021-12-10 10:06:40.713800+00:00,"{จราจร,ถนน}"
2021-4QKUUN,https://storage.googleapis.com/traffy_public_b...,13.74219,100.55281,2021-12-18 07:36:06.837338+00:00,{จราจร}
2021-H4RRY3,https://storage.googleapis.com/traffy_public_b...,13.78444,100.49413,2021-12-22 01:24:57.223484+00:00,{จราจร}


In [3]:
from shapely.geometry import Point

# Subdistrict boundary polygons, read from the GeoJSON next to this notebook.
bangkok_subdistrict_df = geopandas.read_file('Bangkok-subdistricts.geojson')

# Plain Python list of [DNAME, SNAME, geometry] rows, scanned linearly by findAddress.
subdistricts = bangkok_subdistrict_df.loc[:, ['DNAME', 'SNAME', 'geometry']].values.tolist()

def findAddress(longitude, lattitude):
    """Return ``[district, subdistrict]`` for the polygon containing a coordinate.

    Args:
        longitude: x coordinate of the location.
        lattitude: y coordinate of the location (parameter spelling kept for callers).

    Returns:
        A two-item list taken from the first entry of ``subdistricts`` whose
        geometry contains the point, or ``[None, None]`` when no polygon does.
    """
    # Build the point once instead of once per polygon tested.
    location = Point(longitude, lattitude)
    for district_name, subdistrict_name, boundary in subdistricts:
        if boundary.contains(location):
            return [district_name, subdistrict_name]
    return [None, None]

In [3]:
new_district = []
new_subdistrict = []

# Iterate over the two coordinate columns directly; iterrows() would build a
# full Series for every ticket just to read two floats from it.
for longitude, latitude in tqdm(zip(df["long"], df["lat"]), total=df.shape[0]):
    district, subdistrict = findAddress(longitude, latitude)
    new_district.append(district)
    new_subdistrict.append(subdistrict)

# The lists are in row order, so the frame's own index lines them up.
# Tickets outside every polygon get <NA> in both columns.
df["subdistrict"] = pd.Series(new_subdistrict, dtype="string", index=df.index)
df["district"] = pd.Series(new_district, dtype="string", index=df.index)

100%|██████████| 12386/12386 [00:25<00:00, 482.54it/s]

74
74





In [4]:
# Discard tickets for which no district/subdistrict could be assigned.
df = df.dropna(subset=["subdistrict", "district"])

In [5]:
# Quick check after the spatial lookup: column types, remaining row count, first rows.
print(df.dtypes)
print(len(df))
df.head()

photo                       string
lat                        float64
long                       float64
timestamp      datetime64[ns, UTC]
type                        string
subdistrict                 string
district                    string
dtype: object
12312


Unnamed: 0_level_0,photo,lat,long,timestamp,type,subdistrict,district
ticket_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-C8DAVL,https://storage.googleapis.com/traffy_public_b...,13.74059,100.51974,2021-12-10 05:01:04.955289+00:00,{จราจร},แขวงรองเมือง,เขตปทุมวัน
2021-DDFNVZ,https://storage.googleapis.com/traffy_public_b...,13.76308,100.54699,2021-12-10 06:41:33.611765+00:00,"{ความสะอาด,จราจร}",แขวงดินแดง,เขตดินแดง
2021-CJ2F7F,https://storage.googleapis.com/traffy_public_b...,13.75868,100.53467,2021-12-10 10:06:40.713800+00:00,"{จราจร,ถนน}",แขวงทุ่งพญาไท,เขตราชเทวี
2021-4QKUUN,https://storage.googleapis.com/traffy_public_b...,13.74219,100.55281,2021-12-18 07:36:06.837338+00:00,{จราจร},แขวงคลองเตย,เขตคลองเตย
2021-H4RRY3,https://storage.googleapis.com/traffy_public_b...,13.78444,100.49413,2021-12-22 01:24:57.223484+00:00,{จราจร},แขวงบางบำหรุ,เขตบางพลัด


In [11]:
import mlflow
import mlflow.pytorch

In [18]:
# Fetch the pretrained YOLOv5x6 detector through torch hub and log it to MLflow.
with mlflow.start_run() as run:
    # `model` is the detector that the car-counting cell further down calls on each photo.
    model = torch.hub.load("ultralytics/yolov5", "yolov5x6", pretrained=True)
    # Store the model under the "model" artifact path of this run.
    mlflow.pytorch.log_model(model, "model")
    # Print the run metadata (includes the run_id needed to load the model back).
    print(run.info)

Using cache found in C:\Users\ACER/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-5-14 Python-3.8.16 torch-1.13.1 CUDA:0 (NVIDIA GeForce GTX 1660 Ti, 6144MiB)

Fusing layers... 
YOLOv5x6 summary: 574 layers, 140730220 parameters, 0 gradients
Adding AutoShape... 


<RunInfo: artifact_uri='file:///c:/Users/ACER/Desktop/DataSci-Eng/spark/execution_scripts/mlruns/0/880e41ecfff24fd8b0961f87e0560e06/artifacts', end_time=None, experiment_id='0', lifecycle_stage='active', run_id='880e41ecfff24fd8b0961f87e0560e06', run_name='calm-moose-682', run_uuid='880e41ecfff24fd8b0961f87e0560e06', start_time=1684497358943, status='RUNNING', user_id='ACER'>


In [17]:
# Load a previously logged model back from MLflow by run id.
# NOTE(review): the run id is hard-coded and differs from the run id printed by the
# logging cell in this notebook, and `loaded_model` is not referenced anywhere else in
# the visible notebook (the car-counting cell calls `model`) — confirm which is intended.
model_uri = "runs:/{}/model".format("630f2cc3384544d0b4d4ccabcee2fdd9")
loaded_model = mlflow.pytorch.load_model(model_uri)

In [None]:
car_count = []
indexes = []

# Only the ticket id is needed (it names the photo file), so iterate over the
# index instead of materialising every row with iterrows().
for ticket_id in tqdm(df.index, total=df.shape[0]):
    pred = model("../resources/photos/" + ticket_id + ".jpg")
    # Detections per class label. The "name" column is selected with [] because
    # attribute access would break if the frame ever gained a `name` attribute.
    object_counts = pred.pandas().xyxy[0]["name"].value_counts()
    # Append to both lists only after a successful prediction so they stay aligned.
    indexes.append(ticket_id)
    car_count.append(object_counts.get("car", 0))


In [None]:
# Attach the per-ticket car counts; the Series is aligned to df by ticket id.
df["car_count"] = pd.Series(data=car_count, index=indexes)

In [9]:
# Keep only tickets whose photo contains at least one detected car.
has_cars = df["car_count"] > 0
df = df.loc[has_cars]

In [10]:
# Show (rows, columns) and the first rows after filtering on car_count.
print(df.shape)
df.head()

(9671, 8)


Unnamed: 0_level_0,photo,lat,long,timestamp,type,subdistrict,district,car_count
ticket_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-C8DAVL,https://storage.googleapis.com/traffy_public_b...,13.74059,100.51974,2021-12-10 05:01:04.955289+00:00,{จราจร},แขวงรองเมือง,เขตปทุมวัน,9
2021-CJ2F7F,https://storage.googleapis.com/traffy_public_b...,13.75868,100.53467,2021-12-10 10:06:40.713800+00:00,"{จราจร,ถนน}",แขวงทุ่งพญาไท,เขตราชเทวี,28
2021-4QKUUN,https://storage.googleapis.com/traffy_public_b...,13.74219,100.55281,2021-12-18 07:36:06.837338+00:00,{จราจร},แขวงคลองเตย,เขตคลองเตย,3
2021-H4RRY3,https://storage.googleapis.com/traffy_public_b...,13.78444,100.49413,2021-12-22 01:24:57.223484+00:00,{จราจร},แขวงบางบำหรุ,เขตบางพลัด,9
2022-9EHJW2,https://storage.googleapis.com/traffy_public_b...,13.70611,100.52756,2022-01-16 11:14:00.944767+00:00,"{จราจร,สะพาน,ถนน}",แขวงทุ่งวัดดอน,เขตสาทร,3


In [13]:
# Checkpoint the detection results; the "utf-8-sig" codec writes a UTF-8 BOM at the start of the file.
df.to_csv("after_detect.csv", encoding="utf-8-sig")

In [6]:
# Resume from the detection checkpoint, indexed by ticket id again.
df = pd.read_csv("./after_detect.csv").set_index("ticket_id")

In [7]:
images = []
counts = []
resize = Resize((480, 480))

# Preview a random run of consecutive tickets together with their car counts.
PREVIEW_SIZE = 3
# Clamp the window so that a frame with fewer than PREVIEW_SIZE rows does not
# make randint() raise on an empty range or the loop index past the end.
start = random.randint(0, max(df.shape[0] - PREVIEW_SIZE, 0))
for i in range(start, min(start + PREVIEW_SIZE, df.shape[0])):
    images.append(resize(read_image("../resources/photos/" + df.index[i] + ".jpg")))
    # Read the count by column name rather than relying on it being the last column.
    counts.append(df["car_count"].iloc[i])
grid = make_grid(images, nrow=PREVIEW_SIZE)
print(counts)
show(grid)

[1, 2, 2]


In [8]:
# Load the Bangkok road shapefile and keep only the road id and its geometry.
bkk_road_df = geopandas.read_file("bangkok_road.shp", enabled_drivers=["ESRI Shapefile"], encoding="utf-8-sig")
bkk_road_df = bkk_road_df[["osm_id", "geometry"]]
# Cast the id to int64 before using it as the index.
# NOTE(review): presumably the shapefile stores osm_id as text — confirm.
bkk_road_df["osm_id"] = bkk_road_df["osm_id"].astype("int64")
bkk_road_df.set_index("osm_id", inplace=True)
print(bkk_road_df.shape)
bkk_road_df.head()

(14562, 1)


Unnamed: 0_level_0,geometry
osm_id,Unnamed: 1_level_1
8654614,"LINESTRING (100.51343 13.76331, 100.51370 13.7..."
8671143,"LINESTRING (100.56136 13.73643, 100.56135 13.7..."
8673756,"LINESTRING (100.59308 13.69712, 100.59262 13.6..."
9017482,"LINESTRING (100.58164 13.75484, 100.58235 13.7..."
9019480,"LINESTRING (100.54014 13.74450, 100.54012 13.7..."


In [9]:
points = []
lats = []
lons = []
osm_ids = []
new_district = []
new_subdistrict = []

# Emit one record per vertex of every road line, tagged with the road's osm_id
# and the district/subdistrict that contains the vertex.
road_geometries = bkk_road_df["geometry"]
for osm_id, road_line in tqdm(road_geometries.items(), total=bkk_road_df.shape[0]):
    for vertex in road_line.coords:
        lon, lat = vertex[0], vertex[1]
        district, subdistrict = findAddress(lon, lat)
        lats.append(lat)
        lons.append(lon)
        osm_ids.append(osm_id)
        points.append(Point(lon, lat))
        new_district.append(district)
        new_subdistrict.append(subdistrict)

points_df = geopandas.GeoDataFrame({"osm_id": osm_ids, "district": new_district, "subdistrict": new_subdistrict, "lat": lats, "lon": lons}, crs="EPSG:4326", geometry=points)

100%|██████████| 14562/14562 [05:27<00:00, 44.47it/s] 


In [10]:
# Display the per-vertex road points built in the previous cell.
points_df

Unnamed: 0,osm_id,district,subdistrict,lat,lon,geometry
0,8654614,เขตดุสิต,แขวงสวนจิตรลดา,13.763314,100.513428,POINT (100.51343 13.76331)
1,8654614,เขตดุสิต,แขวงสวนจิตรลดา,13.763196,100.513697,POINT (100.51370 13.76320)
2,8654614,เขตดุสิต,แขวงสวนจิตรลดา,13.762169,100.514985,POINT (100.51498 13.76217)
3,8654614,เขตดุสิต,แขวงสวนจิตรลดา,13.761843,100.515394,POINT (100.51539 13.76184)
4,8654614,เขตดุสิต,แขวงสวนจิตรลดา,13.761719,100.515548,POINT (100.51555 13.76172)
...,...,...,...,...,...,...
113217,1149060855,เขตราชเทวี,แขวงถนนเพชรบุรี,13.754341,100.528690,POINT (100.52869 13.75434)
113218,1149060855,เขตราชเทวี,แขวงถนนเพชรบุรี,13.754304,100.528797,POINT (100.52880 13.75430)
113219,1149060855,เขตราชเทวี,แขวงถนนเพชรบุรี,13.754269,100.528894,POINT (100.52889 13.75427)
113220,1149060855,เขตราชเทวี,แขวงถนนเพชรบุรี,13.754226,100.529014,POINT (100.52901 13.75423)


In [11]:
from sklearn.neighbors import BallTree

def get_nearest(src_points, candidates):
    """Find, for every source point, its single nearest candidate point.

    A haversine BallTree is built over ``candidates`` and queried with k=1.

    Returns:
        A ``(distances, indices)`` pair of 1-D arrays with one entry per source
        point; ``indices`` are row positions into ``candidates``.
    """
    tree = BallTree(candidates, leaf_size=15, metric='haversine')
    distances, neighbor_ids = tree.query(src_points, k=1)
    # query() returns (n, 1) arrays for k=1; take the only column of each.
    return distances[:, 0], neighbor_ids[:, 0]

def nearest_neighbor(left_gdf, right_gdf):
    """Match every row of ``left_gdf`` to its nearest point in ``right_gdf``.

    Args:
        left_gdf: frame with numeric ``"lat"`` and ``"long"`` columns in degrees.
        right_gdf: frame with a ``"geometry"`` column of points whose ``x`` is
            the longitude and ``y`` the latitude, in degrees.

    Returns:
        ``(dist, closest_points)``. ``dist`` holds one haversine distance per
        left row, expressed as an angle in radians on the unit sphere (multiply
        by the Earth's radius for a length). ``closest_points`` holds the
        matched rows of ``right_gdf`` plus a ``point_id`` column (row position
        in ``right_gdf``), indexed by the index of ``left_gdf`` under the name
        ``"ticket_id"``.
    """
    right = right_gdf.copy().reset_index(drop=True)

    if len(left_gdf) == 0:
        # A BallTree cannot be queried with zero points; return empty results
        # that have the same layout as the regular path.
        closest_points = right.iloc[0:0].copy()
        closest_points["point_id"] = np.empty(0, dtype="int64")
        closest_points["ticket_id"] = left_gdf.index
        closest_points.set_index("ticket_id", inplace=True)
        return np.empty(0, dtype=float), closest_points

    # The haversine metric expects (latitude, longitude) pairs in radians, in
    # that order; passing (longitude, latitude) yields wrong neighbours.
    left_radians = np.radians(left_gdf[["lat", "long"]].to_numpy(dtype=float))
    right_radians = np.radians(
        np.array([(geom.y, geom.x) for geom in right["geometry"]], dtype=float)
    )
    dist, closest = get_nearest(left_radians, right_radians)
    closest_points = right.loc[closest]
    closest_points["point_id"] = closest
    closest_points["ticket_id"] = left_gdf.index
    closest_points.set_index("ticket_id", inplace=True)
    return dist, closest_points

traffy_point = pd.read_csv('after_detect.csv')

MAX_DIST = 100  # metres: radius over which a ticket's car count fades out along its road
EARTH_RADIUS_M = 6371000  # Earth radius in metres used for the distance calculation


def _great_circle_m(point_a, point_b):
    """Return the great-circle (haversine) distance in metres between two points.

    Both points are expected to have been built as ``Point(lon, lat)`` in
    degrees, so ``x`` is the longitude and ``y`` is the latitude.
    """
    lat_a = math.radians(point_a.y)
    lat_b = math.radians(point_b.y)
    half_dlat = (lat_b - lat_a) / 2
    half_dlon = math.radians(point_b.x - point_a.x) / 2
    a = math.sin(half_dlat) ** 2 + math.cos(lat_a) * math.cos(lat_b) * math.sin(half_dlon) ** 2
    # Clamp to 1.0 so floating-point rounding can never push asin() out of its domain.
    return 2 * EARTH_RADIUS_M * math.asin(min(1.0, math.sqrt(a)))


# Hour of day, read once from characters 11-12 of the timestamp string.
# NOTE(review): the timestamps shown earlier in this notebook carry a +00:00 offset, so
# this is presumably the UTC hour rather than Bangkok local time — confirm that is intended.
ticket_hours = traffy_point["timestamp"].str[11:13].astype(int)

for hour in tqdm(range(0,24)):
    splitted = traffy_point[ticket_hours == hour]
    this_result = points_df.copy()

    if splitted.empty:
        # No tickets in this hour: every road point keeps a zero count.
        this_result["car_count"] = 0
        this_result["is_origin"] = False
    else:
        # Snap each ticket to its nearest road point; several tickets may land on
        # the same point, in which case the largest car count wins.
        dist, closest_points = nearest_neighbor(splitted, this_result)
        closest_points["car_count"] = splitted["car_count"]

        closest_point_df_groupby = closest_points.groupby(by="point_id")
        closest_points = closest_points.loc[closest_point_df_groupby["car_count"].idxmax()]
        this_result["car_count"] = closest_point_df_groupby["car_count"].max()
        this_result["is_origin"] = this_result["car_count"].notna()
        this_result["car_count"] = this_result["car_count"].fillna(value=0)
        this_result["car_count"] = this_result["car_count"].astype("int64")

        # For every road that received a ticket, list its origin points once so the
        # main loop does a dictionary lookup instead of rescanning closest_points.
        origins_by_road = {
            osm_id: list(zip(group["geometry"], group["car_count"]))
            for osm_id, group in closest_points.groupby("osm_id")
        }

        updated_labels = []
        updated_counts = []
        for label, osm_id, geom in zip(this_result.index, this_result["osm_id"], this_result["geometry"]):
            origins = origins_by_road.get(osm_id)
            if origins is None:
                continue
            weighted_car_count = []
            for origin_geom, origin_count in origins:
                d = _great_circle_m(geom, origin_geom)
                if d < MAX_DIST:
                    # Linear fall-off: full count at the origin, zero at MAX_DIST.
                    weighted_car_count.append((1 - (d / MAX_DIST)) * origin_count)
                else:
                    weighted_car_count.append(0)
            updated_labels.append(label)
            updated_counts.append(round(max(weighted_car_count)))
        if updated_labels:
            this_result.loc[updated_labels, "car_count"] = updated_counts

    this_result["hour"] = hour
    this_result.drop(columns=["osm_id", "geometry"], inplace=True)

    this_result.to_csv('traffy_splitted_time' + str(hour) + '.csv', encoding="utf-8-sig")

100%|██████████| 24/24 [14:36<00:00, 36.51s/it]


In [20]:
# Read the 24 hourly files and concatenate them in one call; growing a frame
# with concat inside a loop re-copies all earlier rows on every iteration.
# Each file keeps its own row index here, so index labels repeat across hours.
traffy_result = pd.concat(
    [pd.read_csv('traffy_splitted_time' + str(hour) + '.csv') for hour in range(0, 24)]
)
    

In [25]:
# Display the combined hourly results (still containing the index column read back from the CSV files).
traffy_result

Unnamed: 0.1,Unnamed: 0,district,subdistrict,lat,lon,car_count,is_origin,hour
0,0,เขตดุสิต,แขวงสวนจิตรลดา,13.763314,100.513428,0,False,0
1,1,เขตดุสิต,แขวงสวนจิตรลดา,13.763196,100.513697,0,False,0
2,2,เขตดุสิต,แขวงสวนจิตรลดา,13.762169,100.514985,0,False,0
3,3,เขตดุสิต,แขวงสวนจิตรลดา,13.761843,100.515394,0,False,0
4,4,เขตดุสิต,แขวงสวนจิตรลดา,13.761719,100.515548,0,False,0
...,...,...,...,...,...,...,...,...
113217,113217,เขตราชเทวี,แขวงถนนเพชรบุรี,13.754341,100.528690,0,False,23
113218,113218,เขตราชเทวี,แขวงถนนเพชรบุรี,13.754304,100.528797,0,False,23
113219,113219,เขตราชเทวี,แขวงถนนเพชรบุรี,13.754269,100.528894,0,False,23
113220,113220,เขตราชเทวี,แขวงถนนเพชรบุรี,13.754226,100.529014,0,False,23


In [26]:
# Remove the saved row index that came back from the CSV files as a data column.
# `columns=` states the intent directly; the former `axis=True` only selected
# the column axis because True happens to equal 1.
traffy_result.drop(columns=['Unnamed: 0'], inplace=True)

In [33]:
# Replace the per-file row labels with one continuous 0..n-1 index.
traffy_result = traffy_result.reset_index(drop=True)

In [34]:
# Display the cleaned, re-indexed result table.
traffy_result

Unnamed: 0,district,subdistrict,lat,lon,car_count,is_origin,hour
0,เขตดุสิต,แขวงสวนจิตรลดา,13.763314,100.513428,0,False,0
1,เขตดุสิต,แขวงสวนจิตรลดา,13.763196,100.513697,0,False,0
2,เขตดุสิต,แขวงสวนจิตรลดา,13.762169,100.514985,0,False,0
3,เขตดุสิต,แขวงสวนจิตรลดา,13.761843,100.515394,0,False,0
4,เขตดุสิต,แขวงสวนจิตรลดา,13.761719,100.515548,0,False,0
...,...,...,...,...,...,...,...
2717323,เขตราชเทวี,แขวงถนนเพชรบุรี,13.754341,100.528690,0,False,23
2717324,เขตราชเทวี,แขวงถนนเพชรบุรี,13.754304,100.528797,0,False,23
2717325,เขตราชเทวี,แขวงถนนเพชรบุรี,13.754269,100.528894,0,False,23
2717326,เขตราชเทวี,แขวงถนนเพชรบุรี,13.754226,100.529014,0,False,23


In [None]:
# Export the full result table (the row index is written as the first column).
# NOTE(review): the other exports in this notebook pass encoding="utf-8-sig"; this one uses
# the default encoding — confirm whether consumers of total.csv expect a BOM.
traffy_result.to_csv("total.csv")

In [35]:
import os

from sqlalchemy import create_engine

# Read the connection URL from the environment so that credentials never live in
# the notebook, e.g. TRAFFY_DB_URL="mysql+pymysql://<user>:<password>@<host>/traffy_project".
# The password that was previously committed here should be treated as leaked and rotated.
db_url = os.environ.get("TRAFFY_DB_URL")
if not db_url:
    raise RuntimeError("Set the TRAFFY_DB_URL environment variable to the database connection URL")

engine = create_engine(db_url)
# Insert in batches rather than building one statement for all rows.
traffy_result.to_sql('prediction_complete3', con = engine, chunksize=10_000)

2717328