In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import geopandas
import random
import torch
import torchvision
import cv2
import seaborn
from torchvision.io import read_image
from torchvision.utils import make_grid
%matplotlib inline
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F
from torchvision.transforms import Resize
import fiona
from fiona import Feature, Geometry
from shapely.geometry import mapping, shape, Point
import math

# Make every figure saved via savefig use a tight bounding box around its content.
plt.rcParams["savefig.bbox"] = 'tight'

def show(imgs):
    """Draw one image tensor, or a list of them, side by side in a single row.

    A non-list argument is wrapped into a one-element list. Each tensor is
    detached, converted to a PIL image and shown on its own axis with ticks
    and tick labels removed.
    """
    tensors = imgs if isinstance(imgs, list) else [imgs]
    # squeeze=False keeps the axes array 2-D even when there is a single image.
    _, axes = plt.subplots(ncols=len(tensors), squeeze=False, figsize=(12,12))
    for axis, tensor in zip(axes[0], tensors):
        pil_image = F.to_pil_image(tensor.detach())
        axis.imshow(np.asarray(pil_image))
        axis.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])

In [None]:
# Glob pattern matching every processed CSV part file.
CSV_PATH = "../resources/processed/processed/part*.csv"

import glob
# glob.glob returns matches in arbitrary, filesystem-dependent order; sorting
# keeps the row order of the concatenated frame reproducible between runs.
filenames = sorted(glob.glob(CSV_PATH))
filenames

In [None]:
# Fail early with a readable message instead of pd.concat's
# "No objects to concatenate" when the glob matched nothing.
if not filenames:
    raise FileNotFoundError(f"No CSV files matched {CSV_PATH!r}")

# Load all parts, drop rows missing any field used below, and index by ticket id.
df = (
    pd.concat([pd.read_csv(f) for f in filenames])
    .dropna(axis=0, how="any", subset=["ticket_id", "coords", "photo", "timestamp", "type"])
    .set_index("ticket_id")
    .convert_dtypes()
)
df["timestamp"] = pd.to_datetime(df["timestamp"])

# Keep only tickets whose type mentions traffic. The .copy() makes the result an
# independent frame so the column assignments below do not raise
# SettingWithCopyWarning.
df = df.loc[df["type"].str.contains("จราจร"),:].copy()

# "coords" is a comma-separated string; field 0 is stored as longitude and
# field 1 as latitude.
coord_df = df["coords"].str.split(pat=",", n=2, expand=True)
df["lat"] = coord_df[1].astype(float)
df["long"] = coord_df[0].astype(float)

# Copy again: later cells add columns to this column subset.
df = df[["photo", "lat", "long", "timestamp", "type"]].copy()

print(df.dtypes)
print(df.shape[0])
df.head()

In [None]:
from shapely.geometry import Point

# Subdistrict polygons for Bangkok, read from the GeoJSON next to the notebook.
bangkok_subdistrict_df = geopandas.read_file('Bangkok-subdistricts.geojson')

# Flatten to plain [DNAME, SNAME, geometry] lists for the lookup loop in findAddress.
subdistrict_columns = ['DNAME', 'SNAME', 'geometry']
subdistricts = bangkok_subdistrict_df[subdistrict_columns].values.tolist()

def findAddress(longitude, lattitude):
    """Return ``[district, subdistrict]`` for the polygon containing the point.

    Scans the module-level ``subdistricts`` list (rows of
    ``[DNAME, SNAME, geometry]``) and returns the names of the first row whose
    geometry ``contains`` ``Point(longitude, lattitude)``. Returns
    ``[None, None]`` when no geometry contains the point.
    """
    # Build the point once; it does not change between subdistricts.
    location = Point(longitude, lattitude)
    for district_name, subdistrict_name, boundary in subdistricts:
        if boundary.contains(location):
            return [district_name, subdistrict_name]
    return [None, None]

In [None]:
# Look up the district / subdistrict of every ticket from its coordinates.
indexes = list(df.index)
resolved = [
    findAddress(lon, lat)
    for lon, lat in tqdm(zip(df["long"], df["lat"]), total=df.shape[0])
]
new_district = [names[0] for names in resolved]
new_subdistrict = [names[1] for names in resolved]

# Key the new columns by ticket id so they align with df's index.
df["subdistrict"] = pd.Series(new_subdistrict, dtype="string", index=indexes)
df["district"] = pd.Series(new_district, dtype="string", index=indexes)

In [None]:
# Discard tickets that findAddress could not place in any subdistrict polygon
# (row-wise, any-missing are dropna's defaults).
df.dropna(subset=["subdistrict", "district"], inplace=True)

In [None]:
# Column types, remaining row count and a preview after the address filter.
print(df.dtypes)
print(len(df))
df.head()

In [None]:
import mlflow
import mlflow.pytorch

In [None]:
# Load the pretrained "yolov5x6" model from the ultralytics/yolov5 hub repository
# and log it under the artifact path "model" inside a new MLflow run.
with mlflow.start_run() as run:
    model = torch.hub.load("ultralytics/yolov5", "yolov5x6", pretrained=True)
    mlflow.pytorch.log_model(model, "model")
    # Print the run metadata for reference.
    print(run.info)

In [None]:
# Reload the detector from a previously logged MLflow run.
# NOTE(review): the run id is hard-coded, and the detection loop below calls
# `model`, not `loaded_model` - confirm which one is intended.
logged_run_id = "630f2cc3384544d0b4d4ccabcee2fdd9"
model_uri = f"runs:/{logged_run_id}/model"
loaded_model = mlflow.pytorch.load_model(model_uri)

In [None]:
# Run the detector on each ticket photo and record how many "car" boxes it found.
indexes = list(df.index)
car_count = []

for ticket_id in tqdm(indexes, total=df.shape[0]):
    pred = model("../resources/photos/" + ticket_id + ".jpg")
    # Number of detections per class name for this single image.
    object_counts = pred.pandas().xyxy[0]["name"].value_counts()
    car_count.append(object_counts.get("car", 0))


In [None]:
# Attach the detections to df. The Series is keyed by the ticket ids collected
# in the detection loop, so values are aligned with df by index label.
df["car_count"] = pd.Series(car_count, index=indexes)

In [None]:
# Keep only tickets whose photo contains at least one detected car.
has_cars = df["car_count"] > 0
df = df[has_cars]

In [None]:
# Report the (rows, columns) left after the car filter and preview the first rows.
print(df.shape)
df.head()

In [None]:
# Checkpoint the filtered tickets (including car_count) to disk; later cells
# read this file back instead of reusing the in-memory frame.
df.to_csv("after_detect.csv", encoding="utf-8-sig")

In [None]:
# Reload the detection checkpoint and restore ticket_id as the index.
df = pd.read_csv("./after_detect.csv").set_index("ticket_id")

In [None]:
# Show a few consecutive ticket photos (starting at a random row) with their car counts.
PREVIEW_COUNT = 3
images = []
counts = []
resize = Resize((480, 480))

# Clamp to the number of rows available: with fewer than PREVIEW_COUNT rows the
# original randint(0, n - 3) was called with an empty range and raised ValueError.
n_preview = min(PREVIEW_COUNT, df.shape[0])
start = random.randint(0, df.shape[0] - n_preview)
for i in range(start, start + n_preview):
    images.append(resize(read_image("../resources/photos/" + df.index[i] + ".jpg")))
    # Read the count by column name rather than assuming it is the last column.
    counts.append(df["car_count"].iloc[i])
print(counts)
if images:
    grid = make_grid(images, nrow=3)
    show(grid)

In [None]:
# Load the road shapefile and keep only the OSM id and the geometry.
bkk_road_df = geopandas.read_file(
    "bangkok_road.shp",
    enabled_drivers=["ESRI Shapefile"],
    encoding="utf-8-sig",
)[["osm_id", "geometry"]]
# Store osm_id as int64 and use it as the index.
bkk_road_df["osm_id"] = bkk_road_df["osm_id"].astype("int64")
bkk_road_df = bkk_road_df.set_index("osm_id")
print(bkk_road_df.shape)
bkk_road_df.head()

In [None]:
# Explode every road geometry into its individual vertices and tag each vertex
# with its road id and the district / subdistrict it falls in.
points = []
lats = []
lons = []
osm_ids = []
new_district = []
new_subdistrict = []

for osm_id, road in tqdm(bkk_road_df.iterrows(), total=bkk_road_df.shape[0]):
    for vertex in road["geometry"].coords:
        # Only the first two components are used: x = longitude, y = latitude.
        lon, lat = vertex[0], vertex[1]
        district, subdistrict = findAddress(lon, lat)
        osm_ids.append(osm_id)
        lons.append(lon)
        lats.append(lat)
        points.append(Point(lon, lat))
        new_district.append(district)
        new_subdistrict.append(subdistrict)

points_df = geopandas.GeoDataFrame(
    {
        "osm_id": osm_ids,
        "district": new_district,
        "subdistrict": new_subdistrict,
        "lat": lats,
        "lon": lons,
    },
    crs="EPSG:4326",
    geometry=points,
)

In [None]:
# Display the per-vertex road points frame built in the previous cell.
points_df

In [None]:
from sklearn.neighbors import BallTree

def get_nearest(src_points, candidates):
    """Return, for each source point, the distance to and index of its nearest candidate.

    A haversine ``BallTree`` is built over ``candidates`` and queried with
    ``k=1``. scikit-learn's haversine metric reads each row as
    ``[latitude, longitude]`` in radians and reports distances in radians.
    The two returned 1-D arrays hold the distance and the row position in
    ``candidates`` of the single nearest neighbour of every source point.
    """
    tree = BallTree(candidates, leaf_size=15, metric='haversine')
    distances, neighbor_idx = tree.query(src_points, k=1)
    # With k=1 both results have exactly one column; return that column flat.
    return distances[:, 0], neighbor_idx[:, 0]

def nearest_neighbor(left_gdf, right_gdf):
    """Match every row of ``left_gdf`` to its nearest point in ``right_gdf``.

    Parameters
    ----------
    left_gdf : frame with numeric ``lat`` and ``long`` columns in degrees.
    right_gdf : frame whose ``geometry`` column holds points exposing ``.x``
        and ``.y``; this notebook builds them as x = longitude, y = latitude.

    Returns
    -------
    dist : 1-D array of the distances reported by ``get_nearest`` (haversine
        BallTree output, i.e. radians rather than metres), one per left row.
    closest_points : the matched rows of ``right_gdf`` (after its index is
        reset to 0..n-1), with a ``point_id`` column holding the matched row
        position and indexed by ``left_gdf``'s index under the name
        ``ticket_id``.

    An empty ``left_gdf`` yields an empty ``dist`` and an empty
    ``closest_points`` instead of an error from the tree query.
    """
    right = right_gdf.copy().reset_index(drop=True)
    if len(left_gdf) == 0:
        dist = np.empty(0, dtype=float)
        closest = np.empty(0, dtype=int)
    else:
        # scikit-learn's haversine metric expects [latitude, longitude] in radians.
        # The previous (longitude, latitude) order distorted the distances and
        # could select the wrong nearest point.
        left_radians = np.radians(left_gdf[["lat", "long"]].to_numpy(dtype=float))
        right_radians = np.radians([(geom.y, geom.x) for geom in right["geometry"]])
        dist, closest = get_nearest(left_radians, right_radians)
    # Copy so the column assignments below act on an independent frame.
    closest_points = right.loc[closest].copy()
    closest_points["point_id"] = closest
    closest_points["ticket_id"] = left_gdf.index
    closest_points.set_index("ticket_id", inplace=True)
    return dist, closest_points

EARTH_RADIUS_M = 6371000
MAX_DIST = 100 # meter


def _great_circle_m(point_a, point_b):
    """Great-circle distance in metres between two points (x = longitude, y = latitude, degrees)."""
    # points_df geometries are built as Point(lon, lat), so latitude is .y and
    # longitude is .x. The previous code had the two swapped.
    lat1, lon1 = math.radians(point_a.y), math.radians(point_a.x)
    lat2, lon2 = math.radians(point_b.y), math.radians(point_b.x)
    cos_angle = (
        math.sin(lat1) * math.sin(lat2)
        + math.cos(lat1) * math.cos(lat2) * math.cos(lon2 - lon1)
    )
    # Floating-point rounding can push the cosine marginally outside [-1, 1],
    # which would make math.acos raise ValueError for near-identical points.
    cos_angle = max(-1.0, min(1.0, cos_angle))
    return math.acos(cos_angle) * EARTH_RADIUS_M


def _hourly_point_counts(hour_tickets, road_points):
    """Return a copy of ``road_points`` with ``car_count`` and ``is_origin`` for one hour.

    Each ticket is snapped to its nearest road point ("origin"); an origin keeps
    the largest car count of the tickets snapped to it. Every point on a road
    that has at least one origin then receives the largest distance-weighted
    count among that road's origins, decaying linearly to 0 at MAX_DIST metres.
    """
    result = road_points.copy()
    if hour_tickets.empty:
        # No tickets in this hour: nothing to snap or spread.
        result["car_count"] = 0
        result["is_origin"] = False
        return result

    _, closest_points = nearest_neighbor(hour_tickets, result)
    # Aligned on the shared index (the row labels of hour_tickets).
    closest_points["car_count"] = hour_tickets["car_count"]

    counts_by_point = closest_points.groupby(by="point_id")["car_count"]
    # Keep, per road point, only the ticket with the largest car count.
    closest_points = closest_points.loc[counts_by_point.idxmax()]
    result["car_count"] = counts_by_point.max()
    result["is_origin"] = result["car_count"].notna()
    result["car_count"] = result["car_count"].fillna(value=0).astype("int64")

    # Membership set built once instead of scanning the column for every point.
    origin_road_ids = set(closest_points["osm_id"].tolist())
    for i, row in result.iterrows():
        if row["osm_id"] not in origin_road_ids:
            continue
        all_points = closest_points[closest_points["osm_id"] == row["osm_id"]]
        weighted_car_count = []
        for _, origin in all_points.iterrows():
            d = 0
            if row["geometry"] != origin["geometry"]:
                d = _great_circle_m(row["geometry"], origin["geometry"])
            if d < MAX_DIST:
                weighted_car_count.append((1 - (d / MAX_DIST)) * origin["car_count"])
            else:
                weighted_car_count.append(0)
        result.loc[i, "car_count"] = round(max(weighted_car_count))
    return result


traffy_point = pd.read_csv('after_detect.csv')
# Characters 11-12 of the timestamp string are the hour; parse them once, not per hour.
ticket_hours = traffy_point['timestamp'].str[11:13].astype(int)

for hour in tqdm(range(0,24)):
    splitted = traffy_point[ticket_hours == hour]
    this_result = _hourly_point_counts(splitted, points_df)

    this_result["hour"] = hour
    this_result.drop(columns=["osm_id", "geometry"], inplace=True)

    this_result.to_csv('traffy_splitted_time' + str(hour) + '.csv', encoding="utf-8-sig")

In [None]:
# Read the 24 per-hour exports and concatenate them in a single call. Growing a
# frame with pd.concat inside a loop copies it on every iteration, and starting
# from an empty DataFrame triggers a FutureWarning in recent pandas versions.
traffy_result = pd.concat(
    [pd.read_csv('traffy_splitted_time' + str(hour) + '.csv') for hour in range(0, 24)]
)
    

In [None]:
# Display the concatenated per-hour results.
traffy_result

In [None]:
# Remove the unnamed index column that read_csv picked up from the per-hour files.
# `columns=` states the intent directly; the previous `axis=True` only worked
# because True compares equal to 1.
traffy_result.drop(columns='Unnamed: 0', inplace=True)

In [None]:
# Replace the row labels repeated across the concatenated per-hour files with a
# fresh 0..n-1 index; the old labels are discarded (drop=True).
traffy_result.reset_index(drop=True, inplace=True)

In [None]:
# Display the cleaned, re-indexed result table.
traffy_result

In [None]:
# Persist the combined 24-hour table; the index is written as the first column.
# NOTE(review): the other exports in this notebook pass encoding="utf-8-sig" -
# confirm whether this file should use the same encoding.
traffy_result.to_csv("total.csv")

In [None]:
import os
from urllib.parse import quote_plus

from sqlalchemy import create_engine

# Credentials must not be committed with the notebook: the password is read from
# the environment (KeyError if TRAFFY_DB_PASSWORD is unset). User, host and
# database keep their previous values as defaults and can be overridden.
db_user = os.environ.get("TRAFFY_DB_USER", "myadmin")
db_password = os.environ["TRAFFY_DB_PASSWORD"]
db_host = os.environ.get("TRAFFY_DB_HOST", "datasciproject.mysql.database.azure.com")
db_name = os.environ.get("TRAFFY_DB_NAME", "traffy_project")

# quote_plus escapes characters such as "@" or "/" that would otherwise break the URL.
engine = create_engine(
    f"mysql+pymysql://{quote_plus(db_user)}:{quote_plus(db_password)}@{db_host}/{db_name}"
)
# Upload the combined table; to_sql's default if_exists="fail" raises if the
# table already exists.
traffy_result.to_sql('prediction_complete3', con = engine)