In [1]:
# https://www.404bugs.com/details/1079375428891496448
import cv2 as cv
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch, torchvision
from sklearn import metrics
from sklearn.model_selection import train_test_split
from imageai.Detection import ObjectDetection

from datetime import datetime
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score
import lightgbm
import matplotlib.image as mpimg
from sklearn.metrics import f1_score, log_loss
from sklearn.model_selection import StratifiedKFold, KFold

In [2]:
detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath("./model/yolov3.pt")
detector.loadModel()
# COCO Name
# 'person', 'bicycle', 'car', 'motorbike', 'bus', 'truck', 'traffic light', 'stop sign'
_ = detector.CustomObjects(person=1, bicycle=1, car=1, motorbike=1, bus=1, 
                                        truck=1, traffic_light=1, stop_sign=1)

In [3]:
def obj_detect(data, path):
    detect_results = []
    for point in tqdm(data):
        map_id = point['id']
        temp = {}
        root_path = path + str(map_id) + '/'
        # y_length, x_length, _ = cv.imread(root_path + f"{point['key_frame']}").shape
        result = [detector.detectObjectsFromImage(input_image=root_path + f"{_frame['frame_name']}") for _frame in point["frames"]]
        temp['id'] = map_id
        temp['frames'] = result
        detect_results.append(temp)
    return detect_results

In [4]:
def get_obj_data(df, car_list):
    map_id_list=[]
    obj_num_list = []
    car_num_list = []
    percentage_list = []
    pos_max_list = []
    pos_min_list = []
    pos_mean_list = []
    pos_median_list = []
    pos_std_list = []

    for (map_id, frames) in zip(df['id'], df['frames']):
        for item in frames:
            temp_car = np.array([[_['percentage_probability']] + _["box_points"] for _ in item if _["name"] in car_list])
            # print(temp_car)
            if temp_car.shape[0]:
                map_id_list.append(map_id)
                obj_num_list.append(len(item))
                car_num_list.append(temp_car.shape[0])
                percentage_list.append([temp_car[:, 0].mean(), temp_car[:, 0].std(), temp_car[:, 0].max()])
                temp_x = temp_car[:, 1::2].mean(axis=1)
                temp_y = temp_car[:, 2::2].mean(axis=1)
                pos_max_list.append([temp_x.max(), temp_y.max()])
                pos_min_list.append([temp_x.min(), temp_y.min()])
                pos_mean_list.append([temp_x.mean(), temp_y.mean()])
                pos_median_list.append([np.median(temp_x), np.median(temp_y)])
                pos_std_list.append([temp_x.std(), temp_y.std()])

    temp_df = pd.DataFrame({'map_id':map_id_list,
                          'obj_num':obj_num_list,
                          'car_num':car_num_list,
                          })
    temp_df['percent_mean'] = np.array(percentage_list)[:, 0]
    temp_df['percent_std'] = np.array(percentage_list)[:, 1]
    temp_df['percent_max'] = np.array(percentage_list)[:, 2]
    temp_df['posx'] = np.array(pos_mean_list)[:, 0]
    temp_df['posy'] = np.array(pos_mean_list)[:, 1]
    temp_df['posx_max'] = np.array(pos_max_list)[:, 0]
    temp_df['posy_max'] = np.array(pos_max_list)[:, 0]
    temp_df['posx_min'] = np.array(pos_min_list)[:, 0]
    temp_df['posy_min'] = np.array(pos_min_list)[:, 1]
    temp_df['posx_median'] = np.array(pos_median_list)[:, 0]
    temp_df['posy_median'] = np.array(pos_median_list)[:, 1]
    temp_df['posx_std'] = np.array(pos_std_list)[:, 0]
    temp_df['posy_std'] = np.array(pos_std_list)[:, 1]

    temp_df=temp_df.groupby("map_id").agg({"obj_num":["mean","std"],
                                            "car_num":["mean", "std"],
                                            "percent_mean":["mean", "std"],
                                            "percent_std":["mean"],
                                            "percent_max":["mean"],
                                            "posx":["mean"],
                                            "posy":["mean"],
                                            "posx_max":["mean"],
                                            "posy_max":["mean"],
                                            "posx_min":["mean"],
                                            "posy_min":["mean"],
                                            "posx_median":["mean"],
                                            "posy_median":["mean"],
                                            "posx_std":["mean"],
                                            "posy_std":["mean"]
                                        }).reset_index()

    temp_df.columns=['map_id', 'obj_num_mean', 'obj_num_std', 'car_num_mean', 'car_num_std',
                    'percent_mean', 'percent_mean_std', 'percent_std', 'percent_max',
                    'posx', 'posy', 'posx_max', 'posy_max', 'posx_min', 'posy_min', 
                    'posx_median', 'posx_median', 'posx_std', 'posy_std']
    
    return temp_df

In [5]:
def get_data(df):
    map_id_list=[]
    label=[]
    key_frame_list=[]
    jpg_name_1=[]
    jpg_name_2=[]
    gap_time_1=[]
    gap_time_2=[]
    im_diff_mean=[]
    im_diff_std=[]
    
    for s in list(df.annotations):
        map_id=s["id"]
        map_key=s["key_frame"]
        frames=s["frames"]
        status=s["status"]
        # 解析每个 frame
        for i in range(0,len(frames)-1):
            f=frames[i]
            f_next=frames[i+1]
            """
            im=mpimg.imread(path+img_path+"/"+map_id+"/"+f["frame_name"])
            im_next=mpimg.imread(path+img_path+"/"+map_id+"/"+f_next["frame_name"])
            
            if im.shape==im_next.shape:
                im_diff=im-im_next
            else:
                im_diff=im
            
            im_diff_mean.append(np.mean(im_diff))
            im_diff_std.append(np.std(im_diff))
            """

            map_id_list.append(map_id)
            key_frame_list.append(map_key)
            jpg_name_1.append(f["frame_name"])
            jpg_name_2.append(f_next["frame_name"])
            gap_time_1.append(f["gps_time"])
            gap_time_2.append(f_next["gps_time"])
            label.append(status)
    train_df= pd.DataFrame({
        "map_id":map_id_list,
        "label":label,
        "key_frame":key_frame_list,
        "jpg_name_1":jpg_name_1,
        "jpg_name_2":jpg_name_2,
        "gap_time_1":gap_time_1,
        "gap_time_2":gap_time_2,
        #"im_diff_mean":im_diff_mean,
        #"im_diff_std":im_diff_std,
    })    
    train_df["gap"]=train_df["gap_time_2"]-train_df["gap_time_1"]
    train_df["gap_time_today"]=train_df["gap_time_1"]%(24*3600)
    train_df["hour"]=train_df["gap_time_1"].apply(lambda x:datetime.fromtimestamp(x).hour)
    train_df["minute"]=train_df["gap_time_1"].apply(lambda x:datetime.fromtimestamp(x).minute)
    train_df["day"]=train_df["gap_time_1"].apply(lambda x:datetime.fromtimestamp(x).day)
    train_df["dayofweek"]=train_df["gap_time_1"].apply(lambda x:datetime.fromtimestamp(x).weekday())
    
    train_df["key_frame"]=train_df["key_frame"].apply(lambda x:int(x.split(".")[0]))
    
    train_df=train_df.groupby("map_id").agg({"gap":["mean","std"],
                                             "hour":["mean"],
                                             "minute":["mean"],
                                             "dayofweek":["mean"],
                                             "gap_time_today":["mean","std"],
                                             #"im_diff_mean":["mean","std"],
                                             #"im_diff_std":["mean","std"],
                                             "label":["mean"],
                                            }).reset_index()
    train_df.columns=["map_id","gap_mean","gap_std",
                      "hour_mean","minute_mean","dayofweek_mean","gap_time_today_mean","gap_time_today_std",
                      #"im_diff_mean_mean","im_diff_mean_std","im_diff_std_mean","im_diff_std_std",
                      "label"]
    
    train_df["label"]=train_df["label"].apply(int)
    
    return train_df

In [6]:
def stacking(clf, train_x, train_y, test_x, clf_name, class_num, columns):
    # predictors = list(tra7in_x.columns)
    # train_x = train_x.values
    # test_x = test_x.values
    predictors = columns
    folds = 5
    seed = 2019
    kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)

    train = np.zeros((train_x.shape[0], class_num))
    test = np.zeros((test_x.shape[0], class_num))
    test_pre = np.zeros((folds, test_x.shape[0], class_num))
    test_pre_all = np.zeros((folds, test_x.shape[0]))
    cv_scores = []
    f1_scores = []
    cv_rounds = []

    for i, (train_index, test_index) in enumerate(kf.split(train_x, train_y)):
        tr_x = train_x[train_index]
        tr_y = train_y[train_index]
        te_x = train_x[test_index]
        te_y = train_y[test_index]

        if clf_name == "lgb":
            train_matrix = clf.Dataset(tr_x, label=tr_y)
            test_matrix = clf.Dataset(te_x, label=te_y)

            params = {
                'boosting_type': 'gbdt',
                'objective': 'multiclass',
                #'metric': 'None',
                'metric': 'multi_logloss',
                'min_child_weight': 1.5,
                'num_leaves': 2 ** 3-1,
                'lambda_l2': 10,
                'feature_fraction': 0.8,
                'bagging_fraction': 0.8,
                'bagging_freq': 4,
                'learning_rate': 0.05,
                'seed': 2023,
                'nthread': 28,
                'num_class': class_num,
                # 'silent': True,
                'verbose': -1,
            }

            num_round = 4000
            # early_stopping_rounds = 100
            if test_matrix:
                model = clf.train(params, train_matrix, num_round, valid_sets=test_matrix, 
                                #   verbose_eval=50
                                  #feval=acc_score_vali,
                                #   early_stopping_rounds=early_stopping_rounds
                                  )
                print("\n".join(("%s: %.2f" % x) for x in
                                list(sorted(zip(predictors, model.feature_importance("gain")), key=lambda x: x[1],
                                       reverse=True))[:200]
                                ))
                pre = model.predict(te_x, num_iteration=model.best_iteration)
                pred = model.predict(test_x, num_iteration=model.best_iteration)
                train[test_index] = pre
                test_pre[i, :] = pred
                cv_scores.append(log_loss(te_y, pre))
                
                f1_list=f1_score(te_y,np.argmax(pre,axis=1),average=None)
                f1=0.2*f1_list[0]+0.2*f1_list[1]+0.6*f1_list[2]
                
                f1_scores.append(f1)
                cv_rounds.append(model.best_iteration)
                test_pre_all[i, :] = np.argmax(pred, axis=1)

        print("%s now score is:" % clf_name, cv_scores)
        print("%s now f1-score is:" % clf_name, f1_scores)
        print("%s now round is:" % clf_name, cv_rounds)
    test[:] = test_pre.mean(axis=0)
    print("%s_score_list:" % clf_name, cv_scores)
    print("%s_score_mean:" % clf_name, np.mean(cv_scores), np.mean(f1_scores))
    print("%s_score_std:" % clf_name, np.std(cv_scores))
    return train, test, test_pre_all, np.mean(f1_scores)

In [7]:
def lgb(x_train, y_train, x_valid, columns):
    lgb_train, lgb_test, sb, cv_scores = stacking(lightgbm, x_train, y_train, x_valid, "lgb", 3, columns)
    return lgb_train, lgb_test, sb, cv_scores

In [8]:
# train_json = pd.read_json('./data/amap_traffic_annotations_train.json')
# train_data_path = './data/amap_traffic_train_0712/'

# train_obj_info = obj_detect(train_json['annotations'], train_data_path)
# import json
# with open('./result/train_obj_detect.json', 'w', encoding='utf-8') as f:
#     json.dump(train_obj_info, f, ensure_ascii=False, indent=2)

In [9]:
import copy
train_json = pd.read_json('./data/amap_traffic_annotations_train.json')
train_df_0 = copy.deepcopy(get_data(train_json[:]))

In [10]:
train_obj_df = pd.read_json('./result/train_obj_detect.json', dtype=object)
car_list = ['car', 'bus', 'truck', 'car', 'bus', 'truck']
train_df_1 = get_obj_data(train_obj_df, car_list)

In [11]:
from torchvision import transforms
from torch.utils.data import DataLoader
import torch, torchvision
import torch.nn as nn

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

data_transforms = transforms.Compose([
        # transforms.Resize(256),
        transforms.Resize(500),
        transforms.CenterCrop(480),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])


model_conv = torchvision.models.resnext101_32x8d()
num_ftrs = model_conv.fc.in_features
# model_conv.fc = nn.Sequential(nn.Linear(num_ftrs, 256), 
#                               nn.ReLU(), nn.Linear(256, 32),
#                               nn.ReLU(), nn.Linear(32, 3))
model_conv.fc = nn.Sequential(nn.Linear(num_ftrs, 1024), 
                              nn.ReLU(), nn.Linear(1024, 256), 
                              nn.ReLU(), nn.Linear(256, 3))
temp_weights = torch.load('./model/resnext/1.pth')
model_conv.load_state_dict(temp_weights)
model_conv.fc = nn.Sequential(*list(model_conv.fc.children())[:-2])
_ = model_conv.eval().to(device)

In [12]:
# from torchvision import datasets

# image_datasets = datasets.ImageFolder('./data/amap_frames/', data_transforms)

# dataloaders = DataLoader(image_datasets, batch_size=1, shuffle=False, num_workers=1)
# dataset_sizes = len(image_datasets)

# output_list = []
# for inputs, _ in dataloaders:
#     outputs = model_conv(inputs.to(device))
#     output_list.append(outputs)

In [13]:
from PIL import Image

imgs = []
with torch.no_grad():
    for item in tqdm(train_json['annotations']):
        map_id = item['id']
        key_frame_name = item['key_frame']
        img = Image.open('./data/amap_traffic_train_0712/' + map_id + '/' + key_frame_name)
        imgs.append(data_transforms(img).unsqueeze(0))

input_data = torch.cat(imgs).to(device)

100%|██████████| 1500/1500 [00:27<00:00, 54.14it/s]


In [16]:
output_data_list = []
with torch.no_grad():
    for i in tqdm(range(input_data.shape[0])):
        output_data = model_conv(input_data[i].unsqueeze(0))
        output_data_list.append(output_data.cpu())

100%|██████████| 1500/1500 [00:58<00:00, 25.54it/s]


In [20]:
train_df_2 = torch.cat(output_data_list).numpy()
train_df_2 = pd.DataFrame(train_df_2, columns=['fet'+str(i) for i in range(0, 256)])

In [21]:
train_df = pd.merge(train_df_0, train_df_1, on='map_id', how='outer').fillna(-100000)
train_df = pd.concat([train_df, train_df_2], axis=1)
train_y=train_df["label"]
train_x=copy.deepcopy(train_df.drop(labels=['map_id', 'label'], axis=1))

In [22]:
from sklearn.preprocessing import Normalizer

my_transfer = Normalizer()
columns = train_x.columns
train_x = my_transfer.fit_transform(train_x)

In [23]:
# test_json = pd.read_json('./data/amap_traffic_annotations_test.json')
# test_data_path = './data/amap_traffic_test_0712/'

# test_obj_info = obj_detect(test_json['annotations'], test_data_path)
# with open('./result/test_obj_detect.json', 'w', encoding='utf-8') as f:
#     json.dump(test_obj_info, f, ensure_ascii=False, indent=2)

In [24]:
test_json = pd.read_json('./data/amap_traffic_annotations_test.json')
test_df_0=get_data(test_json[:])
test_obj_df = pd.read_json('./result/test_obj_detect.json', dtype=object)
car_list = ['car', 'bus', 'truck', 'car', 'bus', 'truck']
test_df_1 = get_obj_data(test_obj_df, car_list)

test_df = pd.merge(test_df_0, test_df_1, on='map_id', how='outer').fillna(-100000)

In [25]:
valid_x = copy.deepcopy(test_df.drop(labels=['map_id', 'label'], axis=1))
# columns = valid_x.columns
valid_x = my_transfer.fit_transform(valid_x)

In [26]:
lgb_train, lgb_test, sb, m=lgb(train_x, train_y, train_x, columns)
# sub=test_df[["map_id"]].copy()
# sub["pred"]=np.argmax(lgb_test,axis=1)

# result_dic=dict(zip(sub["map_id"],sub["pred"]))
# #保存
# import json
# with open(path+"amap_traffic_annotations_test.json","r") as f:
#     content=f.read()
# content=json.loads(content)
# for i in content["annotations"]:
#     i['status']=result_dic[i["id"]]
# with open(result_path+"sub_%s.json"%m,"w") as f:
#     f.write(json.dumps(content))

[1]	valid_0's multi_logloss: 0.727961
[2]	valid_0's multi_logloss: 0.713388
[3]	valid_0's multi_logloss: 0.702494
[4]	valid_0's multi_logloss: 0.691399
[5]	valid_0's multi_logloss: 0.677956
[6]	valid_0's multi_logloss: 0.667051
[7]	valid_0's multi_logloss: 0.657981
[8]	valid_0's multi_logloss: 0.646781
[9]	valid_0's multi_logloss: 0.638022
[10]	valid_0's multi_logloss: 0.62951
[11]	valid_0's multi_logloss: 0.620905
[12]	valid_0's multi_logloss: 0.614224
[13]	valid_0's multi_logloss: 0.606987
[14]	valid_0's multi_logloss: 0.601428
[15]	valid_0's multi_logloss: 0.595159
[16]	valid_0's multi_logloss: 0.5896
[17]	valid_0's multi_logloss: 0.584773
[18]	valid_0's multi_logloss: 0.580405
[19]	valid_0's multi_logloss: 0.576735
[20]	valid_0's multi_logloss: 0.573128
[21]	valid_0's multi_logloss: 0.570067
[22]	valid_0's multi_logloss: 0.566537
[23]	valid_0's multi_logloss: 0.56349
[24]	valid_0's multi_logloss: 0.558785
[25]	valid_0's multi_logloss: 0.553971
[26]	valid_0's multi_logloss: 0.550333