In [1]:
import random, os
import numpy as np
import torch
import glob

def seed_everything(seed: int):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    
seed_everything(42)

In [2]:
# !pip install piexif

In [3]:
import pandas as pd
import os
from PIL import Image, ExifTags
import numpy as np
import pyheif 
from tqdm.notebook import tqdm
import json
import matplotlib.pyplot as plt
import piexif

In [4]:
import cv2 as cv2
def rotate_image(image, angle):
    image_center = tuple(np.array(image.shape[1::-1]) / 2)
    rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
    result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv2.INTER_LINEAR)
    return result

In [5]:
def resize_image(img):
    scale_percent = 20 # percent of original size
    width = int(img.shape[1] * scale_percent / 100)
    height = int(img.shape[0] * scale_percent / 100)
    dim = (width, height)

    # resize image
    resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
    return resized


In [6]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
device

device(type='cuda')

In [7]:
from UNet import UNet
import torch

model=UNet()
model.load_state_dict(torch.load("plate_dataset/best-upgraded-plate-mask.pt"))
model.eval()
;

''

In [8]:
def draw_box(img,box):
    color = (255, 0, 0)
    thickness = 3
    
    image=img.copy()
    image = cv2.line(image, box[0], box[1], color, thickness)
    image = cv2.line(image, box[2], box[1], color, thickness)
    image = cv2.line(image, box[2], box[3], color, thickness)
    image = cv2.line(image, box[0], box[3], color, thickness)
    
    return image

# check plates

In [9]:
with open("_root_stepik_hacaton_ulyanovsk_dataset_train.json","r") as f:
    plates = json.load(f)
    
len(plates)

530

In [10]:
plates[0]

{'path': '/root/stepik/hacaton_ulyanovsk/dataset/train/img_2928.jpg',
 'found_plate': True,
 'plate': [[1705, 1570, 1915, 1634]],
 'shape': [3024, 4032, 3],
 'angle': 0}

In [11]:
len(list(filter(lambda item: item['found_plate'], plates)))

521

In [12]:
!pip install exifread



In [13]:
car_model = torch.hub.load('ultralytics/yolov5', 'yolov5x6')
car_model.classes = [0, 2]

Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-8-10 Python-3.8.10 torch-1.13.0.dev20220701+cu116 CUDA:0 (NVIDIA GeForce RTX 3090, 24268MiB)

Fusing layers... 
YOLOv5x6 summary: 574 layers, 140730220 parameters, 0 gradients
Adding AutoShape... 


In [14]:
import exifread
import io
import re

train_car_data = []
image_exif={}
for img_path in glob.glob('dataset/train/*'):
    if ".jpg" not in img_path and ".heic" not in img_path:
        print(img_path)
        continue
        
    img_name=img_path.split('/')[-1]
        
    if 'heic' in img_path:
        heif_file = pyheif.read(img_path)
        
        for metadata in heif_file.metadata:

            if metadata['type'] == 'Exif':
                fstream = io.BytesIO(metadata['data'][6:])

                exifdata = exifread.process_file(fstream,details=False)
                m = re.search('\s+(\d+.\d+)mm', str(exifdata))
                if m:
                    found = m.group(1)
                    image_exif[img_name]=float(found)
                else:
                    image_exif[img_name]=1.5
    else:
        exif_dict = piexif.load(img_path)
        v1,v2=exif_dict['Exif'][37386]
        image_exif[img_name]=v1/v2
        
    
    if 'heic' in img_path:
        heif_file = pyheif.read(img_path)
        img = Image.frombytes(heif_file.mode, heif_file.size, heif_file.data, "raw", heif_file.mode, heif_file.stride)
    else:
        img = Image.open(img_path)
        
    img=np.array(img)
    
    for p in plates:
        if p['path']==img_path:
            if p['found_plate'] and p['angle']!=0:
                img=rotate_image(img,p['angle'])
            break
        
    results = car_model(img)
    
#     if len(results.xyxy)>1:
#         break
    if results.xyxy[0].shape != torch.Size([0, 6]):
        results = [img_name,img.shape[0],img.shape[1]] + results.xyxy[0][0].cpu().numpy().tolist()
        train_car_data.append(results)


In [15]:
results

['img_2724.heic',
 3024,
 4032,
 1098.057373046875,
 1334.7685546875,
 1536.3966064453125,
 1611.6895751953125,
 0.9071996212005615,
 2.0]

In [16]:
len(train_car_data)

529

In [17]:
len(image_exif)

530

In [18]:
image_delta_coords={}


for plate in tqdm(plates):
    if not plate['found_plate']:
        continue
        
    if 'heic' in plate['path']:
        heif_file = pyheif.read(plate['path'])
        img = Image.frombytes(heif_file.mode, heif_file.size, heif_file.data, "raw", heif_file.mode, heif_file.stride)
    else:
        img = Image.open(plate['path'])
        
        
    img=np.array(img)
    if plate['angle']!=0:
        img=rotate_image(img,plate['angle'])
        
    plate_index=0
    for x1,y1,x2,y2 in plate['plate']:
        image=img
        x_min=max(0,x1-50)
        x_max=min(plate['shape'][1],x2+50)
        y_min=max(0,y1-50)
        y_max=min(plate['shape'][0],y2+50)

        resized = image[y_min:y_max,x_min:x_max] #resize_image(image)

        im = Image.fromarray(resized)
        croped_path=f"dataset/small_train_solution7/{plate['path'].split('/')[-1]}_{plate_index}.jpg"
        plate_index+=1
        im.save(croped_path)
        image_delta_coords[croped_path.split('/')[-1]]={'orig':plate['path'],'croped':croped_path,'coord':[x1,y1,x2,y2],'delta':[x_min,x_max,y_min,y_max]}
        
    
    
        

  0%|          | 0/530 [00:00<?, ?it/s]

In [19]:
len(image_delta_coords)

523

In [20]:

len(glob.glob('dataset/small_train_solution7/*.jpg'))

523

In [21]:
from skimage.io import imread
import os
import json
import glob

images = []
images_file = []

for img_path in glob.glob('dataset/small_train_solution7/*.jpg'):
    images.append(imread(img_path))
    images_file.append(img_path.split("/")[-1])

In [22]:
len(images_file)

523

In [23]:
from skimage.transform import resize
import numpy as np

size = (256, 256)
X = [resize(x, size, mode='constant', anti_aliasing=True,) for x in images]
X = np.array(X, np.float32)

In [24]:
X.shape

(523, 256, 256, 3)

In [25]:
X_tensor=torch.tensor(X)
X_tensor=torch.moveaxis(X_tensor, 3, 1)

In [26]:
model.to(device)
all_predictions = []
with torch.no_grad():
    start=0
    batch_size=10
    while start<X_tensor.shape[0]:
#         print('start=',start)
        batch=X_tensor[start:min(start+batch_size,X_tensor.shape[0])]
        predictions = list(model(batch.to(device)).cpu().numpy())
        all_predictions+=predictions
        start+=batch_size


In [27]:
import numpy as np
import cv2 as cv2
from random import randrange

plate_sizes=[]
for i in range(len(all_predictions)):
    predict=all_predictions[i]
    mask = predict.squeeze()
    mask[mask>0]=255
    mask[mask<0]=0
    img=images[i]
    size= img.shape
    new_mask = resize(mask, size, mode='constant', anti_aliasing=True,)
    
    new_mask=new_mask.astype(np.uint8)
    gray = cv2.cvtColor(new_mask, cv2.COLOR_BGR2GRAY)
    
    ret, thresh = cv2.threshold(gray, 127, 255, 0)
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_TC89_L1)
    
    boxed_path=f"dataset/small_train_solution7_plate_box/{images_file[i]}_{randrange(10)}.jpg"
    if len(contours)==0:
        im = Image.fromarray(img)
        im.save(boxed_path)
        print('empty:',images_file[i])
        continue
        
    cnt = contours[0]
    rect = cv2.minAreaRect(cnt)
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    
    boxed_img=draw_box(img,box)
    im = Image.fromarray(boxed_img)
    im.save(boxed_path)
    
    
    d1=pow(box[0][0]-box[1][0],2)+pow(box[0][1]-box[1][1],2)
    d2=pow(box[2][0]-box[1][0],2)+pow(box[2][1]-box[1][1],2)
    max_d=max(d1,d2)
    min_d=min(d1,d2)

    plate_i={'image_name':images_file[i],'max_d':max_d,'min_d':min_d}
    
    if images_file[i] not in image_delta_coords:
        continue
        
    plate_i['x1']=image_delta_coords[images_file[i]]['coord'][0]
    plate_i['y1']=image_delta_coords[images_file[i]]['coord'][1]
    plate_i['x2']=image_delta_coords[images_file[i]]['coord'][2]
    plate_i['y2']=image_delta_coords[images_file[i]]['coord'][3]
    plate_sizes.append(plate_i)


empty: img_2716.jpg_0.jpg


In [28]:
len(plate_sizes)

522

In [29]:
plate_sizes[0]

{'image_name': 'img_2584.jpg_0.jpg',
 'max_d': 88245,
 'min_d': 3725,
 'x1': 1720,
 'y1': 1489,
 'x2': 2015,
 'y2': 1563}

In [30]:
images_file[0]


'img_2584.jpg_0.jpg'

сандартный размер 520x112

In [31]:
520/112

4.642857142857143

In [32]:
import math

plate_size_dict={}
for i in range(len(plate_sizes)):
    if plate_sizes[i]['min_d']==0:
        continue
    
    real_name=plate_sizes[i]['image_name'].split('_')[0]+"_"+plate_sizes[i]['image_name'].split('_')[1]
    box_div=math.sqrt(plate_sizes[i]['max_d'])/math.sqrt(plate_sizes[i]['min_d'])
    max_d=math.sqrt(plate_sizes[i]['max_d'])
    min_d=math.sqrt(plate_sizes[i]['min_d'])
    if real_name in plate_size_dict:
        if abs(plate_size_dict[real_name]['box_div']-520/112)<abs(box_div-520/112):
            continue
    plate_size_dict[real_name]={
        'box_div':box_div,
        'max_d':max_d,
        'min_d':min_d,
        'x1':plate_sizes[i]['x1'],
        'y1':plate_sizes[i]['y1'],
        'x2':plate_sizes[i]['x2'],
        'y2':plate_sizes[i]['y2'],
    }
    
plate_size_dataset=[]
for k in plate_size_dict:
    plate_size_dataset.append({
        'image_name':k, 
        'box_div':plate_size_dict[k]['box_div'],
        'max_d':plate_size_dict[k]['max_d'],
        'min_d':plate_size_dict[k]['min_d'],
        'x1':plate_size_dict[k]['x1'],
        'y1':plate_size_dict[k]['y1'],
        'x2':plate_size_dict[k]['x2'],
        'y2':plate_size_dict[k]['y2']
    })


In [33]:
test_img_names = set(os.listdir('dataset/test'))
train_img_names = set(os.listdir('dataset/train'))

In [34]:
train_labels_df = pd.read_csv('dataset/train.csv', sep=';', index_col=None)

In [35]:
train_labels_names = set(train_labels_df['image_name'].values)

In [36]:
train_labels_names.intersection(test_img_names)

set()

In [37]:
dist_dict = dict(zip(train_labels_df.image_name, train_labels_df.distance))

In [38]:
train_labels_df.head()

Unnamed: 0,image_name,distance
0,img_1596.jpg,4.88
1,img_1600.jpg,1.54
2,img_1601.jpg,3.68
3,img_1603.jpg,2.22
4,img_1605.jpg,3.73


In [39]:
train_data_df = pd.DataFrame(train_car_data, columns = ['image_name','height','width', 'x_min', 'y_min', 'x_max', 'y_max', 'conf', 'class'])
train_data_df.head()



Unnamed: 0,image_name,height,width,x_min,y_min,x_max,y_max,conf,class
0,img_2928.jpg,3024,4032,1531.042969,1391.25061,2080.437256,1828.963257,0.85858,2.0
1,img_1966.jpg,3024,4032,1688.050781,1301.404297,2103.450195,1649.081299,0.92124,2.0
2,img_1980.jpg,3024,4032,1224.301025,1100.384888,2526.190674,2195.380615,0.955414,2.0
3,img_2275.jpg,3024,4032,1714.348145,1358.682861,2078.894531,1709.497681,0.884075,2.0
4,img_2344.jpg,3024,4032,735.867859,1025.884155,1655.523315,1637.836548,0.933711,2.0


In [40]:
train_data_df[train_data_df["image_name"]=="img_1600.jpg"]

Unnamed: 0,image_name,height,width,x_min,y_min,x_max,y_max,conf,class
441,img_1600.jpg,3024,4032,1128.866821,1178.612793,3017.595459,2701.986084,0.949038,2.0


In [41]:
for ttt in train_car_data:
    if ttt[0]=="img_1600.jpg":
        print(ttt)

['img_1600.jpg', 3024, 4032, 1128.8668212890625, 1178.61279296875, 3017.595458984375, 2701.986083984375, 0.9490377306938171, 2.0]


In [42]:
train_plate_df = pd.DataFrame(plate_size_dataset, columns = ['image_name', 'max_d','min_d','box_div','x1','y1','x2','y2'])

train_data_df = pd.merge(train_data_df,train_plate_df, how='left')
train_data_df.head(10)


Unnamed: 0,image_name,height,width,x_min,y_min,...,box_div,x1,y1,x2,y2
0,img_2928.jpg,3024,4032,1531.042969,1391.25061,...,4.896148,1705.0,1570.0,1915.0,1634.0
1,img_1966.jpg,3024,4032,1688.050781,1301.404297,...,5.230769,1814.0,1453.0,1970.0,1494.0
2,img_1980.jpg,3024,4032,1224.301025,1100.384888,...,4.869529,1648.0,1766.0,2150.0,1880.0
3,img_2275.jpg,3024,4032,1714.348145,1358.682861,...,4.60642,1845.0,1600.0,1974.0,1642.0
4,img_2344.jpg,3024,4032,735.867859,1025.884155,...,5.427242,1866.0,1576.0,2098.0,1636.0
5,img_1690.jpg,3024,4032,3.03623,1088.786621,...,5.157819,1820.0,1486.0,2012.0,1544.0
6,img_2336.jpg,3024,4032,1646.39978,1255.998779,...,4.909091,1806.0,1446.0,1950.0,1495.0
7,img_2892.jpg,3024,4032,4.159897,1251.89917,...,4.962423,1788.0,1518.0,2042.0,1583.0
8,img_2313.jpg,3024,4032,575.039001,937.56311,...,5.0,1823.0,1287.0,1999.0,1344.0
9,img_1608.jpg,3024,4032,2844.421143,1254.356567,...,4.968788,1968.0,1470.0,2131.0,1510.0


In [43]:
image_exif_dataset=[]
for k in image_exif:
    image_exif_dataset.append({'image_name':k,'focal_lenght':image_exif[k]})

In [44]:
train_image_exif_df = pd.DataFrame(image_exif_dataset, columns = ['image_name', 'focal_lenght'])
train_image_exif_df.head()



Unnamed: 0,image_name,focal_lenght
0,img_2928.jpg,1.54
1,img_1966.jpg,1.54
2,img_1980.jpg,1.54
3,img_2275.jpg,1.54
4,img_2344.jpg,1.54


In [45]:
train_data_df = pd.merge(train_labels_df, train_data_df, how='left')
train_data_df.head(10)

Unnamed: 0,image_name,distance,height,width,x_min,...,box_div,x1,y1,x2,y2
0,img_1596.jpg,4.88,3024.0,4032.0,1666.326904,...,4.832198,1877.0,1570.0,2089.0,1624.0
1,img_1600.jpg,1.54,3024.0,4032.0,1128.866821,...,,,,,
2,img_1601.jpg,3.68,3024.0,4032.0,2440.101318,...,4.810124,1885.0,1652.0,2152.0,1714.0
3,img_1603.jpg,2.22,3024.0,4032.0,2606.66333,...,4.824585,1763.0,1793.0,2245.0,1898.0
4,img_1605.jpg,3.73,3024.0,4032.0,2485.122559,...,4.861622,1923.0,1558.0,2189.0,1620.0
5,img_1606.jpg,4.52,3024.0,4032.0,2402.554443,...,4.853292,1880.0,1398.0,2118.0,1458.0
6,img_1607.jpg,5.4,3024.0,4032.0,2396.992188,...,4.949022,1948.0,1589.0,2111.0,1645.0
7,img_1608.jpg,6.21,3024.0,4032.0,2844.421143,...,4.968788,1968.0,1470.0,2131.0,1510.0
8,img_1612.jpg,7.12,3024.0,4032.0,2689.677979,...,5.130592,1898.0,1594.0,2040.0,1632.0
9,img_1613.jpg,5.26,3024.0,4032.0,2892.4021,...,4.870464,1919.0,1473.0,2071.0,1523.0


In [46]:
train_data_df = pd.merge(train_data_df, train_image_exif_df, how='left')
train_data_df.head(10)

Unnamed: 0,image_name,distance,height,width,x_min,...,x1,y1,x2,y2,focal_lenght
0,img_1596.jpg,4.88,3024.0,4032.0,1666.326904,...,1877.0,1570.0,2089.0,1624.0,1.54
1,img_1600.jpg,1.54,3024.0,4032.0,1128.866821,...,,,,,1.54
2,img_1601.jpg,3.68,3024.0,4032.0,2440.101318,...,1885.0,1652.0,2152.0,1714.0,1.54
3,img_1603.jpg,2.22,3024.0,4032.0,2606.66333,...,1763.0,1793.0,2245.0,1898.0,1.54
4,img_1605.jpg,3.73,3024.0,4032.0,2485.122559,...,1923.0,1558.0,2189.0,1620.0,1.54
5,img_1606.jpg,4.52,3024.0,4032.0,2402.554443,...,1880.0,1398.0,2118.0,1458.0,1.54
6,img_1607.jpg,5.4,3024.0,4032.0,2396.992188,...,1948.0,1589.0,2111.0,1645.0,1.54
7,img_1608.jpg,6.21,3024.0,4032.0,2844.421143,...,1968.0,1470.0,2131.0,1510.0,1.54
8,img_1612.jpg,7.12,3024.0,4032.0,2689.677979,...,1898.0,1594.0,2040.0,1632.0,1.54
9,img_1613.jpg,5.26,3024.0,4032.0,2892.4021,...,1919.0,1473.0,2071.0,1523.0,1.54


In [47]:
train_data_df['calc_dist_min']=train_data_df['focal_lenght']*112/train_data_df['min_d']
train_data_df['calc_dist_max']=train_data_df['focal_lenght']*520/train_data_df['max_d']
train_data_df.head(10)

Unnamed: 0,image_name,distance,height,width,x_min,...,x2,y2,focal_lenght,calc_dist_min,calc_dist_max
0,img_1596.jpg,4.88,3024.0,4032.0,1666.326904,...,2089.0,1624.0,1.54,4.105503,3.944636
1,img_1600.jpg,1.54,3024.0,4032.0,1128.866821,...,,,1.54,,
2,img_1601.jpg,3.68,3024.0,4032.0,2440.101318,...,2152.0,1714.0,1.54,2.973351,2.869956
3,img_1603.jpg,2.22,3024.0,4032.0,2606.66333,...,2245.0,1898.0,1.54,1.77805,1.711076
4,img_1605.jpg,3.73,3024.0,4032.0,2485.122559,...,2189.0,1620.0,1.54,2.973351,2.839556
5,img_1606.jpg,4.52,3024.0,4032.0,2402.554443,...,2118.0,1458.0,1.54,3.592554,3.436783
6,img_1607.jpg,5.4,3024.0,4032.0,2396.992188,...,2111.0,1645.0,1.54,4.310653,4.04398
7,img_1608.jpg,6.21,3024.0,4032.0,2844.421143,...,2131.0,1510.0,1.54,5.070748,4.73813
8,img_1612.jpg,7.12,3024.0,4032.0,2689.677979,...,2040.0,1632.0,1.54,5.746142,5.19989
9,img_1613.jpg,5.26,3024.0,4032.0,2892.4021,...,2071.0,1523.0,1.54,4.421111,4.214503


In [48]:
train_data_df['x_min']=train_data_df['x_min']-train_data_df['width']/2
train_data_df['x_max']=train_data_df['x_max']-train_data_df['width']/2
train_data_df['y_min']=train_data_df['y_min']-train_data_df['height']/2
train_data_df['y_max']=train_data_df['y_max']-train_data_df['height']/2

train_data_df['x1']=train_data_df['x1']-train_data_df['width']/2
train_data_df['x2']=train_data_df['x2']-train_data_df['width']/2
train_data_df['y1']=train_data_df['y1']-train_data_df['height']/2
train_data_df['y2']=train_data_df['y2']-train_data_df['height']/2

train_data_df.head(10)

Unnamed: 0,image_name,distance,height,width,x_min,...,x2,y2,focal_lenght,calc_dist_min,calc_dist_max
0,img_1596.jpg,4.88,3024.0,4032.0,-349.673096,...,73.0,112.0,1.54,4.105503,3.944636
1,img_1600.jpg,1.54,3024.0,4032.0,-887.133179,...,,,1.54,,
2,img_1601.jpg,3.68,3024.0,4032.0,424.101318,...,136.0,202.0,1.54,2.973351,2.869956
3,img_1603.jpg,2.22,3024.0,4032.0,590.66333,...,229.0,386.0,1.54,1.77805,1.711076
4,img_1605.jpg,3.73,3024.0,4032.0,469.122559,...,173.0,108.0,1.54,2.973351,2.839556
5,img_1606.jpg,4.52,3024.0,4032.0,386.554443,...,102.0,-54.0,1.54,3.592554,3.436783
6,img_1607.jpg,5.4,3024.0,4032.0,380.992188,...,95.0,133.0,1.54,4.310653,4.04398
7,img_1608.jpg,6.21,3024.0,4032.0,828.421143,...,115.0,-2.0,1.54,5.070748,4.73813
8,img_1612.jpg,7.12,3024.0,4032.0,673.677979,...,24.0,120.0,1.54,5.746142,5.19989
9,img_1613.jpg,5.26,3024.0,4032.0,876.4021,...,55.0,11.0,1.54,4.421111,4.214503


# генерим 2 модель только с номерами, на случай если не нашли машину

In [49]:
train_data_df_model2 = train_plate_df.copy()
train_data_df_model2.head()

Unnamed: 0,image_name,max_d,min_d,box_div,x1,y1,x2,y2
0,img_2584.jpg,297.0606,61.032778,4.867231,1720,1489,2015,1563
1,img_1902.jpg,270.185122,54.037024,5.0,1744,1709,2007,1778
2,img_1818.jpg,315.0,65.0,4.846154,1821,1616,2095,1695
3,img_2385.jpg,316.01424,63.007936,5.015467,1904,1641,2205,1722
4,img_2701.heic,219.009132,43.011626,5.091859,1858,1608,2064,1669


In [50]:
train_data_df_model2 = pd.merge(train_labels_df, train_data_df_model2, how='left')
train_data_df_model2.head(10)

Unnamed: 0,image_name,distance,max_d,min_d,box_div,x1,y1,x2,y2
0,img_1596.jpg,4.88,203.009852,42.011903,4.832198,1877.0,1570.0,2089.0,1624.0
1,img_1600.jpg,1.54,,,,,,,
2,img_1601.jpg,3.68,279.028672,58.00862,4.810124,1885.0,1652.0,2152.0,1714.0
3,img_1603.jpg,2.22,468.009615,97.005155,4.824585,1763.0,1793.0,2245.0,1898.0
4,img_1605.jpg,3.73,282.015957,58.00862,4.861622,1923.0,1558.0,2189.0,1620.0
5,img_1606.jpg,4.52,233.008584,48.010416,4.853292,1880.0,1398.0,2118.0,1458.0
6,img_1607.jpg,5.4,198.022726,40.012498,4.949022,1948.0,1589.0,2111.0,1645.0
7,img_1608.jpg,6.21,169.011834,34.014703,4.968788,1968.0,1470.0,2131.0,1510.0
8,img_1612.jpg,7.12,154.003247,30.016662,5.130592,1898.0,1594.0,2040.0,1632.0
9,img_1613.jpg,5.26,190.010526,39.012818,4.870464,1919.0,1473.0,2071.0,1523.0


In [51]:
train_data_df[['image_name','height','width']].head()

Unnamed: 0,image_name,height,width
0,img_1596.jpg,3024.0,4032.0
1,img_1600.jpg,3024.0,4032.0
2,img_1601.jpg,3024.0,4032.0
3,img_1603.jpg,3024.0,4032.0
4,img_1605.jpg,3024.0,4032.0


In [52]:
train_data_df_model2 = pd.merge(train_data_df_model2, train_data_df[['image_name','height','width']], how='left')
train_data_df_model2.head(10)

Unnamed: 0,image_name,distance,max_d,min_d,box_div,...,y1,x2,y2,height,width
0,img_1596.jpg,4.88,203.009852,42.011903,4.832198,...,1570.0,2089.0,1624.0,3024.0,4032.0
1,img_1600.jpg,1.54,,,,...,,,,3024.0,4032.0
2,img_1601.jpg,3.68,279.028672,58.00862,4.810124,...,1652.0,2152.0,1714.0,3024.0,4032.0
3,img_1603.jpg,2.22,468.009615,97.005155,4.824585,...,1793.0,2245.0,1898.0,3024.0,4032.0
4,img_1605.jpg,3.73,282.015957,58.00862,4.861622,...,1558.0,2189.0,1620.0,3024.0,4032.0
5,img_1606.jpg,4.52,233.008584,48.010416,4.853292,...,1398.0,2118.0,1458.0,3024.0,4032.0
6,img_1607.jpg,5.4,198.022726,40.012498,4.949022,...,1589.0,2111.0,1645.0,3024.0,4032.0
7,img_1608.jpg,6.21,169.011834,34.014703,4.968788,...,1470.0,2131.0,1510.0,3024.0,4032.0
8,img_1612.jpg,7.12,154.003247,30.016662,5.130592,...,1594.0,2040.0,1632.0,3024.0,4032.0
9,img_1613.jpg,5.26,190.010526,39.012818,4.870464,...,1473.0,2071.0,1523.0,3024.0,4032.0


In [53]:
train_data_df_model2['x1']=train_data_df_model2['x1']-train_data_df_model2['width']/2
train_data_df_model2['x2']=train_data_df_model2['x2']-train_data_df_model2['width']/2
train_data_df_model2['y1']=train_data_df_model2['y1']-train_data_df_model2['height']/2
train_data_df_model2['y2']=train_data_df_model2['y2']-train_data_df_model2['height']/2

In [54]:
train_data_df_model2 = pd.merge(train_data_df_model2, train_image_exif_df, how='left')
train_data_df_model2.head(10)

Unnamed: 0,image_name,distance,max_d,min_d,box_div,...,x2,y2,height,width,focal_lenght
0,img_1596.jpg,4.88,203.009852,42.011903,4.832198,...,73.0,112.0,3024.0,4032.0,1.54
1,img_1600.jpg,1.54,,,,...,,,3024.0,4032.0,1.54
2,img_1601.jpg,3.68,279.028672,58.00862,4.810124,...,136.0,202.0,3024.0,4032.0,1.54
3,img_1603.jpg,2.22,468.009615,97.005155,4.824585,...,229.0,386.0,3024.0,4032.0,1.54
4,img_1605.jpg,3.73,282.015957,58.00862,4.861622,...,173.0,108.0,3024.0,4032.0,1.54
5,img_1606.jpg,4.52,233.008584,48.010416,4.853292,...,102.0,-54.0,3024.0,4032.0,1.54
6,img_1607.jpg,5.4,198.022726,40.012498,4.949022,...,95.0,133.0,3024.0,4032.0,1.54
7,img_1608.jpg,6.21,169.011834,34.014703,4.968788,...,115.0,-2.0,3024.0,4032.0,1.54
8,img_1612.jpg,7.12,154.003247,30.016662,5.130592,...,24.0,120.0,3024.0,4032.0,1.54
9,img_1613.jpg,5.26,190.010526,39.012818,4.870464,...,55.0,11.0,3024.0,4032.0,1.54


In [55]:
train_data_df_model2 = train_data_df_model2[train_data_df_model2['max_d'].notna()]
train_data_df_model2.head(10)

Unnamed: 0,image_name,distance,max_d,min_d,box_div,...,x2,y2,height,width,focal_lenght
0,img_1596.jpg,4.88,203.009852,42.011903,4.832198,...,73.0,112.0,3024.0,4032.0,1.54
2,img_1601.jpg,3.68,279.028672,58.00862,4.810124,...,136.0,202.0,3024.0,4032.0,1.54
3,img_1603.jpg,2.22,468.009615,97.005155,4.824585,...,229.0,386.0,3024.0,4032.0,1.54
4,img_1605.jpg,3.73,282.015957,58.00862,4.861622,...,173.0,108.0,3024.0,4032.0,1.54
5,img_1606.jpg,4.52,233.008584,48.010416,4.853292,...,102.0,-54.0,3024.0,4032.0,1.54
6,img_1607.jpg,5.4,198.022726,40.012498,4.949022,...,95.0,133.0,3024.0,4032.0,1.54
7,img_1608.jpg,6.21,169.011834,34.014703,4.968788,...,115.0,-2.0,3024.0,4032.0,1.54
8,img_1612.jpg,7.12,154.003247,30.016662,5.130592,...,24.0,120.0,3024.0,4032.0,1.54
9,img_1613.jpg,5.26,190.010526,39.012818,4.870464,...,55.0,11.0,3024.0,4032.0,1.54
10,img_1615.jpg,2.26,524.0,112.0,4.678571,...,283.0,324.0,3024.0,4032.0,4.25


# Аугментация
данные центрированы, поэтому просто отзеркалить относительно осей

In [56]:
train_data_df.head()

Unnamed: 0,image_name,distance,height,width,x_min,...,x2,y2,focal_lenght,calc_dist_min,calc_dist_max
0,img_1596.jpg,4.88,3024.0,4032.0,-349.673096,...,73.0,112.0,1.54,4.105503,3.944636
1,img_1600.jpg,1.54,3024.0,4032.0,-887.133179,...,,,1.54,,
2,img_1601.jpg,3.68,3024.0,4032.0,424.101318,...,136.0,202.0,1.54,2.973351,2.869956
3,img_1603.jpg,2.22,3024.0,4032.0,590.66333,...,229.0,386.0,1.54,1.77805,1.711076
4,img_1605.jpg,3.73,3024.0,4032.0,469.122559,...,173.0,108.0,1.54,2.973351,2.839556


In [57]:
horizont_train_data_df=train_data_df.copy()
col_names=['x_min','x_max','x1','x2']
for col in col_names:
    horizont_train_data_df[col]=-horizont_train_data_df[col]

horizont_train_data_df.head()

Unnamed: 0,image_name,distance,height,width,x_min,...,x2,y2,focal_lenght,calc_dist_min,calc_dist_max
0,img_1596.jpg,4.88,3024.0,4032.0,349.673096,...,-73.0,112.0,1.54,4.105503,3.944636
1,img_1600.jpg,1.54,3024.0,4032.0,887.133179,...,,,1.54,,
2,img_1601.jpg,3.68,3024.0,4032.0,-424.101318,...,-136.0,202.0,1.54,2.973351,2.869956
3,img_1603.jpg,2.22,3024.0,4032.0,-590.66333,...,-229.0,386.0,1.54,1.77805,1.711076
4,img_1605.jpg,3.73,3024.0,4032.0,-469.122559,...,-173.0,108.0,1.54,2.973351,2.839556


In [58]:
vertical_train_data_df=train_data_df.copy()
col_names=['y_min','y_max','y1','y2']
for col in col_names:
    vertical_train_data_df[col]=-vertical_train_data_df[col]


vertical_train_data_df.head()

Unnamed: 0,image_name,distance,height,width,x_min,...,x2,y2,focal_lenght,calc_dist_min,calc_dist_max
0,img_1596.jpg,4.88,3024.0,4032.0,-349.673096,...,73.0,-112.0,1.54,4.105503,3.944636
1,img_1600.jpg,1.54,3024.0,4032.0,-887.133179,...,,,1.54,,
2,img_1601.jpg,3.68,3024.0,4032.0,424.101318,...,136.0,-202.0,1.54,2.973351,2.869956
3,img_1603.jpg,2.22,3024.0,4032.0,590.66333,...,229.0,-386.0,1.54,1.77805,1.711076
4,img_1605.jpg,3.73,3024.0,4032.0,469.122559,...,173.0,-108.0,1.54,2.973351,2.839556


In [59]:
center_train_data_df=train_data_df.copy()
col_names=['y_min','y_max','y1','y2','x_min','x_max','x1','x2']
for col in col_names:
    center_train_data_df[col]=-center_train_data_df[col]

center_train_data_df.head()

Unnamed: 0,image_name,distance,height,width,x_min,...,x2,y2,focal_lenght,calc_dist_min,calc_dist_max
0,img_1596.jpg,4.88,3024.0,4032.0,349.673096,...,-73.0,-112.0,1.54,4.105503,3.944636
1,img_1600.jpg,1.54,3024.0,4032.0,887.133179,...,,,1.54,,
2,img_1601.jpg,3.68,3024.0,4032.0,-424.101318,...,-136.0,-202.0,1.54,2.973351,2.869956
3,img_1603.jpg,2.22,3024.0,4032.0,-590.66333,...,-229.0,-386.0,1.54,1.77805,1.711076
4,img_1605.jpg,3.73,3024.0,4032.0,-469.122559,...,-173.0,-108.0,1.54,2.973351,2.839556


In [60]:
temp_df =pd.concat([train_data_df, horizont_train_data_df,vertical_train_data_df,center_train_data_df])
temp_df.shape

(2120, 20)

### для номеров

In [61]:
train_data_df_model2.head()

Unnamed: 0,image_name,distance,max_d,min_d,box_div,...,x2,y2,height,width,focal_lenght
0,img_1596.jpg,4.88,203.009852,42.011903,4.832198,...,73.0,112.0,3024.0,4032.0,1.54
2,img_1601.jpg,3.68,279.028672,58.00862,4.810124,...,136.0,202.0,3024.0,4032.0,1.54
3,img_1603.jpg,2.22,468.009615,97.005155,4.824585,...,229.0,386.0,3024.0,4032.0,1.54
4,img_1605.jpg,3.73,282.015957,58.00862,4.861622,...,173.0,108.0,3024.0,4032.0,1.54
5,img_1606.jpg,4.52,233.008584,48.010416,4.853292,...,102.0,-54.0,3024.0,4032.0,1.54


In [62]:
horizont_train_data_df_model2=train_data_df_model2.copy()
col_names=['x1','x2']
for col in col_names:
    horizont_train_data_df_model2[col]=-horizont_train_data_df_model2[col]

horizont_train_data_df_model2.head()

Unnamed: 0,image_name,distance,max_d,min_d,box_div,...,x2,y2,height,width,focal_lenght
0,img_1596.jpg,4.88,203.009852,42.011903,4.832198,...,-73.0,112.0,3024.0,4032.0,1.54
2,img_1601.jpg,3.68,279.028672,58.00862,4.810124,...,-136.0,202.0,3024.0,4032.0,1.54
3,img_1603.jpg,2.22,468.009615,97.005155,4.824585,...,-229.0,386.0,3024.0,4032.0,1.54
4,img_1605.jpg,3.73,282.015957,58.00862,4.861622,...,-173.0,108.0,3024.0,4032.0,1.54
5,img_1606.jpg,4.52,233.008584,48.010416,4.853292,...,-102.0,-54.0,3024.0,4032.0,1.54


In [63]:
vertical_train_data_df_model2=train_data_df_model2.copy()
col_names=['y1','y2']
for col in col_names:
    vertical_train_data_df_model2[col]=-vertical_train_data_df_model2[col]


vertical_train_data_df_model2.head()

Unnamed: 0,image_name,distance,max_d,min_d,box_div,...,x2,y2,height,width,focal_lenght
0,img_1596.jpg,4.88,203.009852,42.011903,4.832198,...,73.0,-112.0,3024.0,4032.0,1.54
2,img_1601.jpg,3.68,279.028672,58.00862,4.810124,...,136.0,-202.0,3024.0,4032.0,1.54
3,img_1603.jpg,2.22,468.009615,97.005155,4.824585,...,229.0,-386.0,3024.0,4032.0,1.54
4,img_1605.jpg,3.73,282.015957,58.00862,4.861622,...,173.0,-108.0,3024.0,4032.0,1.54
5,img_1606.jpg,4.52,233.008584,48.010416,4.853292,...,102.0,54.0,3024.0,4032.0,1.54


In [64]:
center_train_data_df_model2=train_data_df_model2.copy()
col_names=['y1','y2','x1','x2']
for col in col_names:
    center_train_data_df_model2[col]=-center_train_data_df_model2[col]

center_train_data_df_model2.head()

Unnamed: 0,image_name,distance,max_d,min_d,box_div,...,x2,y2,height,width,focal_lenght
0,img_1596.jpg,4.88,203.009852,42.011903,4.832198,...,-73.0,-112.0,3024.0,4032.0,1.54
2,img_1601.jpg,3.68,279.028672,58.00862,4.810124,...,-136.0,-202.0,3024.0,4032.0,1.54
3,img_1603.jpg,2.22,468.009615,97.005155,4.824585,...,-229.0,-386.0,3024.0,4032.0,1.54
4,img_1605.jpg,3.73,282.015957,58.00862,4.861622,...,-173.0,-108.0,3024.0,4032.0,1.54
5,img_1606.jpg,4.52,233.008584,48.010416,4.853292,...,-102.0,54.0,3024.0,4032.0,1.54


In [65]:
temp_df_model2 =pd.concat([train_data_df_model2, horizont_train_data_df_model2,vertical_train_data_df_model2,center_train_data_df_model2])
temp_df_model2.shape



(2060, 12)

# Тренировка

In [66]:
from catboost import CatBoostRegressor

In [67]:
model_2 = CatBoostRegressor(custom_metric=['R2'],depth=14,iterations=1000)
# model_2.fit(train_data_df[['x_min', 'y_min', 'x_max', 'y_max', 'conf', 'max_d','min_d','x1','y1','x2','y2','focal_lenght']], train_data_df[['distance']].values)
model_2.fit(temp_df[['x_min', 'y_min', 'x_max', 'y_max', 'conf', 'max_d','min_d','x1','y1','x2','y2','focal_lenght']], temp_df[['distance']].values)


Learning rate set to 0.046104
0:	learn: 1.6949935	total: 58.9ms	remaining: 58.9s
1:	learn: 1.6377188	total: 215ms	remaining: 1m 47s
2:	learn: 1.5808563	total: 364ms	remaining: 2m
3:	learn: 1.5252073	total: 373ms	remaining: 1m 32s
4:	learn: 1.4757225	total: 502ms	remaining: 1m 39s
5:	learn: 1.4268170	total: 645ms	remaining: 1m 46s
6:	learn: 1.3778004	total: 798ms	remaining: 1m 53s
7:	learn: 1.3321309	total: 818ms	remaining: 1m 41s
8:	learn: 1.2913767	total: 964ms	remaining: 1m 46s
9:	learn: 1.2493365	total: 1.1s	remaining: 1m 49s
10:	learn: 1.2095082	total: 1.25s	remaining: 1m 52s
11:	learn: 1.1728280	total: 1.38s	remaining: 1m 53s
12:	learn: 1.1343682	total: 1.44s	remaining: 1m 49s
13:	learn: 1.1013278	total: 1.57s	remaining: 1m 50s
14:	learn: 1.0694154	total: 1.7s	remaining: 1m 51s
15:	learn: 1.0362701	total: 1.84s	remaining: 1m 53s
16:	learn: 1.0066501	total: 1.86s	remaining: 1m 47s
17:	learn: 0.9772593	total: 1.99s	remaining: 1m 48s
18:	learn: 0.9508223	total: 2.13s	remaining: 1m 50

160:	learn: 0.2070095	total: 21.5s	remaining: 1m 51s
161:	learn: 0.2061104	total: 21.6s	remaining: 1m 51s
162:	learn: 0.2047822	total: 21.7s	remaining: 1m 51s
163:	learn: 0.2041221	total: 21.9s	remaining: 1m 51s
164:	learn: 0.2030038	total: 22s	remaining: 1m 51s
165:	learn: 0.2020576	total: 22.2s	remaining: 1m 51s
166:	learn: 0.2010201	total: 22.3s	remaining: 1m 51s
167:	learn: 0.1998546	total: 22.5s	remaining: 1m 51s
168:	learn: 0.1988826	total: 22.6s	remaining: 1m 51s
169:	learn: 0.1979394	total: 22.8s	remaining: 1m 51s
170:	learn: 0.1967809	total: 22.9s	remaining: 1m 51s
171:	learn: 0.1961217	total: 23.1s	remaining: 1m 51s
172:	learn: 0.1954051	total: 23.2s	remaining: 1m 51s
173:	learn: 0.1946110	total: 23.4s	remaining: 1m 50s
174:	learn: 0.1940476	total: 23.5s	remaining: 1m 50s
175:	learn: 0.1932671	total: 23.7s	remaining: 1m 50s
176:	learn: 0.1925800	total: 23.8s	remaining: 1m 50s
177:	learn: 0.1918245	total: 23.9s	remaining: 1m 50s
178:	learn: 0.1912985	total: 24.1s	remaining: 1m

316:	learn: 0.0992450	total: 43.4s	remaining: 1m 33s
317:	learn: 0.0988728	total: 43.5s	remaining: 1m 33s
318:	learn: 0.0986298	total: 43.7s	remaining: 1m 33s
319:	learn: 0.0982181	total: 43.8s	remaining: 1m 33s
320:	learn: 0.0979648	total: 44s	remaining: 1m 32s
321:	learn: 0.0976985	total: 44.1s	remaining: 1m 32s
322:	learn: 0.0974811	total: 44.3s	remaining: 1m 32s
323:	learn: 0.0969623	total: 44.4s	remaining: 1m 32s
324:	learn: 0.0967047	total: 44.5s	remaining: 1m 32s
325:	learn: 0.0962798	total: 44.7s	remaining: 1m 32s
326:	learn: 0.0959016	total: 44.8s	remaining: 1m 32s
327:	learn: 0.0955209	total: 44.9s	remaining: 1m 32s
328:	learn: 0.0951508	total: 45.1s	remaining: 1m 31s
329:	learn: 0.0948206	total: 45.2s	remaining: 1m 31s
330:	learn: 0.0944214	total: 45.4s	remaining: 1m 31s
331:	learn: 0.0938837	total: 45.5s	remaining: 1m 31s
332:	learn: 0.0935939	total: 45.6s	remaining: 1m 31s
333:	learn: 0.0929538	total: 45.8s	remaining: 1m 31s
334:	learn: 0.0926327	total: 45.9s	remaining: 1m

472:	learn: 0.0540671	total: 1m 5s	remaining: 1m 12s
473:	learn: 0.0538341	total: 1m 5s	remaining: 1m 12s
474:	learn: 0.0535810	total: 1m 5s	remaining: 1m 12s
475:	learn: 0.0534746	total: 1m 5s	remaining: 1m 12s
476:	learn: 0.0533687	total: 1m 5s	remaining: 1m 12s
477:	learn: 0.0532215	total: 1m 6s	remaining: 1m 12s
478:	learn: 0.0530855	total: 1m 6s	remaining: 1m 12s
479:	learn: 0.0527809	total: 1m 6s	remaining: 1m 11s
480:	learn: 0.0525319	total: 1m 6s	remaining: 1m 11s
481:	learn: 0.0522348	total: 1m 6s	remaining: 1m 11s
482:	learn: 0.0520589	total: 1m 6s	remaining: 1m 11s
483:	learn: 0.0518959	total: 1m 6s	remaining: 1m 11s
484:	learn: 0.0517276	total: 1m 7s	remaining: 1m 11s
485:	learn: 0.0516467	total: 1m 7s	remaining: 1m 11s
486:	learn: 0.0513598	total: 1m 7s	remaining: 1m 11s
487:	learn: 0.0512682	total: 1m 7s	remaining: 1m 10s
488:	learn: 0.0510504	total: 1m 7s	remaining: 1m 10s
489:	learn: 0.0508648	total: 1m 7s	remaining: 1m 10s
490:	learn: 0.0505725	total: 1m 7s	remaining: 

628:	learn: 0.0309873	total: 1m 27s	remaining: 51.5s
629:	learn: 0.0308790	total: 1m 27s	remaining: 51.4s
630:	learn: 0.0308244	total: 1m 27s	remaining: 51.2s
631:	learn: 0.0307928	total: 1m 27s	remaining: 51.1s
632:	learn: 0.0307421	total: 1m 27s	remaining: 51s
633:	learn: 0.0306757	total: 1m 28s	remaining: 50.8s
634:	learn: 0.0305778	total: 1m 28s	remaining: 50.7s
635:	learn: 0.0304700	total: 1m 28s	remaining: 50.5s
636:	learn: 0.0302883	total: 1m 28s	remaining: 50.4s
637:	learn: 0.0301734	total: 1m 28s	remaining: 50.3s
638:	learn: 0.0300274	total: 1m 28s	remaining: 50.1s
639:	learn: 0.0299873	total: 1m 28s	remaining: 50s
640:	learn: 0.0299331	total: 1m 29s	remaining: 49.8s
641:	learn: 0.0298735	total: 1m 29s	remaining: 49.7s
642:	learn: 0.0297395	total: 1m 29s	remaining: 49.6s
643:	learn: 0.0296272	total: 1m 29s	remaining: 49.4s
644:	learn: 0.0295928	total: 1m 29s	remaining: 49.3s
645:	learn: 0.0295098	total: 1m 29s	remaining: 49.2s
646:	learn: 0.0293267	total: 1m 29s	remaining: 49s

784:	learn: 0.0192834	total: 1m 49s	remaining: 29.9s
785:	learn: 0.0192246	total: 1m 49s	remaining: 29.8s
786:	learn: 0.0191395	total: 1m 49s	remaining: 29.7s
787:	learn: 0.0190977	total: 1m 49s	remaining: 29.5s
788:	learn: 0.0190807	total: 1m 49s	remaining: 29.4s
789:	learn: 0.0189862	total: 1m 50s	remaining: 29.3s
790:	learn: 0.0189570	total: 1m 50s	remaining: 29.1s
791:	learn: 0.0189132	total: 1m 50s	remaining: 29s
792:	learn: 0.0188723	total: 1m 50s	remaining: 28.8s
793:	learn: 0.0188299	total: 1m 50s	remaining: 28.7s
794:	learn: 0.0187666	total: 1m 50s	remaining: 28.6s
795:	learn: 0.0186661	total: 1m 50s	remaining: 28.4s
796:	learn: 0.0186043	total: 1m 51s	remaining: 28.3s
797:	learn: 0.0185666	total: 1m 51s	remaining: 28.1s
798:	learn: 0.0185003	total: 1m 51s	remaining: 28s
799:	learn: 0.0184395	total: 1m 51s	remaining: 27.9s
800:	learn: 0.0183938	total: 1m 51s	remaining: 27.7s
801:	learn: 0.0183431	total: 1m 51s	remaining: 27.6s
802:	learn: 0.0182737	total: 1m 51s	remaining: 27.

942:	learn: 0.0125392	total: 2m 11s	remaining: 7.96s
943:	learn: 0.0125036	total: 2m 11s	remaining: 7.82s
944:	learn: 0.0124733	total: 2m 11s	remaining: 7.68s
945:	learn: 0.0124413	total: 2m 12s	remaining: 7.54s
946:	learn: 0.0124194	total: 2m 12s	remaining: 7.4s
947:	learn: 0.0123704	total: 2m 12s	remaining: 7.26s
948:	learn: 0.0123559	total: 2m 12s	remaining: 7.12s
949:	learn: 0.0123280	total: 2m 12s	remaining: 6.98s
950:	learn: 0.0123099	total: 2m 12s	remaining: 6.84s
951:	learn: 0.0122962	total: 2m 12s	remaining: 6.71s
952:	learn: 0.0122654	total: 2m 13s	remaining: 6.57s
953:	learn: 0.0122545	total: 2m 13s	remaining: 6.42s
954:	learn: 0.0122316	total: 2m 13s	remaining: 6.29s
955:	learn: 0.0122040	total: 2m 13s	remaining: 6.15s
956:	learn: 0.0121885	total: 2m 13s	remaining: 6.01s
957:	learn: 0.0121574	total: 2m 13s	remaining: 5.87s
958:	learn: 0.0121217	total: 2m 13s	remaining: 5.73s
959:	learn: 0.0120666	total: 2m 14s	remaining: 5.59s
960:	learn: 0.0120551	total: 2m 14s	remaining: 

<catboost.core.CatBoostRegressor at 0x7f5ccb2210a0>

In [68]:
model_3 = CatBoostRegressor(custom_metric=['R2'],depth=14,iterations=1000)
# model_3.fit(train_data_df[['max_d','min_d','x1','y1','x2','y2','focal_lenght']], train_data_df[['distance']].values)
model_3.fit(temp_df_model2[['max_d','min_d','x1','y1','x2','y2','focal_lenght']], temp_df_model2[['distance']].values)


Learning rate set to 0.045896
0:	learn: 1.6785559	total: 111ms	remaining: 1m 50s
1:	learn: 1.6186386	total: 211ms	remaining: 1m 45s
2:	learn: 1.5633643	total: 320ms	remaining: 1m 46s
3:	learn: 1.5100789	total: 420ms	remaining: 1m 44s
4:	learn: 1.4571536	total: 520ms	remaining: 1m 43s
5:	learn: 1.4079683	total: 614ms	remaining: 1m 41s
6:	learn: 1.3588019	total: 720ms	remaining: 1m 42s
7:	learn: 1.3124128	total: 820ms	remaining: 1m 41s
8:	learn: 1.2692069	total: 918ms	remaining: 1m 41s
9:	learn: 1.2248236	total: 1.01s	remaining: 1m 40s
10:	learn: 1.1808322	total: 1.02s	remaining: 1m 31s
11:	learn: 1.1411945	total: 1.12s	remaining: 1m 31s
12:	learn: 1.1032445	total: 1.21s	remaining: 1m 32s
13:	learn: 1.0648690	total: 1.24s	remaining: 1m 27s
14:	learn: 1.0324456	total: 1.34s	remaining: 1m 27s
15:	learn: 0.9999418	total: 1.44s	remaining: 1m 28s
16:	learn: 0.9696963	total: 1.54s	remaining: 1m 28s
17:	learn: 0.9370216	total: 1.63s	remaining: 1m 28s
18:	learn: 0.9078041	total: 1.63s	remaining:

158:	learn: 0.2440690	total: 14.5s	remaining: 1m 16s
159:	learn: 0.2431974	total: 14.6s	remaining: 1m 16s
160:	learn: 0.2424265	total: 14.7s	remaining: 1m 16s
161:	learn: 0.2420512	total: 14.8s	remaining: 1m 16s
162:	learn: 0.2414840	total: 14.9s	remaining: 1m 16s
163:	learn: 0.2407004	total: 15s	remaining: 1m 16s
164:	learn: 0.2403550	total: 15.1s	remaining: 1m 16s
165:	learn: 0.2397139	total: 15.2s	remaining: 1m 16s
166:	learn: 0.2390104	total: 15.3s	remaining: 1m 16s
167:	learn: 0.2382210	total: 15.4s	remaining: 1m 16s
168:	learn: 0.2376824	total: 15.5s	remaining: 1m 16s
169:	learn: 0.2370869	total: 15.6s	remaining: 1m 15s
170:	learn: 0.2362885	total: 15.6s	remaining: 1m 15s
171:	learn: 0.2357612	total: 15.8s	remaining: 1m 15s
172:	learn: 0.2354110	total: 15.8s	remaining: 1m 15s
173:	learn: 0.2350398	total: 15.8s	remaining: 1m 14s
174:	learn: 0.2349069	total: 15.8s	remaining: 1m 14s
175:	learn: 0.2342153	total: 15.9s	remaining: 1m 14s
176:	learn: 0.2338328	total: 16s	remaining: 1m 1

316:	learn: 0.1555864	total: 29.8s	remaining: 1m 4s
317:	learn: 0.1553681	total: 29.9s	remaining: 1m 4s
318:	learn: 0.1546185	total: 30s	remaining: 1m 4s
319:	learn: 0.1540351	total: 30.1s	remaining: 1m 4s
320:	learn: 0.1538206	total: 30.2s	remaining: 1m 3s
321:	learn: 0.1532715	total: 30.3s	remaining: 1m 3s
322:	learn: 0.1528734	total: 30.4s	remaining: 1m 3s
323:	learn: 0.1526686	total: 30.5s	remaining: 1m 3s
324:	learn: 0.1519491	total: 30.6s	remaining: 1m 3s
325:	learn: 0.1513913	total: 30.7s	remaining: 1m 3s
326:	learn: 0.1508447	total: 30.8s	remaining: 1m 3s
327:	learn: 0.1506876	total: 30.9s	remaining: 1m 3s
328:	learn: 0.1501112	total: 31s	remaining: 1m 3s
329:	learn: 0.1498663	total: 31.1s	remaining: 1m 3s
330:	learn: 0.1493358	total: 31.2s	remaining: 1m 3s
331:	learn: 0.1491162	total: 31.4s	remaining: 1m 3s
332:	learn: 0.1485660	total: 31.5s	remaining: 1m 3s
333:	learn: 0.1483301	total: 31.6s	remaining: 1m 2s
334:	learn: 0.1482317	total: 31.6s	remaining: 1m 2s
335:	learn: 0.14

478:	learn: 0.0979762	total: 45.9s	remaining: 49.9s
479:	learn: 0.0976434	total: 46s	remaining: 49.8s
480:	learn: 0.0975661	total: 46.1s	remaining: 49.7s
481:	learn: 0.0971906	total: 46.2s	remaining: 49.6s
482:	learn: 0.0968330	total: 46.3s	remaining: 49.5s
483:	learn: 0.0967902	total: 46.4s	remaining: 49.4s
484:	learn: 0.0963669	total: 46.5s	remaining: 49.3s
485:	learn: 0.0960628	total: 46.6s	remaining: 49.2s
486:	learn: 0.0957929	total: 46.6s	remaining: 49.1s
487:	learn: 0.0955938	total: 46.8s	remaining: 49.1s
488:	learn: 0.0954086	total: 46.9s	remaining: 49s
489:	learn: 0.0949779	total: 47s	remaining: 48.9s
490:	learn: 0.0946915	total: 47s	remaining: 48.8s
491:	learn: 0.0943303	total: 47.1s	remaining: 48.7s
492:	learn: 0.0941288	total: 47.3s	remaining: 48.6s
493:	learn: 0.0940659	total: 47.4s	remaining: 48.5s
494:	learn: 0.0938594	total: 47.5s	remaining: 48.4s
495:	learn: 0.0937750	total: 47.6s	remaining: 48.3s
496:	learn: 0.0934842	total: 47.7s	remaining: 48.2s
497:	learn: 0.093174

639:	learn: 0.0649724	total: 1m 1s	remaining: 34.7s
640:	learn: 0.0647106	total: 1m 1s	remaining: 34.6s
641:	learn: 0.0645555	total: 1m 1s	remaining: 34.6s
642:	learn: 0.0645321	total: 1m 2s	remaining: 34.5s
643:	learn: 0.0644510	total: 1m 2s	remaining: 34.4s
644:	learn: 0.0643104	total: 1m 2s	remaining: 34.3s
645:	learn: 0.0642304	total: 1m 2s	remaining: 34.2s
646:	learn: 0.0639493	total: 1m 2s	remaining: 34.1s
647:	learn: 0.0637728	total: 1m 2s	remaining: 34s
648:	learn: 0.0637545	total: 1m 2s	remaining: 33.9s
649:	learn: 0.0636442	total: 1m 2s	remaining: 33.8s
650:	learn: 0.0635652	total: 1m 2s	remaining: 33.7s
651:	learn: 0.0633936	total: 1m 2s	remaining: 33.6s
652:	learn: 0.0631582	total: 1m 3s	remaining: 33.5s
653:	learn: 0.0629682	total: 1m 3s	remaining: 33.4s
654:	learn: 0.0628041	total: 1m 3s	remaining: 33.3s
655:	learn: 0.0627693	total: 1m 3s	remaining: 33.2s
656:	learn: 0.0626364	total: 1m 3s	remaining: 33.2s
657:	learn: 0.0623632	total: 1m 3s	remaining: 33.1s
658:	learn: 0.

796:	learn: 0.0451099	total: 1m 17s	remaining: 19.7s
797:	learn: 0.0450743	total: 1m 17s	remaining: 19.6s
798:	learn: 0.0449478	total: 1m 17s	remaining: 19.5s
799:	learn: 0.0448354	total: 1m 17s	remaining: 19.4s
800:	learn: 0.0447215	total: 1m 17s	remaining: 19.3s
801:	learn: 0.0446234	total: 1m 17s	remaining: 19.2s
802:	learn: 0.0445602	total: 1m 18s	remaining: 19.1s
803:	learn: 0.0444158	total: 1m 18s	remaining: 19s
804:	learn: 0.0443545	total: 1m 18s	remaining: 18.9s
805:	learn: 0.0442169	total: 1m 18s	remaining: 18.8s
806:	learn: 0.0441542	total: 1m 18s	remaining: 18.7s
807:	learn: 0.0440247	total: 1m 18s	remaining: 18.6s
808:	learn: 0.0439096	total: 1m 18s	remaining: 18.5s
809:	learn: 0.0438191	total: 1m 18s	remaining: 18.4s
810:	learn: 0.0437528	total: 1m 18s	remaining: 18.3s
811:	learn: 0.0437028	total: 1m 18s	remaining: 18.3s
812:	learn: 0.0436361	total: 1m 18s	remaining: 18.2s
813:	learn: 0.0435820	total: 1m 19s	remaining: 18.1s
814:	learn: 0.0434655	total: 1m 19s	remaining: 1

952:	learn: 0.0330793	total: 1m 32s	remaining: 4.56s
953:	learn: 0.0330061	total: 1m 32s	remaining: 4.46s
954:	learn: 0.0329326	total: 1m 32s	remaining: 4.36s
955:	learn: 0.0328545	total: 1m 32s	remaining: 4.27s
956:	learn: 0.0328412	total: 1m 32s	remaining: 4.17s
957:	learn: 0.0326997	total: 1m 32s	remaining: 4.07s
958:	learn: 0.0325762	total: 1m 33s	remaining: 3.98s
959:	learn: 0.0325677	total: 1m 33s	remaining: 3.88s
960:	learn: 0.0324939	total: 1m 33s	remaining: 3.78s
961:	learn: 0.0323745	total: 1m 33s	remaining: 3.69s
962:	learn: 0.0322864	total: 1m 33s	remaining: 3.59s
963:	learn: 0.0321539	total: 1m 33s	remaining: 3.49s
964:	learn: 0.0321470	total: 1m 33s	remaining: 3.4s
965:	learn: 0.0321036	total: 1m 33s	remaining: 3.3s
966:	learn: 0.0320684	total: 1m 33s	remaining: 3.2s
967:	learn: 0.0319930	total: 1m 33s	remaining: 3.1s
968:	learn: 0.0319777	total: 1m 34s	remaining: 3.01s
969:	learn: 0.0319413	total: 1m 34s	remaining: 2.91s
970:	learn: 0.0318925	total: 1m 34s	remaining: 2.8

<catboost.core.CatBoostRegressor at 0x7f5ccb221c70>

In [69]:
preds = model_2.predict(train_data_df[['x_min', 'y_min', 'x_max', 'y_max', 'conf','max_d','min_d','x1','y1','x2','y2','focal_lenght']])
len(preds)

530

In [70]:
preds_model3 = model_3.predict(train_data_df[['max_d','min_d','x1','y1','x2','y2','focal_lenght']])
len(preds_model3)

530

In [71]:
preds[0:10]

array([     4.8713,      1.5427,      3.6717,      2.2174,      3.7239,      4.5126,      5.3788,      6.2019,      7.1176,       5.263])

In [72]:
y_real = train_data_df[['distance']].values
len(y_real)

530

In [73]:
y_real.T[0][:10]

array([       4.88,        1.54,        3.68,        2.22,        3.73,        4.52,         5.4,        6.21,        7.12,        5.26])

In [74]:
preds_model3[0:10]

array([     4.8085,      4.4642,      3.6427,       2.192,       3.687,      4.4415,      5.3112,      6.1453,      7.0543,      5.1883])

In [75]:
div_pred=preds/y_real.T[0]

In [76]:
div_pred[div_pred.argsort()[-5:]]

array([     1.0104,      1.0105,      1.0109,      1.0129,      1.0138])

In [77]:
div_pred.argsort()[-5:]

array([216, 280, 505, 244, 205])

In [78]:
div_pred[div_pred.argsort()[:5]]

array([    0.98808,     0.98871,     0.98938,     0.99074,     0.99101])

In [79]:
div_pred.argsort()[:5]

array([294, 325, 220,  24, 515])

In [80]:
filt_index = list(np.concatenate((div_pred.argsort()[-5:], div_pred.argsort()[:5]), axis=0))
filt_index

[216, 280, 505, 244, 205, 294, 325, 220, 24, 515]

In [81]:
pd.set_option('display.max_columns', None)

In [82]:
train_data_df_filtered= train_data_df.filter(items = filt_index, axis=0)
train_data_df_filtered.head(15)



Unnamed: 0,image_name,distance,height,width,x_min,y_min,x_max,y_max,conf,class,max_d,min_d,box_div,x1,y1,x2,y2,focal_lenght,calc_dist_min,calc_dist_max
216,img_2255.jpg,1.57,3024.0,4032.0,-839.760254,-589.310364,686.181885,629.525879,0.956428,2.0,583.219513,121.066098,4.817364,-320.0,12.0,251.0,143.0,1.54,1.424676,1.373068
280,img_2390.jpg,1.25,3024.0,4032.0,-1033.36145,-627.740234,788.004639,789.916992,0.952801,2.0,715.08461,149.013422,4.798793,-479.0,145.0,260.0,329.0,1.54,1.15748,1.119867
505,img_2901.jpg,1.78,3024.0,4032.0,-722.271118,-501.768311,667.621094,591.592773,0.950557,2.0,480.009375,97.005155,4.948287,-262.0,-14.0,221.0,92.0,1.54,1.77805,1.668301
244,img_2304.jpg,2.1,3024.0,4032.0,-698.226685,-540.345215,426.173584,339.766602,0.946426,2.0,396.126243,80.024996,4.950031,-311.0,-61.0,70.0,45.0,1.54,2.155327,2.021578
205,img_2010.jpg,1.98,3024.0,4032.0,-641.743774,-349.371826,537.031006,535.217896,0.949507,2.0,411.098528,83.006024,4.952635,-241.0,100.0,139.0,207.0,1.54,2.077921,1.947952
294,img_2426.jpg,2.12,3024.0,4032.0,-785.122559,-404.735474,362.621826,471.732422,0.946729,2.0,412.004854,83.006024,4.963554,-401.0,86.0,18.0,189.0,1.54,2.077921,1.943666
325,img_2484.jpg,1.57,3024.0,4032.0,-859.301514,-613.528076,633.097656,580.545898,0.949609,2.0,533.303853,107.07474,4.980669,-354.0,337.0,164.0,461.0,1.54,1.610837,1.501583
220,img_2264.jpg,1.43,3024.0,4032.0,-901.614258,-457.625732,722.616455,834.071289,0.953298,2.0,625.204766,129.062001,4.84422,-389.0,174.0,241.0,314.0,1.54,1.336412,1.28086
24,img_1640.jpg,2.72,3024.0,4032.0,-710.672119,-496.499207,499.928711,532.672852,0.944775,2.0,407.044224,79.006329,5.152046,-257.0,-86.0,137.0,22.0,1.54,2.183116,1.967354
515,img_2916.jpg,4.2,3024.0,4032.0,-404.506104,-273.929932,303.935059,303.476562,0.933645,2.0,238.0,47.0,5.06383,-175.0,40.0,86.0,97.0,1.54,3.669787,3.364706


In [83]:
top_val=0
bottom_val=0

y_mean=np.mean(y_real)
# y_mean
for i in range(len(y_real)):
    top_val+=pow(y_real[i][0]-preds[i],2)
    bottom_val+=pow(y_real[i][0]-y_mean,2)
    
print('R2=',(1-top_val/bottom_val))

R2= 0.9999636025699469


In [84]:
top_val=0
bottom_val=0

y_mean=np.mean(y_real)
# y_mean
for i in range(len(y_real)):
    top_val+=pow(y_real[i][0]-preds_model3[i],2)
    bottom_val+=pow(y_real[i][0]-y_mean,2)
    
print('R2=',(1-top_val/bottom_val))

R2= 0.9533677393845257


In [85]:
y_mean

4.181037735849056

# test data

In [86]:
with open("_root_stepik_hacaton_ulyanovsk_dataset_test.json","r") as f:
    test_plates = json.load(f)
len(test_plates)

521

In [87]:
import exifread
import io
import re

test_image_exif={}
test_car_data = []
for img_path in glob.glob('dataset/test/*'):
    if ".jpg" not in img_path and ".heic" not in img_path:
        print(img_path)
        continue
        
    img_name=img_path.split('/')[-1]
    
    
    if 'heic' in img_path:
        heif_file = pyheif.read(img_path)
        
        for metadata in heif_file.metadata:

            if metadata['type'] == 'Exif':
                fstream = io.BytesIO(metadata['data'][6:])

                exifdata = exifread.process_file(fstream,details=False)
                m = re.search('\s+(\d+.\d+)mm', str(exifdata))
                if m:
                    found = m.group(1)
                    test_image_exif[img_name]=float(found)
                else:
                    test_image_exif[img_name]=1.5
    else:
        exif_dict = piexif.load(img_path)
        v1,v2=exif_dict['Exif'][37386]
        test_image_exif[img_name]=v1/v2
        
    
    if 'heic' in img_path:
        heif_file = pyheif.read(img_path)
        img = Image.frombytes(heif_file.mode, heif_file.size, heif_file.data, "raw", heif_file.mode, heif_file.stride)
    else:
        img = Image.open(img_path)
        
        
    img=np.array(img)
    for p in test_plates:
        if p['path']==img_path:
            if p['found_plate'] and p['angle']!=0:
                img=rotate_image(img,p['angle'])
            break
            
    results = car_model(img)

    if results.xyxy[0].shape != torch.Size([0, 6]):
        results = [img_name,img.shape[0],img.shape[1]] + results.xyxy[0][0].cpu().numpy().tolist()
        test_car_data.append(results)
    
    

In [88]:
from pathlib import Path
Path("dataset/small_test_solution7").mkdir(parents=True, exist_ok=True)

In [89]:
image_delta_coords={}
for plate in tqdm(test_plates):
    if not plate['found_plate']:
        continue
        
    if 'heic' in plate['path']:
        heif_file = pyheif.read(plate['path'])
        img = Image.frombytes(heif_file.mode, heif_file.size, heif_file.data, "raw", heif_file.mode, heif_file.stride)
    else:
        img = Image.open(plate['path'])
        
    img=np.array(img)
    if plate['angle']!=0:
        img=rotate_image(img,plate['angle'])
    
    plate_index=0
    for x1,y1,x2,y2 in plate['plate']:
        image=img
        x_min=max(0,x1-50)
        x_max=min(plate['shape'][1],x2+50)
        y_min=max(0,y1-50)
        y_max=min(plate['shape'][0],y2+50)

        resized = image[y_min:y_max,x_min:x_max] #resize_image(image)

        im = Image.fromarray(resized)
        croped_path=f"dataset/small_test_solution7/{plate['path'].split('/')[-1]}_{plate_index}.jpg"
        plate_index+=1
        im.save(croped_path)
        image_delta_coords[croped_path.split('/')[-1]]={'orig':plate['path'],'croped':croped_path,'coord':[x1,y1,x2,y2],'delta':[x_min,x_max,y_min,y_max]}
        
        


  0%|          | 0/521 [00:00<?, ?it/s]

In [90]:
from skimage.io import imread
import os
import json
import glob

test_images = []
test_images_file = []

for img_path in glob.glob('dataset/small_test_solution7/*.jpg'):
    test_images.append(imread(img_path))
    test_images_file.append(img_path.split("/")[-1])

In [91]:
from skimage.transform import resize
import numpy as np

size = (256, 256)
X_test = [resize(x, size, mode='constant', anti_aliasing=True,) for x in test_images]
X_test = np.array(X_test, np.float32)

In [92]:
X_test_tensor=torch.tensor(X_test)
X_test_tensor=torch.moveaxis(X_test_tensor, 3, 1)

In [93]:
model.to(device)
all_test_predictions = []
with torch.no_grad():
    start=0
    batch_size=10
    while start<X_test_tensor.shape[0]:
#         print('start=',start)
        batch=X_test_tensor[start:min(start+batch_size,X_test_tensor.shape[0])]
        predictions = list(model(batch.to(device)).cpu().numpy())
        all_test_predictions+=predictions
        start+=batch_size


In [94]:
import numpy as np
import cv2 as cv2
from random import randrange

Path("dataset/small_test_solution7_plate_box").mkdir(parents=True, exist_ok=True)

test_plate_sizes=[]
for i in range(len(all_test_predictions)):
    predict=all_test_predictions[i]
    mask = predict.squeeze()
    mask[mask>0]=255
    mask[mask<0]=0
    img=test_images[i]
    size= img.shape
    new_mask = resize(mask, size, mode='constant', anti_aliasing=True,)
    
    new_mask=new_mask.astype(np.uint8)
    gray = cv2.cvtColor(new_mask, cv2.COLOR_BGR2GRAY)
    
    ret, thresh = cv2.threshold(gray, 127, 255, 0)
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_TC89_L1)
    
    boxed_path=f"dataset/small_test_solution7_plate_box/{test_images_file[i]}_{randrange(10)}.jpg"
    if len(contours)==0:
        im = Image.fromarray(img)
        im.save(boxed_path)
        print('empty:',test_images_file[i])
        continue
        
    cnt = contours[0]
    rect = cv2.minAreaRect(cnt)
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    
    boxed_img=draw_box(img,box)
    im = Image.fromarray(boxed_img)
    im.save(boxed_path)
    
    
    d1=pow(box[0][0]-box[1][0],2)+pow(box[0][1]-box[1][1],2)
    d2=pow(box[2][0]-box[1][0],2)+pow(box[2][1]-box[1][1],2)
    max_d=max(d1,d2)
    min_d=min(d1,d2)
    
    plate_i={'image_name':test_images_file[i],'max_d':max_d,'min_d':min_d}
    
    if test_images_file[i] not in image_delta_coords:
        print('without coords:',test_images_file[i])
        continue
        
    plate_i['x1']=image_delta_coords[test_images_file[i]]['coord'][0]
    plate_i['y1']=image_delta_coords[test_images_file[i]]['coord'][1]
    plate_i['x2']=image_delta_coords[test_images_file[i]]['coord'][2]
    plate_i['y2']=image_delta_coords[test_images_file[i]]['coord'][3]
    test_plate_sizes.append(plate_i)


empty: img_2717.jpg_0.jpg
empty: img_2758.jpg_0.jpg
empty: img_2715.jpg_0.jpg


In [95]:
import math

test_plate_size_dict={}
for i in range(len(test_plate_sizes)):
    if test_plate_sizes[i]['min_d']==0:
        continue
    
    real_name=test_plate_sizes[i]['image_name'].split('_')[0]+"_"+test_plate_sizes[i]['image_name'].split('_')[1]
    box_div=math.sqrt(test_plate_sizes[i]['max_d'])/math.sqrt(test_plate_sizes[i]['min_d'])
    max_d=math.sqrt(test_plate_sizes[i]['max_d'])
    min_d=math.sqrt(test_plate_sizes[i]['min_d'])
    if real_name in test_plate_size_dict:
        if abs(test_plate_size_dict[real_name]['box_div']-520/112)<abs(box_div-520/112):
            continue
    test_plate_size_dict[real_name]={
        'box_div':box_div,
        'max_d':max_d,
        'min_d':min_d,
        'x1':test_plate_sizes[i]['x1'],
        'y1':test_plate_sizes[i]['y1'],
        'x2':test_plate_sizes[i]['x2'],
        'y2':test_plate_sizes[i]['y2'],
    }
    
test_plate_size_dataset=[]
for k in test_plate_size_dict:
    test_plate_size_dataset.append({
        'image_name':k, 
        'box_div':test_plate_size_dict[k]['box_div'],
        'max_d':test_plate_size_dict[k]['max_d'],
        'min_d':test_plate_size_dict[k]['min_d'],
        'x1':test_plate_size_dict[k]['x1'],
        'y1':test_plate_size_dict[k]['y1'],
        'x2':test_plate_size_dict[k]['x2'],
        'y2':test_plate_size_dict[k]['y2']
    })
    
    


In [96]:
test_plate_data_df = pd.DataFrame(test_plate_size_dataset, columns = ['image_name', 'max_d','min_d','box_div','x1','y1','x2','y2'])
test_plate_data_df.head()

Unnamed: 0,image_name,max_d,min_d,box_div,x1,y1,x2,y2
0,img_1664.jpg,298.377278,61.073726,4.885526,1884,1537,2188,1602
1,img_2865.jpg,431.0,90.0,4.788889,1688,1594,2047,1698
2,img_2769.jpg,354.022598,69.007246,5.130224,1762,1718,2092,1795
3,img_2662.jpg,186.131674,40.012498,4.651838,1841,1575,2007,1626
4,img_2676.jpg,203.0,41.0,4.95122,1821,1567,2017,1619


In [97]:
test_image_exif_dataset=[]
for k in test_image_exif:
    test_image_exif_dataset.append({'image_name':k,'focal_lenght':test_image_exif[k]})

In [98]:
test_image_exif_df = pd.DataFrame(test_image_exif_dataset, columns = ['image_name', 'focal_lenght'])
test_image_exif_df.head()

Unnamed: 0,image_name,focal_lenght
0,img_2015.jpg,1.54
1,img_2698.heic,1.55
2,img_2616.jpg,1.54
3,img_2508.jpg,1.54
4,img_2541.jpg,1.54


In [99]:
test_data_df = pd.DataFrame(test_car_data, columns = ['image_name','height','width', 'x_min', 'y_min', 'x_max', 'y_max', 'conf', 'class'])
test_data_df.head()


Unnamed: 0,image_name,height,width,x_min,y_min,x_max,y_max,conf,class
0,img_2015.jpg,3024,4032,1713.450562,1299.486572,2221.601074,1711.077881,0.922791,2.0
1,img_2698.heic,3024,4032,1839.785278,1326.827148,2241.602051,1672.299194,0.911798,2.0
2,img_2616.jpg,3024,4032,2897.265381,1367.213623,3904.821533,1712.577271,0.942119,2.0
3,img_2508.jpg,3024,4032,757.266357,1326.238525,1581.174805,1784.345581,0.934661,2.0
4,img_2541.jpg,3024,4032,9.108929,1275.869019,980.987732,1662.305664,0.935703,2.0


In [100]:
test_data_df = pd.merge(test_data_df,test_plate_data_df, how='left')
test_data_df.head(10)

Unnamed: 0,image_name,height,width,x_min,y_min,x_max,y_max,conf,class,max_d,min_d,box_div,x1,y1,x2,y2
0,img_2015.jpg,3024,4032,1713.450562,1299.486572,2221.601074,1711.077881,0.922791,2.0,163.076669,33.015148,4.93945,1861.0,1470.0,2054.0,1535.0
1,img_2698.heic,3024,4032,1839.785278,1326.827148,2241.602051,1672.299194,0.911798,2.0,140.014285,29.017236,4.825211,1970.0,1567.0,2110.0,1607.0
2,img_2616.jpg,3024,4032,2897.265381,1367.213623,3904.821533,1712.577271,0.942119,2.0,147.054412,31.016125,4.741225,1814.0,1569.0,1974.0,1625.0
3,img_2508.jpg,3024,4032,757.266357,1326.238525,1581.174805,1784.345581,0.934661,2.0,183.002732,38.013156,4.814195,1792.0,1700.0,1956.0,1756.0
4,img_2541.jpg,3024,4032,9.108929,1275.869019,980.987732,1662.305664,0.935703,2.0,172.072659,35.014283,4.914356,1740.0,1524.0,1942.0,1577.0
5,img_2402.jpg,3024,4032,1202.303711,945.354309,2485.848633,1915.999512,0.953384,2.0,444.703272,68.117545,6.528469,1655.0,1749.0,2099.0,1853.0
6,img_2555.jpg,3024,4032,2209.193604,1320.837524,3069.511719,1784.307861,0.936762,2.0,186.386695,38.118237,4.889699,1792.0,1678.0,2005.0,1739.0
7,img_2444.jpg,3024,4032,1635.321899,1312.564087,2252.353516,1859.894165,0.940548,2.0,212.0,44.0,4.818182,1861.0,1617.0,2088.0,1675.0
8,img_1718.jpg,3024,4032,1716.558594,1423.43689,2232.912842,1838.526367,0.960751,2.0,167.047897,35.014283,4.77085,1894.0,1566.0,2062.0,1636.0
9,img_2875.jpg,3024,4032,1703.53186,1161.769165,2074.114258,1477.958496,0.9098,2.0,127.035428,24.020824,5.288554,1824.0,1299.0,1956.0,1352.0


In [101]:
test_data_df = pd.merge(test_data_df, test_image_exif_df, how='left')
test_data_df.head(10)

Unnamed: 0,image_name,height,width,x_min,y_min,x_max,y_max,conf,class,max_d,min_d,box_div,x1,y1,x2,y2,focal_lenght
0,img_2015.jpg,3024,4032,1713.450562,1299.486572,2221.601074,1711.077881,0.922791,2.0,163.076669,33.015148,4.93945,1861.0,1470.0,2054.0,1535.0,1.54
1,img_2698.heic,3024,4032,1839.785278,1326.827148,2241.602051,1672.299194,0.911798,2.0,140.014285,29.017236,4.825211,1970.0,1567.0,2110.0,1607.0,1.55
2,img_2616.jpg,3024,4032,2897.265381,1367.213623,3904.821533,1712.577271,0.942119,2.0,147.054412,31.016125,4.741225,1814.0,1569.0,1974.0,1625.0,1.54
3,img_2508.jpg,3024,4032,757.266357,1326.238525,1581.174805,1784.345581,0.934661,2.0,183.002732,38.013156,4.814195,1792.0,1700.0,1956.0,1756.0,1.54
4,img_2541.jpg,3024,4032,9.108929,1275.869019,980.987732,1662.305664,0.935703,2.0,172.072659,35.014283,4.914356,1740.0,1524.0,1942.0,1577.0,1.54
5,img_2402.jpg,3024,4032,1202.303711,945.354309,2485.848633,1915.999512,0.953384,2.0,444.703272,68.117545,6.528469,1655.0,1749.0,2099.0,1853.0,1.54
6,img_2555.jpg,3024,4032,2209.193604,1320.837524,3069.511719,1784.307861,0.936762,2.0,186.386695,38.118237,4.889699,1792.0,1678.0,2005.0,1739.0,1.54
7,img_2444.jpg,3024,4032,1635.321899,1312.564087,2252.353516,1859.894165,0.940548,2.0,212.0,44.0,4.818182,1861.0,1617.0,2088.0,1675.0,1.54
8,img_1718.jpg,3024,4032,1716.558594,1423.43689,2232.912842,1838.526367,0.960751,2.0,167.047897,35.014283,4.77085,1894.0,1566.0,2062.0,1636.0,1.54
9,img_2875.jpg,3024,4032,1703.53186,1161.769165,2074.114258,1477.958496,0.9098,2.0,127.035428,24.020824,5.288554,1824.0,1299.0,1956.0,1352.0,1.54


In [102]:
test_data_df['calc_dist_min']=test_data_df['focal_lenght']*112/test_data_df['min_d']
test_data_df['calc_dist_max']=test_data_df['focal_lenght']*520/test_data_df['max_d']
test_data_df.head(10)

Unnamed: 0,image_name,height,width,x_min,y_min,x_max,y_max,conf,class,max_d,min_d,box_div,x1,y1,x2,y2,focal_lenght,calc_dist_min,calc_dist_max
0,img_2015.jpg,3024,4032,1713.450562,1299.486572,2221.601074,1711.077881,0.922791,2.0,163.076669,33.015148,4.93945,1861.0,1470.0,2054.0,1535.0,1.54,5.224269,4.910574
1,img_2698.heic,3024,4032,1839.785278,1326.827148,2241.602051,1672.299194,0.911798,2.0,140.014285,29.017236,4.825211,1970.0,1567.0,2110.0,1607.0,1.55,5.982651,5.756555
2,img_2616.jpg,3024,4032,2897.265381,1367.213623,3904.821533,1712.577271,0.942119,2.0,147.054412,31.016125,4.741225,1814.0,1569.0,1974.0,1625.0,1.54,5.560978,5.445603
3,img_2508.jpg,3024,4032,757.266357,1326.238525,1581.174805,1784.345581,0.934661,2.0,183.002732,38.013156,4.814195,1792.0,1700.0,1956.0,1756.0,1.54,4.537377,4.375891
4,img_2541.jpg,3024,4032,9.108929,1275.869019,980.987732,1662.305664,0.935703,2.0,172.072659,35.014283,4.914356,1740.0,1524.0,1942.0,1577.0,1.54,4.92599,4.653848
5,img_2402.jpg,3024,4032,1202.303711,945.354309,2485.848633,1915.999512,0.953384,2.0,444.703272,68.117545,6.528469,1655.0,1749.0,2099.0,1853.0,1.54,2.532094,1.800751
6,img_2555.jpg,3024,4032,2209.193604,1320.837524,3069.511719,1784.307861,0.936762,2.0,186.386695,38.118237,4.889699,1792.0,1678.0,2005.0,1739.0,1.54,4.524868,4.296444
7,img_2444.jpg,3024,4032,1635.321899,1312.564087,2252.353516,1859.894165,0.940548,2.0,212.0,44.0,4.818182,1861.0,1617.0,2088.0,1675.0,1.54,3.92,3.777358
8,img_1718.jpg,3024,4032,1716.558594,1423.43689,2232.912842,1838.526367,0.960751,2.0,167.047897,35.014283,4.77085,1894.0,1566.0,2062.0,1636.0,1.54,4.92599,4.793835
9,img_2875.jpg,3024,4032,1703.53186,1161.769165,2074.114258,1477.958496,0.9098,2.0,127.035428,24.020824,5.288554,1824.0,1299.0,1956.0,1352.0,1.54,7.180436,6.303753


In [103]:
test_data_df['x_min']=test_data_df['x_min']-test_data_df['width']/2
test_data_df['x_max']=test_data_df['x_max']-test_data_df['width']/2
test_data_df['y_min']=test_data_df['y_min']-test_data_df['height']/2
test_data_df['y_max']=test_data_df['y_max']-test_data_df['height']/2

test_data_df['x1']=test_data_df['x1']-test_data_df['width']/2
test_data_df['x2']=test_data_df['x2']-test_data_df['width']/2
test_data_df['y1']=test_data_df['y1']-test_data_df['height']/2
test_data_df['y2']=test_data_df['y2']-test_data_df['height']/2

test_data_df.head(10)

Unnamed: 0,image_name,height,width,x_min,y_min,x_max,y_max,conf,class,max_d,min_d,box_div,x1,y1,x2,y2,focal_lenght,calc_dist_min,calc_dist_max
0,img_2015.jpg,3024,4032,-302.549438,-212.513428,205.601074,199.077881,0.922791,2.0,163.076669,33.015148,4.93945,-155.0,-42.0,38.0,23.0,1.54,5.224269,4.910574
1,img_2698.heic,3024,4032,-176.214722,-185.172852,225.602051,160.299194,0.911798,2.0,140.014285,29.017236,4.825211,-46.0,55.0,94.0,95.0,1.55,5.982651,5.756555
2,img_2616.jpg,3024,4032,881.265381,-144.786377,1888.821533,200.577271,0.942119,2.0,147.054412,31.016125,4.741225,-202.0,57.0,-42.0,113.0,1.54,5.560978,5.445603
3,img_2508.jpg,3024,4032,-1258.733643,-185.761475,-434.825195,272.345581,0.934661,2.0,183.002732,38.013156,4.814195,-224.0,188.0,-60.0,244.0,1.54,4.537377,4.375891
4,img_2541.jpg,3024,4032,-2006.891071,-236.130981,-1035.012268,150.305664,0.935703,2.0,172.072659,35.014283,4.914356,-276.0,12.0,-74.0,65.0,1.54,4.92599,4.653848
5,img_2402.jpg,3024,4032,-813.696289,-566.645691,469.848633,403.999512,0.953384,2.0,444.703272,68.117545,6.528469,-361.0,237.0,83.0,341.0,1.54,2.532094,1.800751
6,img_2555.jpg,3024,4032,193.193604,-191.162476,1053.511719,272.307861,0.936762,2.0,186.386695,38.118237,4.889699,-224.0,166.0,-11.0,227.0,1.54,4.524868,4.296444
7,img_2444.jpg,3024,4032,-380.678101,-199.435913,236.353516,347.894165,0.940548,2.0,212.0,44.0,4.818182,-155.0,105.0,72.0,163.0,1.54,3.92,3.777358
8,img_1718.jpg,3024,4032,-299.441406,-88.56311,216.912842,326.526367,0.960751,2.0,167.047897,35.014283,4.77085,-122.0,54.0,46.0,124.0,1.54,4.92599,4.793835
9,img_2875.jpg,3024,4032,-312.46814,-350.230835,58.114258,-34.041504,0.9098,2.0,127.035428,24.020824,5.288554,-192.0,-213.0,-60.0,-160.0,1.54,7.180436,6.303753


In [104]:
preds = model_2.predict(test_data_df[['x_min', 'y_min', 'x_max', 'y_max', 'conf','max_d','min_d','x1','y1','x2','y2','focal_lenght']])



In [105]:
test_data_df['distance'] = preds
test_data_df.head()

Unnamed: 0,image_name,height,width,x_min,y_min,x_max,y_max,conf,class,max_d,min_d,box_div,x1,y1,x2,y2,focal_lenght,calc_dist_min,calc_dist_max,distance
0,img_2015.jpg,3024,4032,-302.549438,-212.513428,205.601074,199.077881,0.922791,2.0,163.076669,33.015148,4.93945,-155.0,-42.0,38.0,23.0,1.54,5.224269,4.910574,5.082751
1,img_2698.heic,3024,4032,-176.214722,-185.172852,225.602051,160.299194,0.911798,2.0,140.014285,29.017236,4.825211,-46.0,55.0,94.0,95.0,1.55,5.982651,5.756555,6.229609
2,img_2616.jpg,3024,4032,881.265381,-144.786377,1888.821533,200.577271,0.942119,2.0,147.054412,31.016125,4.741225,-202.0,57.0,-42.0,113.0,1.54,5.560978,5.445603,5.686252
3,img_2508.jpg,3024,4032,-1258.733643,-185.761475,-434.825195,272.345581,0.934661,2.0,183.002732,38.013156,4.814195,-224.0,188.0,-60.0,244.0,1.54,4.537377,4.375891,4.773726
4,img_2541.jpg,3024,4032,-2006.891071,-236.130981,-1035.012268,150.305664,0.935703,2.0,172.072659,35.014283,4.914356,-276.0,12.0,-74.0,65.0,1.54,4.92599,4.653848,4.925868


In [106]:
test_data_df_model2 = test_plate_data_df.copy()
test_data_df_model2.head()

Unnamed: 0,image_name,max_d,min_d,box_div,x1,y1,x2,y2
0,img_1664.jpg,298.377278,61.073726,4.885526,1884,1537,2188,1602
1,img_2865.jpg,431.0,90.0,4.788889,1688,1594,2047,1698
2,img_2769.jpg,354.022598,69.007246,5.130224,1762,1718,2092,1795
3,img_2662.jpg,186.131674,40.012498,4.651838,1841,1575,2007,1626
4,img_2676.jpg,203.0,41.0,4.95122,1821,1567,2017,1619


In [107]:
test_data_df_model2 = pd.merge(test_data_df_model2, test_image_exif_df, how='left')
test_data_df_model2.head(10)

Unnamed: 0,image_name,max_d,min_d,box_div,x1,y1,x2,y2,focal_lenght
0,img_1664.jpg,298.377278,61.073726,4.885526,1884,1537,2188,1602,1.54
1,img_2865.jpg,431.0,90.0,4.788889,1688,1594,2047,1698,1.54
2,img_2769.jpg,354.022598,69.007246,5.130224,1762,1718,2092,1795,1.54
3,img_2662.jpg,186.131674,40.012498,4.651838,1841,1575,2007,1626,1.54
4,img_2676.jpg,203.0,41.0,4.95122,1821,1567,2017,1619,1.54
5,img_2428.jpg,254.017716,53.009433,4.791934,1829,1703,2085,1763,1.54
6,img_1823.jpg,154.0,31.0,4.967742,1889,1527,2043,1568,1.54
7,img_1953.jpg,112.071406,27.018512,4.147949,1837,1478,1960,1518,1.54
8,img_2772.jpg,277.064974,53.009433,5.226711,1670,1698,1942,1757,1.54
9,img_2559.jpg,124.197423,26.07681,4.762754,1871,1668,2001,1722,1.54


In [108]:
test_data_df_model2 = pd.merge(test_data_df_model2, test_data_df[['image_name','height','width']], how='left')
test_data_df_model2.head(10)

Unnamed: 0,image_name,max_d,min_d,box_div,x1,y1,x2,y2,focal_lenght,height,width
0,img_1664.jpg,298.377278,61.073726,4.885526,1884,1537,2188,1602,1.54,3024.0,4032.0
1,img_2865.jpg,431.0,90.0,4.788889,1688,1594,2047,1698,1.54,3024.0,4032.0
2,img_2769.jpg,354.022598,69.007246,5.130224,1762,1718,2092,1795,1.54,3024.0,4032.0
3,img_2662.jpg,186.131674,40.012498,4.651838,1841,1575,2007,1626,1.54,3024.0,4032.0
4,img_2676.jpg,203.0,41.0,4.95122,1821,1567,2017,1619,1.54,3024.0,4032.0
5,img_2428.jpg,254.017716,53.009433,4.791934,1829,1703,2085,1763,1.54,3024.0,4032.0
6,img_1823.jpg,154.0,31.0,4.967742,1889,1527,2043,1568,1.54,3024.0,4032.0
7,img_1953.jpg,112.071406,27.018512,4.147949,1837,1478,1960,1518,1.54,3024.0,4032.0
8,img_2772.jpg,277.064974,53.009433,5.226711,1670,1698,1942,1757,1.54,3024.0,4032.0
9,img_2559.jpg,124.197423,26.07681,4.762754,1871,1668,2001,1722,1.54,3024.0,4032.0


In [109]:
test_data_df_model2['x1']=test_data_df_model2['x1']-test_data_df_model2['width']/2
test_data_df_model2['x2']=test_data_df_model2['x2']-test_data_df_model2['width']/2
test_data_df_model2['y1']=test_data_df_model2['y1']-test_data_df_model2['height']/2
test_data_df_model2['y2']=test_data_df_model2['y2']-test_data_df_model2['height']/2
test_data_df_model2.head()

Unnamed: 0,image_name,max_d,min_d,box_div,x1,y1,x2,y2,focal_lenght,height,width
0,img_1664.jpg,298.377278,61.073726,4.885526,-132.0,25.0,172.0,90.0,1.54,3024.0,4032.0
1,img_2865.jpg,431.0,90.0,4.788889,-328.0,82.0,31.0,186.0,1.54,3024.0,4032.0
2,img_2769.jpg,354.022598,69.007246,5.130224,-254.0,206.0,76.0,283.0,1.54,3024.0,4032.0
3,img_2662.jpg,186.131674,40.012498,4.651838,-175.0,63.0,-9.0,114.0,1.54,3024.0,4032.0
4,img_2676.jpg,203.0,41.0,4.95122,-195.0,55.0,1.0,107.0,1.54,3024.0,4032.0


In [110]:
test_data_df_model2 = test_data_df_model2[test_data_df_model2['max_d'].notna()]
test_data_df_model2.head(10)


Unnamed: 0,image_name,max_d,min_d,box_div,x1,y1,x2,y2,focal_lenght,height,width
0,img_1664.jpg,298.377278,61.073726,4.885526,-132.0,25.0,172.0,90.0,1.54,3024.0,4032.0
1,img_2865.jpg,431.0,90.0,4.788889,-328.0,82.0,31.0,186.0,1.54,3024.0,4032.0
2,img_2769.jpg,354.022598,69.007246,5.130224,-254.0,206.0,76.0,283.0,1.54,3024.0,4032.0
3,img_2662.jpg,186.131674,40.012498,4.651838,-175.0,63.0,-9.0,114.0,1.54,3024.0,4032.0
4,img_2676.jpg,203.0,41.0,4.95122,-195.0,55.0,1.0,107.0,1.54,3024.0,4032.0
5,img_2428.jpg,254.017716,53.009433,4.791934,-187.0,191.0,69.0,251.0,1.54,3024.0,4032.0
6,img_1823.jpg,154.0,31.0,4.967742,-127.0,15.0,27.0,56.0,1.54,3024.0,4032.0
7,img_1953.jpg,112.071406,27.018512,4.147949,-179.0,-34.0,-56.0,6.0,1.54,3024.0,4032.0
8,img_2772.jpg,277.064974,53.009433,5.226711,-346.0,186.0,-74.0,245.0,1.54,3024.0,4032.0
9,img_2559.jpg,124.197423,26.07681,4.762754,-145.0,156.0,-15.0,210.0,1.54,3024.0,4032.0


In [111]:
preds_model3 = model_3.predict(test_data_df_model2[['max_d','min_d','x1','y1','x2','y2','focal_lenght']])

test_data_df_model2['distance_model3'] = preds_model3
test_data_df_model2.head()

Unnamed: 0,image_name,max_d,min_d,box_div,x1,y1,x2,y2,focal_lenght,height,width,distance_model3
0,img_1664.jpg,298.377278,61.073726,4.885526,-132.0,25.0,172.0,90.0,1.54,3024.0,4032.0,2.947412
1,img_2865.jpg,431.0,90.0,4.788889,-328.0,82.0,31.0,186.0,1.54,3024.0,4032.0,2.03458
2,img_2769.jpg,354.022598,69.007246,5.130224,-254.0,206.0,76.0,283.0,1.54,3024.0,4032.0,2.444348
3,img_2662.jpg,186.131674,40.012498,4.651838,-175.0,63.0,-9.0,114.0,1.54,3024.0,4032.0,4.553922
4,img_2676.jpg,203.0,41.0,4.95122,-195.0,55.0,1.0,107.0,1.54,3024.0,4032.0,4.43946


In [112]:
sample_solution_df = test_data_df[['image_name', 'distance']]
sample_solution_df.head(10)

Unnamed: 0,image_name,distance
0,img_2015.jpg,5.082751
1,img_2698.heic,6.229609
2,img_2616.jpg,5.686252
3,img_2508.jpg,4.773726
4,img_2541.jpg,4.925868
5,img_2402.jpg,2.135444
6,img_2555.jpg,4.590902
7,img_2444.jpg,4.053454
8,img_1718.jpg,5.289018
9,img_2875.jpg,6.199585


In [113]:
test_data_df_model2[test_data_df_model2['image_name']=='img_2015.jpg'].shape[0]

1

In [114]:
lost_test_items = []

for file_name in test_img_names - set(sample_solution_df['image_name'].values):
    if test_data_df_model2[test_data_df_model2['image_name']==file_name].shape[0]==0:
        lost_test_items.append([file_name, y_mean])
    else:
        lost_test_items.append([file_name, test_data_df_model2[test_data_df_model2['image_name']==file_name]['distance_model3'].values[0]])

In [115]:
lost_test_items_df = pd.DataFrame(lost_test_items, columns=['image_name', 'distance'])

In [116]:
sample_solution_df = pd.concat([sample_solution_df, lost_test_items_df])

In [117]:
sample_solution_df.to_csv('solution7.csv', sep=';', index=False)

In [118]:
lost_test_items

[['img_1868.jpg', 3.6579535263668177],
 ['img_1888.jpg', 2.562394410043778],
 ['img_2938.jpg', 1.7824988548063558],
 ['img_2674.heic', 4.181037735849056],
 ['img_2518.jpg', 3.656724125046391],
 ['img_2571.jpg', 1.725311823034854]]

In [119]:
test_plate_data_df[test_plate_data_df["image_name"]=="img_1888.jpg"]

Unnamed: 0,image_name,max_d,min_d,box_div,x1,y1,x2,y2
496,img_1888.jpg,401.004988,70.007142,5.728058,1687,1711,2058,1791
