### Определение расстояния до впереди идущего автомобиля на основе изображений 🚗

<h1><center>Задача</center></h1>
Разработать алгоритм, позволяющий определить дистанцию до впереди идущего автомобиля,используя для этого датасет фотографий автомобилей с разного расстояния.Впоследствии этот алгоритм может быть использован в системах навигации для предупреждения об опасном сближении и для контроля за соблюдением дистанции.


**Краткое описание решения:**

    ● Изображения преобразованы в черно-белое;
    ● Использовалась предобученная модель YOLOv5l6 для детекции автомобилей;
    ● Полученные координаты были признаками для регрессора;
    ● Из полученных координат созданы доп признаки;
    ● Поиск оптимального регрессора.

In [524]:
import pandas as pd
import math
import os
import cv2
import time
from math import sqrt
from sklearn.metrics import mean_squared_error,r2_score
from PIL import Image
import numpy as np
import pillow_heif
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score, KFold, GridSearchCV
import seaborn as sns
from sklearn import preprocessing, model_selection, metrics
from sklearn.linear_model import Ridge, Lasso,ElasticNet,LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from sklearn.ensemble import (BaggingRegressor,ExtraTreesRegressor,GradientBoostingRegressor,HistGradientBoostingRegressor,RandomForestRegressor,StackingRegressor)
import pathlib
import warnings
%matplotlib

warnings.filterwarnings("ignore")
import torch


Using matplotlib backend: TkAgg


In [519]:
model = torch.hub.load('ultralytics/yolov5', 'yolov5l6', pretrained=True)
model.cuda()# подключил графический процессор
model.classes = [2]# выбрал класс авто

Using cache found in C:\Users\Oksana/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2022-8-16 Python-3.9.12 torch-1.12.1 CUDA:0 (NVIDIA GeForce GTX 1650, 4096MiB)

Fusing layers... 
YOLOv5l6 summary: 476 layers, 76726332 parameters, 0 gradients
Adding AutoShape... 


In [472]:
sub = pd.read_csv('sample_solution.csv', sep=';', index_col=None)

In [473]:
train_labels_df = pd.read_csv(r'C:\work\Чемпионат_ульяновск\Ульяновск\train_dataset_train/train.csv', sep=';', index_col=None)

In [474]:
path_train = 'participants/train/'
path_test = 'participants/test/'

In [6]:
test_img_names = set(os.listdir('participants/test'))
train_img_names = set(os.listdir('participants/train'))

In [308]:
def increase_brightness(img, value):# фукнция яркости изображения
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)

    lim = 255 - value
    v[v > lim] = 255
    v[v <= lim] += value

    final_hsv = cv2.merge((h, s, v))
    img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
    return img

In [397]:
train_data = [] 
for img_name in tqdm(train_img_names):
    expansion=(pathlib.Path(img_name).suffix)# вывел расширение изображения
    if 'heic' in img_name:
        heif_file = pillow_heif.read_heif(path_train+ img_name)
        img= Image.frombytes(heif_file.mode, heif_file.size, heif_file.data, "raw", heif_file.mode, heif_file.stride)
        img = img.filter(ImageFilter.SMOOTH)
        img = img.filter(ImageFilter.EDGE_ENHANCE)
        img = img.convert('L')
        shape_img=(3024,4032)
        img=np.asarray(img)
        results = model(img,augment=True)
        results = results.pandas().xyxy[0].to_dict(orient="records")
        for result in results:
               
            con = result['confidence']
            cs = result['class']
            x1 = int(result['xmin'])
            y1 = int(result['ymin'])
            x2 = int(result['xmax'])
            y2 = int(result['ymax'])
            name = result['name']
            image=[img_name,x1,y1,x2,y2,con,cs,name,shape_img,expansion]
            train_data.append(image)
           
    else:
        img = cv2.imread(path_train+img_name)
     
        sharped_img = cv2.medianBlur (img, 7)
        frame = increase_brightness(sharped_img, value=50)
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        
        results = model(img,augment=True)
        results = results.pandas().xyxy[0].to_dict(orient="records")
        shape_img=str(img.shape)
        if shape_img=='(3024, 4032)': 
            for result in results:
               
                con = result['confidence']
                cs = result['class']
                x1 = int(result['xmin'])
                y1 = int(result['ymin'])
                x2 = int(result['xmax'])
                y2 = int(result['ymax'])
                shape_img=str(img.shape)
                name = result['name']
                image=[img_name,x1,y1,x2,y2,con,cs,name,shape_img,expansion]
                train_data.append(image)
        else:
            imag = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
            results = model(imag,augment=True)
            results = results.pandas().xyxy[0].to_dict(orient="records")
            for result in results:
               
                con = result['confidence']
                cs = result['class']
                x1 = int(result['xmin'])
                y1 = int(result['ymin'])
                x2 = int(result['xmax'])
                y2 = int(result['ymax'])
                shape_img=str(imag.shape)
                name = result['name']       
                image=[img_name,x1,y1,x2,y2,con,cs,name,shape_img,expansion]
                train_data.append(image)

  0%|          | 0/530 [00:00<?, ?it/s]

In [490]:
# Создал датафрейм с признаками тренировочных изображений
train_data_df = pd.DataFrame(train_data, columns = ['image_name', 'x_min', 'y_min', 'x_max', 'y_max', 'conf', 'class','name','shape_img','expansion'])

In [476]:
test_data = [] 
for img_name in tqdm(test_img_names):
    expansion=(pathlib.Path(img_name).suffix)
    if 'heic' in img_name:
        heif_file = pillow_heif.read_heif(path_test+ img_name)
        img= Image.frombytes(heif_file.mode, heif_file.size, heif_file.data, "raw", heif_file.mode, heif_file.stride)
        
        img = img.convert('L')
        shape_img=(3024,4032)
        img=np.asarray(img)
        results = model(img,augment=True)
        results = results.pandas().xyxy[0].to_dict(orient="records")
        for result in results:
               
            con = result['confidence']
            cs = result['class']
            x1 = int(result['xmin'])
            y1 = int(result['ymin'])
            x2 = int(result['xmax'])
            y2 = int(result['ymax'])
            name = result['name']
            image=[img_name,x1,y1,x2,y2,con,cs,name,shape_img,expansion]
            test_data.append(image)  
    else:
        img = cv2.imread(path_test+img_name)
     
        sharpen_filter = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])

       
        sharped_img = cv2.filter2D(img, -1, sharpen_filter)
        frame = increase_brightness(sharped_img, value=30)
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        
        results = model(img,augment=True)
        results = results.pandas().xyxy[0].to_dict(orient="records")
        shape_img=str(img.shape)
        if shape_img=='(3024, 4032)':   
            for result in results:
               
                con = result['confidence']
                cs = result['class']
                x1 = int(result['xmin'])
                y1 = int(result['ymin'])
                x2 = int(result['xmax'])
                y2 = int(result['ymax'])
                shape_img=str(img.shape)
                name = result['name']
           
                image=[img_name,x1,y1,x2,y2,con,cs,name,shape_img,expansion]
        
                test_data.append(image)
        else:
            imag = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
            results = model(imag,augment=True)
            results = results.pandas().xyxy[0].to_dict(orient="records")
            for result in results:
               
                con = result['confidence']
                cs = result['class']
                x1 = int(result['xmin'])
                y1 = int(result['ymin'])
                x2 = int(result['xmax'])
                y2 = int(result['ymax'])
                shape_img=str(imag.shape)
                name = result['name']
           
                image=[img_name,x1,y1,x2,y2,con,cs,name,shape_img,expansion]
        
                test_data.append(image)

  0%|          | 0/521 [00:00<?, ?it/s]

In [491]:
# Создал датафрейм с признаками тестовочных изображений
test_data_df = pd.DataFrame(test_data, columns = ['image_name', 'x_min', 'y_min', 'x_max', 'y_max', 'conf', 'class','name','shape_img','expansion'])

In [492]:
# Создана функция с помощью которой остались автомобили которые по центру изображения, а так же доп признаки

In [493]:
def transform_feature(df,img_h, img_w):
    df['x_center'] = (df['x_min'] + df['x_max'])/2
    df['x_center_total']=abs((img_w/2) -df['x_center'])
    feature_label_=df.image_name.unique().tolist()
    data=pd.DataFrame()
    for i in feature_label_:# т.к. yolov5l6 нашла все авто на изображениях, я отфильтровал по центру
        df_img= df.loc[df['image_name']==i]
        df_x_central=df_img[(df_img['x_center_total'] == df_img['x_center_total'].min())] 
        data=data.append(df_x_central)
   
    data['pixel_distance'] = ((data['x_max'] - data['x_min']) ** 2 + (data['y_max'] - data['y_min']) ** 2)
    data['pixel_distance_sqrt']=data['pixel_distance'].apply(np.sqrt)
    
    data['y_center'] = (data['y_min']+ data['y_max'])/2
  
    data['y_center_total']=abs((img_h/2)- data['y_center'])
    data['w'] = data['x_max'] - data['x_min']
    data['h'] = data['y_max'] - data['y_min']
    data['x_center_w'] = data['x_center']/img_w
    data['w_'] = data['w']/img_w
    data['y_center_h'] = data['y_center']/img_h
    data['h_'] = data['h']/img_h
    data['s']=((data['y_max']-data['y_min'])*(data['x_max']-data['x_min']))# площадь рамки
    data['p']=(((data['y_max']-data['y_min'])+(data['x_max']-data['x_min'])))*2  # периметр рамки
    data['corner']=np.rad2deg(np.arctan((data['y_max']-data['y_min'])/(data['x_max']-data['x_min'])))
    data['corner_img']=np.rad2deg(np.arctan((img_h-data['y_center'])/(img_w-data['x_center'])))
    return data

In [494]:
data_train=transform_feature(train_data_df,3024, 4032)
data_test=transform_feature(test_data_df,3024,4032)

In [496]:
distance=dict(zip(train_labels_df.image_name,train_labels_df.distance))
data_train['distance'] = data_train['image_name'].apply(lambda x: distance.get(x, 0))

In [495]:
data_train.expansion.value_counts()

.jpg     482
.heic     48
Name: expansion, dtype: int64

In [497]:
data_test.expansion.value_counts()

.jpg     485
.heic     36
Name: expansion, dtype: int64

In [527]:
corr = data_train[['x_min', 'y_min', 'x_max','y_max', 'conf','pixel_distance_sqrt','s','p','y_center','distance']].corr()
corr.style.background_gradient(cmap='coolwarm',axis=0)

Unnamed: 0,x_min,y_min,x_max,y_max,conf,pixel_distance_sqrt,s,p,y_center,distance
x_min,1.0,0.719182,-0.811899,-0.88844,-0.514947,-0.950473,-0.886879,-0.950444,-0.496446,0.749611
y_min,0.719182,1.0,-0.782577,-0.487138,-0.186205,-0.790336,-0.734031,-0.789728,0.155961,0.748807
x_max,-0.811899,-0.782577,1.0,0.842458,0.260043,0.950769,0.929371,0.950079,0.399623,-0.7633
y_max,-0.88844,-0.487138,0.842458,1.0,0.498717,0.915653,0.881202,0.91682,0.786664,-0.63834
conf,-0.514947,-0.186205,0.260043,0.498717,1.0,0.416047,0.271439,0.41924,0.432433,-0.283685
pixel_distance_sqrt,-0.950473,-0.790336,0.950769,0.915653,0.416047,1.0,0.953834,0.999966,0.476923,-0.79173
s,-0.886879,-0.734031,0.929371,0.881202,0.271439,0.953834,1.0,0.953363,0.477762,-0.665352
p,-0.950444,-0.789728,0.950079,0.91682,0.41924,0.999966,0.953363,1.0,0.478674,-0.790952
y_center,-0.496446,0.155961,0.399623,0.786664,0.432433,0.476923,0.477762,0.478674,1.0,-0.192631
distance,0.749611,0.748807,-0.7633,-0.63834,-0.283685,-0.79173,-0.665352,-0.790952,-0.192631,1.0


In [499]:
data_train=data_train.drop_duplicates(subset ='distance')

In [500]:
X_train_data=data_train.drop(['class','distance','shape_img','image_name','name','pixel_distance_sqrt','expansion'],axis=1)
X_valid=data_test.drop(['class','shape_img','image_name','name','pixel_distance_sqrt','expansion'],axis=1)
targets=data_train['distance'].values


In [501]:
X_train, X_val, y_train, y_val = train_test_split(X_train_data, targets, test_size=0.05, random_state=42)

In [502]:
models = [ ['Lasso: ', Lasso()],
           ['Ridge: ', Ridge()],
           ['KNeighborsRegressor: ',  KNeighborsRegressor()],
           ['BaggingRegressor: ',  BaggingRegressor(n_jobs=-1)],
           ['RandomForest ', RandomForestRegressor()],
           ['ExtraTreeRegressor :',ExtraTreesRegressor(n_jobs=-1)],
           ['GradientBoostingRegressor: ', GradientBoostingRegressor()] ,
           ['XGBRegressor: ', XGBRegressor(n_jobs=-1)] ,
           ['DecisionTreeRegressor: ', DecisionTreeRegressor()] ,
           ['LGBM: ', LGBMRegressor()] ,
           ['CatBoostRegressor: ', CatBoostRegressor()] ,
           ['HistGradientBoostingRegressor: ', HistGradientBoostingRegressor()] ,
           ['ElasticNet: ', ElasticNet()] ,
           ['StackingRegressor: ', StackingRegressor([('Extra',ExtraTreesRegressor() ),('GradientBoostingRegressor',GradientBoostingRegressor() ), ('Random',RandomForestRegressor())], n_jobs=-1)] ,
           
         ]

In [503]:
model_train = []
for name,train_model in models :
    train_model_data = {}
    train_model.random_state = 42
    train_model_data["Регрессор"] = name
    start = time.time()
    train_model.fit(X_train,y_train)
    end = time.time()
    train_model_data["Время обучения"] = end - start
    train_model_data["Train_R2_Score"] = r2_score(y_train,train_model.predict(X_train))
    train_model_data["Test_R2_Score"] = r2_score(y_val,train_model.predict(X_val))
    train_model_data["Test_RMSE_Score"] = sqrt(mean_squared_error(y_val,train_model.predict(X_val)))
    model_train.append(train_model_data)

Learning rate set to 0.034364
0:	learn: 1.8995721	total: 4.22ms	remaining: 4.22s
1:	learn: 1.8600952	total: 8.21ms	remaining: 4.1s
2:	learn: 1.8139551	total: 12.5ms	remaining: 4.17s
3:	learn: 1.7676755	total: 16.5ms	remaining: 4.11s
4:	learn: 1.7250544	total: 20.5ms	remaining: 4.08s
5:	learn: 1.6856883	total: 24.4ms	remaining: 4.04s
6:	learn: 1.6457964	total: 28.3ms	remaining: 4.01s
7:	learn: 1.6066342	total: 32.3ms	remaining: 4s
8:	learn: 1.5678161	total: 36.2ms	remaining: 3.98s
9:	learn: 1.5401745	total: 40.1ms	remaining: 3.97s
10:	learn: 1.5061180	total: 44ms	remaining: 3.96s
11:	learn: 1.4700740	total: 47.8ms	remaining: 3.94s
12:	learn: 1.4382722	total: 51.8ms	remaining: 3.93s
13:	learn: 1.4081689	total: 55.6ms	remaining: 3.92s
14:	learn: 1.3763366	total: 59.5ms	remaining: 3.91s
15:	learn: 1.3485498	total: 62.3ms	remaining: 3.83s
16:	learn: 1.3218609	total: 64.9ms	remaining: 3.75s
17:	learn: 1.2930607	total: 67.6ms	remaining: 3.69s
18:	learn: 1.2650145	total: 70.2ms	remaining: 3.62

204:	learn: 0.3884364	total: 692ms	remaining: 2.68s
205:	learn: 0.3877708	total: 695ms	remaining: 2.68s
206:	learn: 0.3870309	total: 698ms	remaining: 2.67s
207:	learn: 0.3868257	total: 700ms	remaining: 2.67s
208:	learn: 0.3857260	total: 704ms	remaining: 2.66s
209:	learn: 0.3845954	total: 706ms	remaining: 2.66s
210:	learn: 0.3844086	total: 709ms	remaining: 2.65s
211:	learn: 0.3841975	total: 712ms	remaining: 2.65s
212:	learn: 0.3836848	total: 714ms	remaining: 2.64s
213:	learn: 0.3832710	total: 717ms	remaining: 2.63s
214:	learn: 0.3824923	total: 720ms	remaining: 2.63s
215:	learn: 0.3820252	total: 722ms	remaining: 2.62s
216:	learn: 0.3816068	total: 725ms	remaining: 2.62s
217:	learn: 0.3805189	total: 728ms	remaining: 2.61s
218:	learn: 0.3803346	total: 730ms	remaining: 2.6s
219:	learn: 0.3800667	total: 733ms	remaining: 2.6s
220:	learn: 0.3798142	total: 735ms	remaining: 2.59s
221:	learn: 0.3795666	total: 738ms	remaining: 2.59s
222:	learn: 0.3789783	total: 741ms	remaining: 2.58s
223:	learn: 0.

399:	learn: 0.2626121	total: 1.22s	remaining: 1.83s
400:	learn: 0.2624704	total: 1.22s	remaining: 1.83s
401:	learn: 0.2621614	total: 1.23s	remaining: 1.82s
402:	learn: 0.2609964	total: 1.23s	remaining: 1.82s
403:	learn: 0.2608991	total: 1.23s	remaining: 1.82s
404:	learn: 0.2603186	total: 1.23s	remaining: 1.81s
405:	learn: 0.2595145	total: 1.24s	remaining: 1.81s
406:	learn: 0.2592095	total: 1.24s	remaining: 1.8s
407:	learn: 0.2581735	total: 1.24s	remaining: 1.8s
408:	learn: 0.2573321	total: 1.24s	remaining: 1.8s
409:	learn: 0.2564551	total: 1.25s	remaining: 1.79s
410:	learn: 0.2563707	total: 1.25s	remaining: 1.79s
411:	learn: 0.2559620	total: 1.25s	remaining: 1.79s
412:	learn: 0.2550231	total: 1.25s	remaining: 1.78s
413:	learn: 0.2547424	total: 1.26s	remaining: 1.78s
414:	learn: 0.2546615	total: 1.26s	remaining: 1.78s
415:	learn: 0.2545580	total: 1.26s	remaining: 1.77s
416:	learn: 0.2538902	total: 1.26s	remaining: 1.77s
417:	learn: 0.2536255	total: 1.27s	remaining: 1.76s
418:	learn: 0.2

584:	learn: 0.1804422	total: 1.75s	remaining: 1.24s
585:	learn: 0.1799847	total: 1.75s	remaining: 1.24s
586:	learn: 0.1798547	total: 1.75s	remaining: 1.23s
587:	learn: 0.1797630	total: 1.76s	remaining: 1.23s
588:	learn: 0.1796163	total: 1.76s	remaining: 1.23s
589:	learn: 0.1792975	total: 1.76s	remaining: 1.23s
590:	learn: 0.1791834	total: 1.77s	remaining: 1.22s
591:	learn: 0.1789741	total: 1.77s	remaining: 1.22s
592:	learn: 0.1788627	total: 1.77s	remaining: 1.22s
593:	learn: 0.1787926	total: 1.77s	remaining: 1.21s
594:	learn: 0.1780996	total: 1.78s	remaining: 1.21s
595:	learn: 0.1780265	total: 1.78s	remaining: 1.21s
596:	learn: 0.1773787	total: 1.78s	remaining: 1.21s
597:	learn: 0.1772530	total: 1.79s	remaining: 1.2s
598:	learn: 0.1765333	total: 1.79s	remaining: 1.2s
599:	learn: 0.1764813	total: 1.79s	remaining: 1.2s
600:	learn: 0.1760010	total: 1.8s	remaining: 1.19s
601:	learn: 0.1756328	total: 1.8s	remaining: 1.19s
602:	learn: 0.1755091	total: 1.8s	remaining: 1.19s
603:	learn: 0.1750

744:	learn: 0.1361320	total: 2.27s	remaining: 778ms
745:	learn: 0.1358852	total: 2.27s	remaining: 775ms
746:	learn: 0.1357106	total: 2.28s	remaining: 772ms
747:	learn: 0.1353364	total: 2.28s	remaining: 769ms
748:	learn: 0.1352671	total: 2.29s	remaining: 766ms
749:	learn: 0.1350108	total: 2.29s	remaining: 763ms
750:	learn: 0.1348063	total: 2.29s	remaining: 760ms
751:	learn: 0.1346928	total: 2.3s	remaining: 758ms
752:	learn: 0.1343839	total: 2.3s	remaining: 755ms
753:	learn: 0.1340895	total: 2.31s	remaining: 752ms
754:	learn: 0.1337702	total: 2.31s	remaining: 749ms
755:	learn: 0.1337044	total: 2.31s	remaining: 746ms
756:	learn: 0.1332985	total: 2.32s	remaining: 744ms
757:	learn: 0.1332303	total: 2.32s	remaining: 741ms
758:	learn: 0.1328849	total: 2.33s	remaining: 739ms
759:	learn: 0.1325738	total: 2.33s	remaining: 736ms
760:	learn: 0.1321065	total: 2.33s	remaining: 733ms
761:	learn: 0.1316412	total: 2.34s	remaining: 730ms
762:	learn: 0.1314426	total: 2.34s	remaining: 727ms
763:	learn: 0.

904:	learn: 0.1042798	total: 2.79s	remaining: 293ms
905:	learn: 0.1041820	total: 2.8s	remaining: 290ms
906:	learn: 0.1038988	total: 2.8s	remaining: 287ms
907:	learn: 0.1038719	total: 2.8s	remaining: 284ms
908:	learn: 0.1036293	total: 2.81s	remaining: 281ms
909:	learn: 0.1033121	total: 2.81s	remaining: 278ms
910:	learn: 0.1031361	total: 2.81s	remaining: 275ms
911:	learn: 0.1029492	total: 2.81s	remaining: 271ms
912:	learn: 0.1026821	total: 2.82s	remaining: 268ms
913:	learn: 0.1026603	total: 2.82s	remaining: 265ms
914:	learn: 0.1023721	total: 2.82s	remaining: 262ms
915:	learn: 0.1022558	total: 2.82s	remaining: 259ms
916:	learn: 0.1021722	total: 2.83s	remaining: 256ms
917:	learn: 0.1018695	total: 2.83s	remaining: 253ms
918:	learn: 0.1018222	total: 2.83s	remaining: 250ms
919:	learn: 0.1017604	total: 2.83s	remaining: 247ms
920:	learn: 0.1014201	total: 2.84s	remaining: 243ms
921:	learn: 0.1012319	total: 2.84s	remaining: 240ms
922:	learn: 0.1009156	total: 2.84s	remaining: 237ms
923:	learn: 0.1

In [505]:
df = pd.DataFrame(model_train)
df=df.sort_values(by=['Test_R2_Score'],ascending=False)
df

Unnamed: 0,Регрессор,Время обучения,Train_R2_Score,Test_R2_Score,Test_RMSE_Score
6,GradientBoostingRegressor:,0.246361,0.988057,0.865562,0.53039
2,KNeighborsRegressor:,0.002001,0.899619,0.863212,0.535007
10,CatBoostRegressor:,3.333425,0.997954,0.838438,0.581439
7,XGBRegressor:,0.487384,1.0,0.829594,0.597142
4,RandomForest,0.742459,0.983918,0.824678,0.605694
13,StackingRegressor:,2.510615,0.996253,0.822904,0.60875
3,BaggingRegressor:,3.162575,0.979595,0.80191,0.643822
5,ExtraTreeRegressor :,0.319192,1.0,0.789737,0.66331
11,HistGradientBoostingRegressor:,0.398955,0.98004,0.760745,0.707563
9,LGBM:,0.051012,0.980265,0.747698,0.726599


In [504]:
from sklearn.model_selection import GridSearchCV
param_grid = [{
              'max_depth': [10,80, 150, 200,250],
              'n_estimators' : [100,150,200,250,300],
              'max_features': ["auto", "sqrt", "log2"],
              'ccp_alpha': [0.001,0.01,0.1],
         
            }]
reg = ExtraTreesRegressor()

grid_search = GridSearchCV(estimator = reg, param_grid = param_grid, cv = 7, n_jobs=-1, scoring='r2' , verbose=2)
grid_search.fit(X_train_data, targets)

Fitting 7 folds for each of 225 candidates, totalling 1575 fits


In [506]:
grid_search.best_params_

{'ccp_alpha': 0.001,
 'max_depth': 200,
 'max_features': 'log2',
 'n_estimators': 250}

In [509]:
params = [{'n_estimators': [10,30,40, 100,200,300, 400],
           'learning_rate': [0.05, 0.1, 0.5],
           'min_samples_split': [8, 12],
           'min_samples_leaf': [2, 3, 4],
           'max_depth': [2, 3, 4]}]

gboost_reg = GradientBoostingRegressor()

cv_scheme = KFold(n_splits=7, shuffle=True, random_state=1)
cv = GridSearchCV(estimator=gboost_reg, param_grid=params, scoring='r2',
                  cv=cv_scheme, return_train_score=True, n_jobs=-1,verbose=2)

cv.fit(X_train_data, targets)

Fitting 7 folds for each of 378 candidates, totalling 2646 fits


In [510]:
cv.best_params_

{'learning_rate': 0.05,
 'max_depth': 4,
 'min_samples_leaf': 2,
 'min_samples_split': 8,
 'n_estimators': 300}

In [511]:
feature_cols=X_train_data.columns

In [512]:
cv = KFold(n_splits=15, random_state=7777, shuffle=True)
val_preds  = np.zeros(len(data_train))
train_preds = np.zeros(len(data_train))
val_preds_  = np.zeros(len(data_train))
train_preds_ = np.zeros(len(data_train))
models = []
tree_params = {      'ccp_alpha': 0.001,
                     'max_depth': 200,
                     'max_features': 'sqrt',
                     'n_estimators': 250}

gradient_params = {  'learning_rate': 0.05,
                     'max_depth': 4,
                     'min_samples_leaf': 2,
                     'min_samples_split': 8,
                     'n_estimators': 300}

for fold_, (train_idx, val_idx) in enumerate(cv.split(data_train, targets), 1):
    print(f"Training with fold {fold_} started")
    Extra_model = ExtraTreesRegressor(**tree_params)
    Gradient_model = GradientBoostingRegressor(**gradient_params)
    train, val = data_train.iloc[train_idx], data_train.iloc[val_idx]
    
    Extra_model.fit(train[feature_cols], train.distance.values)
    Gradient_model.fit(train[feature_cols], train.distance.values)
    
    val_preds[val_idx] = Extra_model.predict(val[feature_cols])
    val_preds_[val_idx] = Gradient_model.predict(val[feature_cols])
    train_preds[train_idx] += Extra_model.predict(train[feature_cols]) / (cv.n_splits-1)
    train_preds_[train_idx] += Gradient_model.predict(train[feature_cols]) / (cv.n_splits-1)
    models.append(Extra_model)
    models.append(Gradient_model)

    print('ExtraTrees', r2_score(val_preds[val_idx],val.distance.values))
    print('GradientBoosting', r2_score(val_preds_[val_idx],val.distance.values))
    print(f"Training with fold {fold_} completed")

Training with fold 1 started
ExtraTrees 0.8862800263543934
GradientBoosting 0.8553659530367973
Training with fold 1 completed
Training with fold 2 started
ExtraTrees 0.9018880014613023
GradientBoosting 0.8679551813940858
Training with fold 2 completed
Training with fold 3 started
ExtraTrees 0.9541636360287707
GradientBoosting 0.9328426409461665
Training with fold 3 completed
Training with fold 4 started
ExtraTrees 0.8999838519701254
GradientBoosting 0.7677704354054008
Training with fold 4 completed
Training with fold 5 started
ExtraTrees 0.9301541390682886
GradientBoosting 0.9332082775544868
Training with fold 5 completed
Training with fold 6 started
ExtraTrees 0.9361079565458302
GradientBoosting 0.939932244448946
Training with fold 6 completed
Training with fold 7 started
ExtraTrees 0.8529277371727662
GradientBoosting 0.9073143479123069
Training with fold 7 completed
Training with fold 8 started
ExtraTrees 0.7036632930135066
GradientBoosting 0.7018656283096194
Training with fold 8 com

In [513]:
print("Train Extra : ", r2_score(targets, train_preds))
print("Train Random : ", r2_score(targets, train_preds1))

Train Extra :  0.9948903347691159
Train Random :  0.9884109237872288


In [514]:
print("Test Extra: ", r2_score(targets, val_preds))
print("Test Random: ", r2_score(targets, val_preds1))

Test Extra:  0.8963003089889351
Test Random:  0.8917458843791196


In [515]:
import tqdm
score = np.zeros(len(data_test))

for model in tqdm.tqdm_notebook(models):
    score += model.predict(data_test[feature_cols]) / len(models)
    
submission = pd.DataFrame({
    "image_name" : data_test["image_name"].values,
    "distance" : score
}) 

submission.to_csv("submission.csv", sep=';', index=False)

  0%|          | 0/30 [00:00<?, ?it/s]

In [516]:
submission

Unnamed: 0,image_name,distance
0,img_2386.jpg,2.284082
1,img_1825.jpg,4.812706
2,img_2013.jpg,4.183098
3,img_1920.jpg,2.713581
4,img_2873.jpg,6.328343
...,...,...
516,img_2506.jpg,6.395616
517,img_2644.jpg,5.309754
518,img_2790.jpg,4.407507
519,img_1998.jpg,2.818792
