#### Условие задачи

Создать ML-модель, которая сможет прогнозировать количество пациентов по каждому виду заболевания согласно классификации МКБ-10, с учетом информации о месте проживания, половозрастных характеристиках людей, а также времени года.

Датасет предоставлен Медицинским информационно-аналитическим центром Калининградской области

#### Расшифровка признаков:  
PATIENT_SEX – пол группы пациентов  
MKB_CODE – первичный диагноз группы пациентов, код МКБ-10  
ADRES – населенный пункт группы пациентов  
VISIT_MONTH_YEAR – месяц и год постановки диагноза  
AGE_CATEGORY – возрастная категория группы пациентов (Классификация ВОЗ)  
PATIENT_ID_COUNT – кол-во пациентов в группе, которая характеризуется вышеперечисленными признаками 

#### Метрика
Коэффициент детерминации (R2)

In [1]:
import pandas as pd
from catboost import CatBoostRegressor, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [2]:
train_base = pd.read_csv('train_dataset_train.csv', sep=';', index_col=None, 
                    dtype={'PATIENT_SEX':str, 'MKB_CODE':str, 'ADRES':str,
                           'VISIT_MONTH_YEAR':str, 'AGE_CATEGORY':str, 'PATIENT_ID_COUNT':int})
test = pd.read_csv('test_dataset_test.csv', sep=';', index_col=None, 
                   dtype={'PATIENT_SEX':str, 'MKB_CODE':str, 'ADRES':str, 'VISIT_MONTH_YEAR':str, 'AGE_CATEGORY':str})

In [3]:
train_base.head()

Unnamed: 0,PATIENT_SEX,MKB_CODE,ADRES,VISIT_MONTH_YEAR,AGE_CATEGORY,PATIENT_ID_COUNT
0,0,A00.0,Гурьевск,8.21,young,1
1,0,A00.0,Калининград,3.2,children,1
2,0,A00,Гусев,3.19,children,1
3,0,A00,Калининград,1.22,children,1
4,0,A00,Калининград,2.18,children,1


In [4]:
test.head()

Unnamed: 0,PATIENT_SEX,MKB_CODE,ADRES,VISIT_MONTH_YEAR,AGE_CATEGORY
0,0,A00,Калининград,4.22,children
1,0,A00,Калининград,4.22,elderly
2,0,A00,Калининград,4.22,middleage
3,0,A00,Калининград,4.22,young
4,0,A01,Калининград,4.22,middleage


In [5]:
train_base.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2212393 entries, 0 to 2212392
Data columns (total 6 columns):
 #   Column            Dtype 
---  ------            ----- 
 0   PATIENT_SEX       object
 1   MKB_CODE          object
 2   ADRES             object
 3   VISIT_MONTH_YEAR  object
 4   AGE_CATEGORY      object
 5   PATIENT_ID_COUNT  int32 
dtypes: int32(1), object(5)
memory usage: 92.8+ MB


In [6]:
#выбрасываем все строки где меньше 30 посещений (id)
train= train_base
train = train_base[train_base['PATIENT_ID_COUNT']>30]
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 54729 entries, 1969 to 2210125
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   PATIENT_SEX       54729 non-null  object
 1   MKB_CODE          54729 non-null  object
 2   ADRES             54729 non-null  object
 3   VISIT_MONTH_YEAR  54729 non-null  object
 4   AGE_CATEGORY      54729 non-null  object
 5   PATIENT_ID_COUNT  54729 non-null  int32 
dtypes: int32(1), object(5)
memory usage: 2.7+ MB


In [7]:
#выбрасываем все строки где количество событий меньше 3 (id.count)
#train['COUNT'] = train.groupby(['PATIENT_ID_COUNT'])['PATIENT_ID_COUNT'].transform('count')
#train = train.loc[train['COUNT'] > 2]
#train = train.reset_index(drop=True)
#train = train.drop(columns=['COUNT'])

In [8]:
#train.info()

In [9]:
train

Unnamed: 0,PATIENT_SEX,MKB_CODE,ADRES,VISIT_MONTH_YEAR,AGE_CATEGORY,PATIENT_ID_COUNT
1969,0,A08.0,Калининград,04.19,children,31
2871,0,A09,Калининград,01.18,children,59
2886,0,A09,Калининград,02.18,children,51
2898,0,A09,Калининград,03.18,children,43
2911,0,A09,Калининград,04.18,children,45
...,...,...,...,...,...,...
2210121,1,Z76.2,Пионерский,11.21,children,41
2210122,1,Z76.2,Пионерский,12.18,children,42
2210123,1,Z76.2,Пионерский,12.19,children,37
2210124,1,Z76.2,Пионерский,12.20,children,39


In [10]:
#Отделение меток от данных

X = train[['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY']]
y = train[['PATIENT_ID_COUNT']]

In [11]:
#Разделение на train/test для локального тестирования

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)

In [12]:
#Создание объекта данных Pool, плюсы: возможность указать какие признаки являются категориальными

pool_train = Pool(X_train, y_train, cat_features = ['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY'])
pool_test = Pool(X_test, cat_features = ['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY'])

In [13]:
#Объявление CatBoostRegressor и обучение

model = CatBoostRegressor(task_type='GPU') #, early_stopping_rounds=100, eval_metric='R2', ) 
model.fit(pool_train)

Learning rate set to 0.066271
0:	learn: 349.1634814	total: 54.7ms	remaining: 54.7s
1:	learn: 336.8845285	total: 120ms	remaining: 1m
2:	learn: 326.2837261	total: 177ms	remaining: 58.7s
3:	learn: 316.5861894	total: 233ms	remaining: 57.9s
4:	learn: 308.3274494	total: 291ms	remaining: 57.8s
5:	learn: 300.2443065	total: 353ms	remaining: 58.4s
6:	learn: 291.8390481	total: 407ms	remaining: 57.7s
7:	learn: 285.1362235	total: 465ms	remaining: 57.6s
8:	learn: 279.0597582	total: 527ms	remaining: 58s
9:	learn: 273.6143604	total: 583ms	remaining: 57.7s
10:	learn: 266.2003856	total: 638ms	remaining: 57.4s
11:	learn: 261.5323682	total: 694ms	remaining: 57.2s
12:	learn: 257.3748569	total: 754ms	remaining: 57.2s
13:	learn: 251.7674454	total: 813ms	remaining: 57.2s
14:	learn: 246.3653725	total: 867ms	remaining: 57s
15:	learn: 243.2571013	total: 924ms	remaining: 56.8s
16:	learn: 240.4070458	total: 982ms	remaining: 56.8s
17:	learn: 237.5659439	total: 1.04s	remaining: 56.8s
18:	learn: 235.1608243	total: 1.

154:	learn: 148.4279137	total: 9.54s	remaining: 52s
155:	learn: 148.4061771	total: 9.6s	remaining: 51.9s
156:	learn: 148.0699514	total: 9.67s	remaining: 51.9s
157:	learn: 147.4468540	total: 9.74s	remaining: 51.9s
158:	learn: 147.3300334	total: 9.8s	remaining: 51.9s
159:	learn: 147.2648437	total: 9.87s	remaining: 51.8s
160:	learn: 147.1123854	total: 9.93s	remaining: 51.8s
161:	learn: 147.0998006	total: 9.99s	remaining: 51.7s
162:	learn: 147.0144951	total: 10.1s	remaining: 51.7s
163:	learn: 146.9001268	total: 10.1s	remaining: 51.6s
164:	learn: 146.8628668	total: 10.2s	remaining: 51.6s
165:	learn: 146.3179087	total: 10.3s	remaining: 51.6s
166:	learn: 146.2829286	total: 10.3s	remaining: 51.6s
167:	learn: 146.0923295	total: 10.4s	remaining: 51.5s
168:	learn: 146.0745583	total: 10.5s	remaining: 51.4s
169:	learn: 146.0002982	total: 10.5s	remaining: 51.3s
170:	learn: 145.8971482	total: 10.6s	remaining: 51.3s
171:	learn: 145.8199809	total: 10.7s	remaining: 51.3s
172:	learn: 145.6654635	total: 1

308:	learn: 134.9655233	total: 19.5s	remaining: 43.7s
309:	learn: 134.9448462	total: 19.6s	remaining: 43.7s
310:	learn: 134.9435571	total: 19.7s	remaining: 43.6s
311:	learn: 134.9424143	total: 19.8s	remaining: 43.6s
312:	learn: 134.9353947	total: 19.8s	remaining: 43.5s
313:	learn: 134.9280118	total: 19.9s	remaining: 43.5s
314:	learn: 134.9209102	total: 20s	remaining: 43.4s
315:	learn: 134.9189871	total: 20s	remaining: 43.4s
316:	learn: 134.8683706	total: 20.1s	remaining: 43.3s
317:	learn: 134.8232157	total: 20.2s	remaining: 43.2s
318:	learn: 134.8223158	total: 20.2s	remaining: 43.2s
319:	learn: 134.7681596	total: 20.3s	remaining: 43.2s
320:	learn: 134.7673895	total: 20.4s	remaining: 43.1s
321:	learn: 134.7026149	total: 20.4s	remaining: 43s
322:	learn: 134.7019095	total: 20.5s	remaining: 43s
323:	learn: 134.6955937	total: 20.6s	remaining: 42.9s
324:	learn: 134.1513537	total: 20.6s	remaining: 42.9s
325:	learn: 134.1501006	total: 20.7s	remaining: 42.8s
326:	learn: 134.0186419	total: 20.8s

461:	learn: 127.8818003	total: 29.8s	remaining: 34.7s
462:	learn: 127.8191171	total: 29.9s	remaining: 34.6s
463:	learn: 127.8110086	total: 29.9s	remaining: 34.6s
464:	learn: 127.8098878	total: 30s	remaining: 34.5s
465:	learn: 127.7622555	total: 30.1s	remaining: 34.5s
466:	learn: 127.6372506	total: 30.2s	remaining: 34.4s
467:	learn: 127.6364662	total: 30.2s	remaining: 34.4s
468:	learn: 127.6310147	total: 30.3s	remaining: 34.3s
469:	learn: 127.6299896	total: 30.4s	remaining: 34.3s
470:	learn: 127.5793513	total: 30.5s	remaining: 34.2s
471:	learn: 127.5379826	total: 30.5s	remaining: 34.2s
472:	learn: 127.5326874	total: 30.6s	remaining: 34.1s
473:	learn: 127.5317761	total: 30.7s	remaining: 34.1s
474:	learn: 127.5314609	total: 30.8s	remaining: 34s
475:	learn: 127.4777850	total: 30.8s	remaining: 34s
476:	learn: 127.4282792	total: 30.9s	remaining: 33.9s
477:	learn: 127.4264495	total: 31s	remaining: 33.8s
478:	learn: 127.4105091	total: 31.1s	remaining: 33.8s
479:	learn: 127.3375040	total: 31.1s

614:	learn: 124.5112219	total: 39.6s	remaining: 24.8s
615:	learn: 124.5111808	total: 39.7s	remaining: 24.7s
616:	learn: 124.5111045	total: 39.7s	remaining: 24.7s
617:	learn: 124.4859491	total: 39.8s	remaining: 24.6s
618:	learn: 124.4855851	total: 39.8s	remaining: 24.5s
619:	learn: 124.4635897	total: 39.9s	remaining: 24.5s
620:	learn: 124.4635545	total: 40s	remaining: 24.4s
621:	learn: 124.4628968	total: 40s	remaining: 24.3s
622:	learn: 124.4623213	total: 40.1s	remaining: 24.3s
623:	learn: 124.4622391	total: 40.1s	remaining: 24.2s
624:	learn: 124.3501396	total: 40.2s	remaining: 24.1s
625:	learn: 124.3497870	total: 40.3s	remaining: 24.1s
626:	learn: 124.3061735	total: 40.4s	remaining: 24s
627:	learn: 124.3054738	total: 40.4s	remaining: 24s
628:	learn: 124.3054503	total: 40.5s	remaining: 23.9s
629:	learn: 124.3054032	total: 40.6s	remaining: 23.8s
630:	learn: 124.2734195	total: 40.7s	remaining: 23.8s
631:	learn: 124.2733783	total: 40.7s	remaining: 23.7s
632:	learn: 124.2728549	total: 40.8s

767:	learn: 122.8764986	total: 49.8s	remaining: 15.1s
768:	learn: 122.8489857	total: 49.9s	remaining: 15s
769:	learn: 122.7508886	total: 50s	remaining: 14.9s
770:	learn: 122.7260930	total: 50.1s	remaining: 14.9s
771:	learn: 122.7006372	total: 50.1s	remaining: 14.8s
772:	learn: 122.6936797	total: 50.2s	remaining: 14.7s
773:	learn: 122.6936499	total: 50.2s	remaining: 14.7s
774:	learn: 122.6575218	total: 50.3s	remaining: 14.6s
775:	learn: 122.6456753	total: 50.4s	remaining: 14.5s
776:	learn: 122.6285174	total: 50.4s	remaining: 14.5s
777:	learn: 122.6285174	total: 50.5s	remaining: 14.4s
778:	learn: 122.6219194	total: 50.5s	remaining: 14.3s
779:	learn: 122.5867420	total: 50.6s	remaining: 14.3s
780:	learn: 122.5494134	total: 50.7s	remaining: 14.2s
781:	learn: 122.5193634	total: 50.8s	remaining: 14.2s
782:	learn: 122.5095738	total: 50.8s	remaining: 14.1s
783:	learn: 122.4918180	total: 50.9s	remaining: 14s
784:	learn: 122.4752473	total: 51s	remaining: 14s
785:	learn: 122.4691662	total: 51s	rem

921:	learn: 120.1591100	total: 1m	remaining: 5.1s
922:	learn: 120.1590857	total: 1m	remaining: 5.03s
923:	learn: 120.1419559	total: 1m	remaining: 4.97s
924:	learn: 120.1136828	total: 1m	remaining: 4.9s
925:	learn: 120.1110967	total: 1m	remaining: 4.84s
926:	learn: 120.0887688	total: 1m	remaining: 4.77s
927:	learn: 120.0703020	total: 1m	remaining: 4.71s
928:	learn: 120.0689019	total: 1m	remaining: 4.64s
929:	learn: 119.9895663	total: 1m	remaining: 4.58s
930:	learn: 119.9861064	total: 1m	remaining: 4.51s
931:	learn: 119.9860943	total: 1m	remaining: 4.44s
932:	learn: 119.9692627	total: 1m	remaining: 4.38s
933:	learn: 119.9424662	total: 1m 1s	remaining: 4.31s
934:	learn: 119.9273227	total: 1m 1s	remaining: 4.25s
935:	learn: 119.9154077	total: 1m 1s	remaining: 4.18s
936:	learn: 119.8713758	total: 1m 1s	remaining: 4.12s
937:	learn: 119.8683394	total: 1m 1s	remaining: 4.05s
938:	learn: 119.8625956	total: 1m 1s	remaining: 3.99s
939:	learn: 119.8534000	total: 1m 1s	remaining: 3.92s
940:	learn: 

<catboost.core.CatBoostRegressor at 0x28a9b825a30>

In [14]:
#Получение ответов модели на тестовой выборке в локальном тестировании 

y_pred = model.predict(pool_test)

In [15]:
#На локальном тестировании модель выдаёт такой результат

print("Значение метрики R2 на test: ", r2_score(y_test, y_pred))

Значение метрики R2 на test:  0.9044611331221931


In [16]:
#Формируем sample_solution. В обучении используется весь train, ответы получаем на test
#X = train_base[['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY']]
#y = train_base[['PATIENT_ID_COUNT']]

pool_train_solution = Pool(X, y, cat_features = ['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY'])
pool_test_solution = Pool(test, cat_features = ['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY'])

model_solution = CatBoostRegressor(task_type='GPU') #, early_stopping_rounds=20, eval_metric='r2')
model_solution.fit(pool_train_solution)

Learning rate set to 0.068237
0:	learn: 347.1156610	total: 32.5ms	remaining: 32.5s
1:	learn: 334.1487473	total: 67.2ms	remaining: 33.5s
2:	learn: 321.3812358	total: 100ms	remaining: 33.4s
3:	learn: 311.8149534	total: 134ms	remaining: 33.3s
4:	learn: 302.0794824	total: 166ms	remaining: 33s
5:	learn: 293.6242337	total: 201ms	remaining: 33.4s
6:	learn: 285.7502560	total: 244ms	remaining: 34.6s
7:	learn: 278.9932824	total: 285ms	remaining: 35.3s
8:	learn: 273.0348149	total: 324ms	remaining: 35.7s
9:	learn: 267.2506591	total: 359ms	remaining: 35.5s
10:	learn: 261.8738597	total: 396ms	remaining: 35.6s
11:	learn: 257.7664848	total: 431ms	remaining: 35.5s
12:	learn: 253.1145930	total: 466ms	remaining: 35.4s
13:	learn: 248.4588073	total: 501ms	remaining: 35.3s
14:	learn: 244.8015419	total: 540ms	remaining: 35.5s
15:	learn: 241.0770936	total: 576ms	remaining: 35.4s
16:	learn: 238.2674808	total: 613ms	remaining: 35.4s
17:	learn: 233.2848745	total: 649ms	remaining: 35.4s
18:	learn: 229.7206312	tot

156:	learn: 152.0554171	total: 5.46s	remaining: 29.3s
157:	learn: 151.9473025	total: 5.5s	remaining: 29.3s
158:	learn: 151.9091175	total: 5.53s	remaining: 29.3s
159:	learn: 151.7965617	total: 5.57s	remaining: 29.3s
160:	learn: 151.5273707	total: 5.61s	remaining: 29.2s
161:	learn: 151.5194061	total: 5.64s	remaining: 29.2s
162:	learn: 151.0196795	total: 5.67s	remaining: 29.1s
163:	learn: 150.5002838	total: 5.7s	remaining: 29.1s
164:	learn: 150.3103100	total: 5.74s	remaining: 29s
165:	learn: 150.2542454	total: 5.76s	remaining: 29s
166:	learn: 150.1433229	total: 5.8s	remaining: 28.9s
167:	learn: 150.1307750	total: 5.83s	remaining: 28.9s
168:	learn: 150.0062208	total: 5.87s	remaining: 28.8s
169:	learn: 149.8884601	total: 5.91s	remaining: 28.8s
170:	learn: 149.7188780	total: 5.94s	remaining: 28.8s
171:	learn: 149.4812542	total: 5.97s	remaining: 28.7s
172:	learn: 149.1971878	total: 6s	remaining: 28.7s
173:	learn: 149.1358668	total: 6.04s	remaining: 28.7s
174:	learn: 149.1189525	total: 6.07s	r

312:	learn: 132.2121539	total: 10.9s	remaining: 24s
313:	learn: 132.1439646	total: 11s	remaining: 23.9s
314:	learn: 132.0435784	total: 11s	remaining: 23.9s
315:	learn: 131.8242152	total: 11.1s	remaining: 23.9s
316:	learn: 131.7570942	total: 11.1s	remaining: 23.9s
317:	learn: 131.5580282	total: 11.1s	remaining: 23.9s
318:	learn: 131.4335878	total: 11.2s	remaining: 23.9s
319:	learn: 131.3917373	total: 11.2s	remaining: 23.9s
320:	learn: 131.3380766	total: 11.3s	remaining: 23.9s
321:	learn: 131.2225311	total: 11.3s	remaining: 23.9s
322:	learn: 131.2022780	total: 11.4s	remaining: 23.9s
323:	learn: 131.1824630	total: 11.5s	remaining: 23.9s
324:	learn: 131.0117592	total: 11.5s	remaining: 23.9s
325:	learn: 130.9365592	total: 11.6s	remaining: 23.9s
326:	learn: 130.9357867	total: 11.6s	remaining: 23.9s
327:	learn: 130.9022596	total: 11.7s	remaining: 23.9s
328:	learn: 130.8724500	total: 11.7s	remaining: 23.9s
329:	learn: 130.7060742	total: 11.8s	remaining: 23.9s
330:	learn: 130.7036451	total: 11.

466:	learn: 120.8371271	total: 17s	remaining: 19.4s
467:	learn: 120.7466717	total: 17s	remaining: 19.4s
468:	learn: 120.6249994	total: 17.1s	remaining: 19.3s
469:	learn: 120.5841447	total: 17.1s	remaining: 19.3s
470:	learn: 120.5441493	total: 17.1s	remaining: 19.2s
471:	learn: 120.4616723	total: 17.2s	remaining: 19.2s
472:	learn: 120.4013970	total: 17.2s	remaining: 19.2s
473:	learn: 120.2610230	total: 17.2s	remaining: 19.1s
474:	learn: 120.1956903	total: 17.3s	remaining: 19.1s
475:	learn: 120.0870788	total: 17.3s	remaining: 19.1s
476:	learn: 120.0288368	total: 17.4s	remaining: 19s
477:	learn: 119.9917896	total: 17.4s	remaining: 19s
478:	learn: 119.8141605	total: 17.4s	remaining: 18.9s
479:	learn: 119.7691336	total: 17.4s	remaining: 18.9s
480:	learn: 119.6294791	total: 17.5s	remaining: 18.9s
481:	learn: 119.6262679	total: 17.5s	remaining: 18.8s
482:	learn: 119.5826811	total: 17.5s	remaining: 18.8s
483:	learn: 119.5523330	total: 17.6s	remaining: 18.7s
484:	learn: 119.4766934	total: 17.6s

619:	learn: 112.5809788	total: 22.7s	remaining: 13.9s
620:	learn: 112.5558494	total: 22.8s	remaining: 13.9s
621:	learn: 112.4768823	total: 22.8s	remaining: 13.9s
622:	learn: 112.3908420	total: 22.8s	remaining: 13.8s
623:	learn: 112.3581872	total: 22.9s	remaining: 13.8s
624:	learn: 112.2710091	total: 22.9s	remaining: 13.7s
625:	learn: 112.2455134	total: 22.9s	remaining: 13.7s
626:	learn: 112.2318126	total: 23s	remaining: 13.7s
627:	learn: 112.2200484	total: 23s	remaining: 13.6s
628:	learn: 112.2101536	total: 23s	remaining: 13.6s
629:	learn: 112.2082204	total: 23.1s	remaining: 13.6s
630:	learn: 112.1977044	total: 24.2s	remaining: 14.2s
631:	learn: 112.1885998	total: 24.3s	remaining: 14.1s
632:	learn: 112.1770344	total: 24.3s	remaining: 14.1s
633:	learn: 112.1743292	total: 24.4s	remaining: 14.1s
634:	learn: 112.1352605	total: 24.4s	remaining: 14s
635:	learn: 112.0683889	total: 24.4s	remaining: 14s
636:	learn: 112.0539620	total: 24.5s	remaining: 13.9s
637:	learn: 112.0163286	total: 24.5s	r

778:	learn: 106.3546167	total: 35.4s	remaining: 10s
779:	learn: 106.3299680	total: 35.4s	remaining: 10s
780:	learn: 106.3166158	total: 35.5s	remaining: 9.95s
781:	learn: 106.2969089	total: 35.5s	remaining: 9.9s
782:	learn: 106.2854339	total: 35.5s	remaining: 9.85s
783:	learn: 106.2816655	total: 35.6s	remaining: 9.8s
784:	learn: 106.2713059	total: 35.6s	remaining: 9.75s
785:	learn: 106.2534891	total: 35.6s	remaining: 9.7s
786:	learn: 106.2469570	total: 35.6s	remaining: 9.65s
787:	learn: 106.2356528	total: 35.7s	remaining: 9.6s
788:	learn: 106.2263014	total: 35.7s	remaining: 9.55s
789:	learn: 106.1944049	total: 35.7s	remaining: 9.5s
790:	learn: 106.1588967	total: 37.3s	remaining: 9.85s
791:	learn: 106.1527884	total: 37.3s	remaining: 9.8s
792:	learn: 106.1339050	total: 37.3s	remaining: 9.75s
793:	learn: 106.1146710	total: 37.4s	remaining: 9.7s
794:	learn: 106.1005698	total: 37.4s	remaining: 9.64s
795:	learn: 106.0725382	total: 37.4s	remaining: 9.59s
796:	learn: 106.0556418	total: 37.5s	re

935:	learn: 104.1514056	total: 47.6s	remaining: 3.26s
936:	learn: 104.1498337	total: 47.7s	remaining: 3.21s
937:	learn: 104.1394528	total: 47.7s	remaining: 3.15s
938:	learn: 104.1362974	total: 47.7s	remaining: 3.1s
939:	learn: 104.1160992	total: 47.8s	remaining: 3.05s
940:	learn: 104.1098317	total: 47.8s	remaining: 3s
941:	learn: 104.1043165	total: 47.8s	remaining: 2.94s
942:	learn: 104.0958016	total: 47.8s	remaining: 2.89s
943:	learn: 104.0885779	total: 47.9s	remaining: 2.84s
944:	learn: 104.0630890	total: 47.9s	remaining: 2.79s
945:	learn: 104.0508508	total: 47.9s	remaining: 2.74s
946:	learn: 104.0498561	total: 48s	remaining: 2.68s
947:	learn: 104.0478331	total: 48s	remaining: 2.63s
948:	learn: 104.0025187	total: 48s	remaining: 2.58s
949:	learn: 103.9971552	total: 48s	remaining: 2.53s
950:	learn: 103.9917577	total: 48.1s	remaining: 2.48s
951:	learn: 103.9826037	total: 48.1s	remaining: 2.42s
952:	learn: 103.9761652	total: 48.1s	remaining: 2.37s
953:	learn: 103.9595861	total: 48.1s	rem

<catboost.core.CatBoostRegressor at 0x28a9b8250d0>

In [23]:
#Получение ответов

y_pred_solution = model.predict(pool_test_solution)

In [24]:
#Вот так они выглядят

y_pred_solution.astype(int)

array([-1, -5,  0, ..., 18, 24, 32])

In [19]:
#print (y_pred_solution.min(), y_pred_solution.max(), y_pred_solution.mean())

-170.26888558470228 1380.2876517290701 18.57675443823908


In [20]:
for i in range(len(y_pred_solution)):
    if y_pred_solution[i] < 1:
        y_pred_solution[i] = 1
y_pred_solution.astype(int)

array([ 1,  1,  1, ..., 18, 24, 32])

In [25]:
#Формируем sample_solution для отправки на платформу
test_solution = test.copy()
test_solution['PATIENT_ID_COUNT'] = y_pred_solution.astype(int)

In [26]:
#Сохраняем в csv файл
 
test_solution.to_csv('sample_solution_sample2.csv', sep=';', index=None)

In [27]:
train = pd.read_csv('train_dataset_train.csv', sep=';')
itog = pd.read_csv('sample_solution_sample2.csv', sep=';')
itog_old = pd.read_csv('sample_solution_sample.csv', sep=';')
display (itog.head(10))
display (train.head(10))
display (itog_old.head(10))

Unnamed: 0,PATIENT_SEX,MKB_CODE,ADRES,VISIT_MONTH_YEAR,AGE_CATEGORY,PATIENT_ID_COUNT
0,0,A00,Калининград,4.22,children,-1
1,0,A00,Калининград,4.22,elderly,-5
2,0,A00,Калининград,4.22,middleage,0
3,0,A00,Калининград,4.22,young,13
4,0,A01,Калининград,4.22,middleage,0
5,0,A02.0,Гурьевск,4.22,children,20
6,0,A02.0,Калининград,4.22,children,-1
7,0,A02.0,Черняховск,4.22,children,26
8,0,A03.9,Калининград,4.22,children,-1
9,0,A04.0,Черняховск,4.22,children,26


Unnamed: 0,PATIENT_SEX,MKB_CODE,ADRES,VISIT_MONTH_YEAR,AGE_CATEGORY,PATIENT_ID_COUNT
0,0,A00.0,Гурьевск,8.21,young,1
1,0,A00.0,Калининград,3.2,children,1
2,0,A00,Гусев,3.19,children,1
3,0,A00,Калининград,1.22,children,1
4,0,A00,Калининград,2.18,children,1
5,0,A00,Калининград,3.22,children,4
6,0,A00,Калининград,3.22,elderly,1
7,0,A00,Калининград,3.22,middleage,1
8,0,A00,Калининград,3.22,young,3
9,0,A00,Калининград,7.18,young,1


Unnamed: 0,PATIENT_SEX,MKB_CODE,ADRES,VISIT_MONTH_YEAR,AGE_CATEGORY,PATIENT_ID_COUNT
0,0,A00,Калининград,4.22,children,1
1,0,A00,Калининград,4.22,elderly,1
2,0,A00,Калининград,4.22,middleage,1
3,0,A00,Калининград,4.22,young,1
4,0,A01,Калининград,4.22,middleage,1
5,0,A02.0,Гурьевск,4.22,children,1
6,0,A02.0,Калининград,4.22,children,1
7,0,A02.0,Черняховск,4.22,children,1
8,0,A03.9,Калининград,4.22,children,1
9,0,A04.0,Черняховск,4.22,children,0


In [28]:
print (y_pred_solution.min(), y_pred_solution.max(), y_pred_solution.mean())

-170.26888558470228 1380.2876517290701 18.57675443823908
