#### Условие задачи

Создать ML-модель, которая сможет прогнозировать количество пациентов по каждому виду заболевания согласно классификации МКБ-10, с учетом информации о месте проживания, половозрастных характеристиках людей, а также времени года.

Датасет предоставлен Медицинским информационно-аналитическим центром Калининградской области

#### Расшифровка признаков:  
PATIENT_SEX – пол группы пациентов  
MKB_CODE – первичный диагноз группы пациентов, код МКБ-10  
ADRES – населенный пункт группы пациентов  
VISIT_MONTH_YEAR – месяц и год постановки диагноза  
AGE_CATEGORY – возрастная категория группы пациентов (Классификация ВОЗ)  
PATIENT_ID_COUNT – кол-во пациентов в группе, которая характеризуется вышеперечисленными признаками 

#### Метрика
Коэффициент детерминации (R2)

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from catboost import CatBoostRegressor, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from lightgbm import LGBMRegressor
#from tensorflow import keras
import time 
import warnings
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
#train = pd.read_csv('train_dataset_train.csv', sep=';')
#test = pd.read_csv('test_dataset_test.csv', sep=';')
train = pd.read_csv('train_dataset_train.csv', sep=';', index_col=None, dtype={'PATIENT_SEX':str, 'MKB_CODE':str, 'ADRES':str, 'VISIT_MONTH_YEAR':str, 'AGE_CATEGORY':str, 'PATIENT_ID_COUNT':int})
test = pd.read_csv('test_dataset_test.csv', sep=';', index_col=None, dtype={'PATIENT_SEX':str, 'MKB_CODE':str, 'ADRES':str, 'VISIT_MONTH_YEAR':str, 'AGE_CATEGORY':str})

In [86]:
COLUMN = ['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY', 'PATIENT_ID_COUNT']
#COLUMN_XY = ['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH', 'VISIT_YEAR', 'AGE_CATEGORY']
#-year
COLUMN_XY = ['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH', 'AGE_CATEGORY']
RND = 12345
warnings.filterwarnings('ignore') 

In [5]:
display(train.head())

Unnamed: 0,PATIENT_SEX,MKB_CODE,ADRES,VISIT_MONTH_YEAR,AGE_CATEGORY,PATIENT_ID_COUNT
0,0,A00.0,Гурьевск,8.21,young,1
1,0,A00.0,Калининград,3.2,children,1
2,0,A00,Гусев,3.19,children,1
3,0,A00,Калининград,1.22,children,1
4,0,A00,Калининград,2.18,children,1


In [None]:
# смотрим тип данных, проверяем на пропуски
train.info()
for i in COLUMN:
    print(i, 'Null:', pd.isnull(train[i]).sum())

In [None]:
test.head()

In [None]:
# смотрим тип данных, проверяем на пропуски
test.info()

In [7]:
a=0
abc=[]
for i in range(len(train['MKB_CODE'])):
    if train['MKB_CODE'][i] == 'M32':
        print(train.loc[i])
        abc.append(train.loc[i])
        a=a+1
print ('итого', a)

PATIENT_SEX                0
MKB_CODE                 M32
ADRES               Балтийск
VISIT_MONTH_YEAR       02.19
AGE_CATEGORY           young
PATIENT_ID_COUNT           1
Name: 815171, dtype: object
PATIENT_SEX                 0
MKB_CODE                  M32
ADRES                Балтийск
VISIT_MONTH_YEAR        03.19
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            2
Name: 815172, dtype: object


AttributeError: 'NoneType' object has no attribute 'append'

In [15]:
lkm = train[train['MKB_CODE'] == 'M32.9' or train['MKB_CODE'] == 'M32.9']
lkm
lkm['PATIENT_ID_COUNT'].value_counts()

1    127
2     20
3      6
Name: PATIENT_ID_COUNT, dtype: int64

In [None]:
#Количество уникальных значений
#for i in COLUMN:
#    display(i, train[i].value_counts())

In [None]:
#список уникальных значений
#for i in COLUMN:
#    print(i, ':', ' '.join(map(str, train[i].unique())), '\n')

In [None]:
#проверяем на дубликаты
train[train.duplicated() == True]

In [None]:
train_count = train['PATIENT_ID_COUNT'].value_counts()
train_count

In [None]:
#train_count.plot(kind='bar')

In [None]:
print(' '.join(map(str, train['PATIENT_ID_COUNT'].unique())), '\n')

In [None]:
#a=[]
#a = pd.DataFrame(train ['PATIENT_ID_COUNT'] > 10)
#a.value_counts()

In [None]:
print (train['PATIENT_ID_COUNT'].min(), train['PATIENT_ID_COUNT'].max(), train['PATIENT_ID_COUNT'].mean())

In [87]:
#выбрасываем все строки где меньше 10 посещений (id)
train_2 = train[train['PATIENT_ID_COUNT']>0]
#train_2 = train[train['PATIENT_ID_COUNT']<0]
train_2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2212393 entries, 0 to 2212392
Data columns (total 6 columns):
 #   Column            Dtype 
---  ------            ----- 
 0   PATIENT_SEX       object
 1   MKB_CODE          object
 2   ADRES             object
 3   VISIT_MONTH_YEAR  object
 4   AGE_CATEGORY      object
 5   PATIENT_ID_COUNT  int32 
dtypes: int32(1), object(5)
memory usage: 109.7+ MB


In [88]:
#выбрасываем все строки где количество событий меньше 3 (id.count)
train_2['COUNT'] = train_2.groupby(['PATIENT_ID_COUNT'])['PATIENT_ID_COUNT'].transform('count')
#train_3 = train_2.loc[train_2['COUNT'] > 2]
train_3 = train_2.loc[train_2['COUNT'] > 5]
train=train_3.reset_index(drop=True)

In [89]:
train = train.drop(columns=['COUNT'])

In [65]:
train.describe()

Unnamed: 0,PATIENT_ID_COUNT
count,2210304.0
mean,4.556542
std,18.47093
min,1.0
25%,1.0
50%,1.0
75%,2.0
max,811.0


#### Разделяем дату

In [66]:
def data_visit (data):
    visit = pd.DataFrame(data['VISIT_MONTH_YEAR'].astype(str))
    visit = visit['VISIT_MONTH_YEAR'].str.split('.',expand=True)
    visit.columns = ['VISIT_MONTH','VISIT_YEAR']
    visit.loc[visit['VISIT_YEAR'] == '2', 'VISIT_YEAR'] = '20' #исправляем значение года 2 на 20
    visit = pd.concat([data, visit],axis=1)
    visit = visit.drop(columns=['VISIT_MONTH_YEAR'])
    return visit

In [67]:
train_visit = data_visit (train)
test_visit = data_visit (test)

In [68]:
#гипотеза о годах
train_visit = train_visit.drop(columns=['VISIT_YEAR'])
test_visit = test_visit.drop(columns=['VISIT_YEAR'])

In [69]:
train_visit.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2210304 entries, 0 to 2210303
Data columns (total 6 columns):
 #   Column            Dtype 
---  ------            ----- 
 0   PATIENT_SEX       object
 1   MKB_CODE          object
 2   ADRES             object
 3   AGE_CATEGORY      object
 4   PATIENT_ID_COUNT  int32 
 5   VISIT_MONTH       object
dtypes: int32(1), object(5)
memory usage: 92.7+ MB


In [70]:
train_visit

Unnamed: 0,PATIENT_SEX,MKB_CODE,ADRES,AGE_CATEGORY,PATIENT_ID_COUNT,VISIT_MONTH
0,0,A00.0,Гурьевск,young,1,08
1,0,A00.0,Калининград,children,1,03
2,0,A00,Гусев,children,1,03
3,0,A00,Калининград,children,1,01
4,0,A00,Калининград,children,1,02
...,...,...,...,...,...,...
2210299,1,Z99.1,Гурьевск,children,1,12
2210300,1,Z99.8,Калининград,young,1,10
2210301,1,Z99.9,Калининград,children,2,04
2210302,1,Z99.9,Калининград,children,1,08


#### Кодируем категориальные переменные

In [None]:
df = pd.concat([train_visit, test_visit], axis=0)
df['PATIENT_SEX'] = df['PATIENT_SEX'].astype(float)

In [None]:
def data_category (data):
    encoder = OrdinalEncoder()
    encoder.fit(data)
    category = pd.DataFrame(encoder.transform(data), columns=data.columns)
    return category

In [None]:
df

In [None]:
#train_category = train_visit.drop(columns=['VISIT_MONTH', 'VISIT_YEAR', 'PATIENT_ID_COUNT'])
#test_category = test_visit.drop(columns=['VISIT_MONTH', 'VISIT_YEAR'])

#train_category = train_visit.drop(columns=['VISIT_MONTH', 'PATIENT_ID_COUNT']) #-year
#test_category = test_visit.drop(columns=['VISIT_MONTH'])

df_1 = df.drop(columns=['VISIT_MONTH', 'PATIENT_ID_COUNT']) #-year

In [None]:
df_1.info()

In [None]:
#train_category = data_category (train_category)
df_1 = data_category (df)
#df_1 = pd.concat([df_1, df['VISIT_MONTH'], df['PATIENT_ID_COUNT']], axis=1)
#train_category = df_1.loc[df_1['PATIENT_ID_COUNT'] >= 0]
#train=train_3.reset_index(drop=True)



In [None]:
#train_category = pd.concat([train_category, train_visit['VISIT_MONTH'].astype(np.float64),
#                            train_visit['VISIT_YEAR'].astype(np.float64), train_visit['PATIENT_ID_COUNT']], axis=1)

In [None]:
#-year
train_category = pd.concat([train_category, train_visit['VISIT_MONTH'].astype(np.float64),
                          train_visit['PATIENT_ID_COUNT']], axis=1)

In [None]:
test_category = data_category (test_category)

In [None]:
#test_category = pd.concat([test_category, test_visit['VISIT_MONTH'].astype(np.float64),
#                            test_visit['VISIT_YEAR'].astype(np.float64)], axis=1)

In [None]:
#-year
test_category = pd.concat ([test_category, test_visit['VISIT_MONTH'].astype(np.float64)], axis=1)

In [None]:
#проверяем данные на мультиколлинеарность
#train_category.corr()

In [None]:
train_category.info()

In [None]:
train_category

In [None]:
train_category[pd.isnull(train_category['VISIT_MONTH'])]

### Выборки для модели

In [None]:
#X = train_category.drop(columns=['PATIENT_ID_COUNT'])
#y = train_category['PATIENT_ID_COUNT']

In [None]:
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RND)

In [None]:
#X_train[pd.isnull(X_train['VISIT_MONTH'])]

#### CatBoost

In [None]:
X = train_visit.drop(columns=['PATIENT_ID_COUNT'])
y = train_visit['PATIENT_ID_COUNT']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RND)

In [None]:
pool_train = Pool(X_train.astype(str), y_train, cat_features = COLUMN_XY)
pool_test = Pool(X_test.astype(str), cat_features = COLUMN_XY)

In [None]:
%%time
model = CatBoostRegressor(task_type='GPU', random_state = RND)
model.fit(pool_train)

In [None]:
%%time
y_pred = model.predict(pool_test)

In [None]:
print("Значение метрики R2 на test: ", r2_score(y_test, y_pred))

In [None]:
y_pred = y_pred.astype(int)
print (y_pred.min(), y_pred.max(), y_pred.mean())

In [71]:
#catboost

X = train_visit[['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH', 'AGE_CATEGORY']]
y = train_visit[['PATIENT_ID_COUNT']]
#X = train[['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY']]
#y = train[['PATIENT_ID_COUNT']]


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RND)


pool_train_solution = Pool(X_train, y_train, cat_features = COLUMN_XY)
pool_test_solution = Pool(X_test, cat_features = COLUMN_XY)
#pool_train_solution = Pool(X, y, cat_features = ['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY'])
#pool_test_solution = Pool(test, cat_features = ['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY'])

model_solution = CatBoostRegressor(task_type='GPU')
model_solution.fit(pool_train_solution)
y_pred_solution = model_solution.predict(pool_test_solution)
y_pred_solution.astype(int)

for i in range(len(y_pred_solution)):
    if y_pred_solution[i] < 0:
        y_pred_solution[i] = 1

print("Значение метрики R2 на test: ", r2_score(y_test, y_pred_solution))

#test_111 = pd.read_csv('test_dataset_test.csv', sep=';')
#train_111 = pd.read_csv('train_dataset_train.csv', sep=';')
#test_111['PATIENT_ID_COUNT'] = y_pred_solution.astype(int)
#display (test_111.head(10))
#display (train_111.head(10))



Learning rate set to 0.107582
0:	learn: 17.8444308	total: 475ms	remaining: 7m 54s
1:	learn: 17.3817920	total: 774ms	remaining: 6m 26s
2:	learn: 16.9796198	total: 1.07s	remaining: 5m 56s
3:	learn: 16.6481993	total: 1.37s	remaining: 5m 41s
4:	learn: 16.3332208	total: 1.61s	remaining: 5m 19s
5:	learn: 16.0740256	total: 1.91s	remaining: 5m 15s
6:	learn: 15.8425643	total: 2.21s	remaining: 5m 12s
7:	learn: 15.6626703	total: 2.5s	remaining: 5m 9s
8:	learn: 15.4018960	total: 2.84s	remaining: 5m 12s
9:	learn: 15.1916633	total: 3.24s	remaining: 5m 20s
10:	learn: 15.0127370	total: 3.64s	remaining: 5m 27s
11:	learn: 14.6388510	total: 4s	remaining: 5m 28s
12:	learn: 14.5065240	total: 4.44s	remaining: 5m 36s
13:	learn: 14.1772699	total: 4.69s	remaining: 5m 30s
14:	learn: 14.0721574	total: 5.08s	remaining: 5m 33s
15:	learn: 13.9942767	total: 5.45s	remaining: 5m 35s
16:	learn: 13.9157826	total: 5.9s	remaining: 5m 41s
17:	learn: 13.6729507	total: 6.15s	remaining: 5m 35s
18:	learn: 13.4850151	total: 6.4

155:	learn: 10.6733943	total: 49.8s	remaining: 4m 29s
156:	learn: 10.6699998	total: 50.1s	remaining: 4m 28s
157:	learn: 10.6623346	total: 50.5s	remaining: 4m 28s
158:	learn: 10.6574725	total: 50.9s	remaining: 4m 28s
159:	learn: 10.6568488	total: 51.2s	remaining: 4m 28s
160:	learn: 10.6537395	total: 51.5s	remaining: 4m 28s
161:	learn: 10.6409630	total: 51.8s	remaining: 4m 28s
162:	learn: 10.6371026	total: 52.1s	remaining: 4m 27s
163:	learn: 10.6342780	total: 52.4s	remaining: 4m 26s
164:	learn: 10.6301623	total: 52.6s	remaining: 4m 26s
165:	learn: 10.6280566	total: 52.9s	remaining: 4m 25s
166:	learn: 10.6250572	total: 53.2s	remaining: 4m 25s
167:	learn: 10.6232180	total: 53.6s	remaining: 4m 25s
168:	learn: 10.6202164	total: 53.9s	remaining: 4m 24s
169:	learn: 10.6191202	total: 54.3s	remaining: 4m 24s
170:	learn: 10.6138444	total: 54.8s	remaining: 4m 25s
171:	learn: 10.6093686	total: 55s	remaining: 4m 24s
172:	learn: 10.5968469	total: 55.3s	remaining: 4m 24s
173:	learn: 10.5913942	total: 

307:	learn: 10.2224449	total: 1m 38s	remaining: 3m 41s
308:	learn: 10.2206788	total: 1m 38s	remaining: 3m 41s
309:	learn: 10.2198741	total: 1m 39s	remaining: 3m 40s
310:	learn: 10.2175492	total: 1m 39s	remaining: 3m 40s
311:	learn: 10.2155564	total: 1m 39s	remaining: 3m 39s
312:	learn: 10.2152996	total: 1m 39s	remaining: 3m 39s
313:	learn: 10.2152110	total: 1m 40s	remaining: 3m 39s
314:	learn: 10.2127464	total: 1m 40s	remaining: 3m 38s
315:	learn: 10.2107886	total: 1m 40s	remaining: 3m 38s
316:	learn: 10.2102618	total: 1m 41s	remaining: 3m 38s
317:	learn: 10.2084590	total: 1m 41s	remaining: 3m 37s
318:	learn: 10.2077623	total: 1m 41s	remaining: 3m 37s
319:	learn: 10.2062096	total: 1m 42s	remaining: 3m 37s
320:	learn: 10.2059574	total: 1m 42s	remaining: 3m 36s
321:	learn: 10.2037540	total: 1m 42s	remaining: 3m 36s
322:	learn: 10.2008280	total: 1m 43s	remaining: 3m 35s
323:	learn: 10.1999494	total: 1m 43s	remaining: 3m 35s
324:	learn: 10.1993612	total: 1m 43s	remaining: 3m 35s
325:	learn

458:	learn: 9.9699038	total: 2m 27s	remaining: 2m 53s
459:	learn: 9.9690660	total: 2m 27s	remaining: 2m 53s
460:	learn: 9.9685400	total: 2m 28s	remaining: 2m 53s
461:	learn: 9.9668021	total: 2m 28s	remaining: 2m 53s
462:	learn: 9.9635722	total: 2m 28s	remaining: 2m 52s
463:	learn: 9.9632013	total: 2m 29s	remaining: 2m 52s
464:	learn: 9.9629433	total: 2m 29s	remaining: 2m 51s
465:	learn: 9.9600375	total: 2m 29s	remaining: 2m 51s
466:	learn: 9.9598499	total: 2m 30s	remaining: 2m 51s
467:	learn: 9.9590654	total: 2m 30s	remaining: 2m 51s
468:	learn: 9.9561044	total: 2m 30s	remaining: 2m 50s
469:	learn: 9.9559126	total: 2m 31s	remaining: 2m 50s
470:	learn: 9.9555290	total: 2m 31s	remaining: 2m 50s
471:	learn: 9.9546410	total: 2m 32s	remaining: 2m 50s
472:	learn: 9.9536584	total: 2m 32s	remaining: 2m 49s
473:	learn: 9.9523320	total: 2m 32s	remaining: 2m 49s
474:	learn: 9.9513795	total: 2m 33s	remaining: 2m 49s
475:	learn: 9.9470982	total: 2m 33s	remaining: 2m 48s
476:	learn: 9.9469044	total:

611:	learn: 9.7935574	total: 3m 19s	remaining: 2m 6s
612:	learn: 9.7934170	total: 3m 19s	remaining: 2m 6s
613:	learn: 9.7917686	total: 3m 20s	remaining: 2m 5s
614:	learn: 9.7877567	total: 3m 20s	remaining: 2m 5s
615:	learn: 9.7874613	total: 3m 20s	remaining: 2m 5s
616:	learn: 9.7867254	total: 3m 21s	remaining: 2m 4s
617:	learn: 9.7865262	total: 3m 21s	remaining: 2m 4s
618:	learn: 9.7844442	total: 3m 21s	remaining: 2m 4s
619:	learn: 9.7841386	total: 3m 21s	remaining: 2m 3s
620:	learn: 9.7839106	total: 3m 22s	remaining: 2m 3s
621:	learn: 9.7811371	total: 3m 22s	remaining: 2m 3s
622:	learn: 9.7808632	total: 3m 22s	remaining: 2m 2s
623:	learn: 9.7807902	total: 3m 23s	remaining: 2m 2s
624:	learn: 9.7774374	total: 3m 23s	remaining: 2m 2s
625:	learn: 9.7772592	total: 3m 24s	remaining: 2m 1s
626:	learn: 9.7761023	total: 3m 24s	remaining: 2m 1s
627:	learn: 9.7758112	total: 3m 24s	remaining: 2m 1s
628:	learn: 9.7753243	total: 3m 25s	remaining: 2m
629:	learn: 9.7751605	total: 3m 25s	remaining: 2m

764:	learn: 9.7122772	total: 4m 4s	remaining: 1m 15s
765:	learn: 9.7118324	total: 4m 4s	remaining: 1m 14s
766:	learn: 9.7116716	total: 4m 4s	remaining: 1m 14s
767:	learn: 9.7114471	total: 4m 5s	remaining: 1m 14s
768:	learn: 9.7114410	total: 4m 5s	remaining: 1m 13s
769:	learn: 9.7111853	total: 4m 5s	remaining: 1m 13s
770:	learn: 9.7109188	total: 4m 5s	remaining: 1m 13s
771:	learn: 9.7109146	total: 4m 6s	remaining: 1m 12s
772:	learn: 9.7102674	total: 4m 6s	remaining: 1m 12s
773:	learn: 9.7101486	total: 4m 6s	remaining: 1m 12s
774:	learn: 9.7098318	total: 4m 6s	remaining: 1m 11s
775:	learn: 9.7094917	total: 4m 7s	remaining: 1m 11s
776:	learn: 9.7083905	total: 4m 7s	remaining: 1m 11s
777:	learn: 9.7083831	total: 4m 7s	remaining: 1m 10s
778:	learn: 9.7082563	total: 4m 7s	remaining: 1m 10s
779:	learn: 9.7079306	total: 4m 8s	remaining: 1m 10s
780:	learn: 9.7079138	total: 4m 8s	remaining: 1m 9s
781:	learn: 9.7075405	total: 4m 8s	remaining: 1m 9s
782:	learn: 9.7075214	total: 4m 9s	remaining: 1m

920:	learn: 9.6753586	total: 4m 45s	remaining: 24.5s
921:	learn: 9.6752538	total: 4m 45s	remaining: 24.2s
922:	learn: 9.6752225	total: 4m 46s	remaining: 23.9s
923:	learn: 9.6751061	total: 4m 46s	remaining: 23.5s
924:	learn: 9.6750294	total: 4m 46s	remaining: 23.2s
925:	learn: 9.6748853	total: 4m 46s	remaining: 22.9s
926:	learn: 9.6745164	total: 4m 47s	remaining: 22.6s
927:	learn: 9.6745140	total: 4m 47s	remaining: 22.3s
928:	learn: 9.6743172	total: 4m 47s	remaining: 22s
929:	learn: 9.6742910	total: 4m 48s	remaining: 21.7s
930:	learn: 9.6739318	total: 4m 48s	remaining: 21.4s
931:	learn: 9.6733893	total: 4m 48s	remaining: 21.1s
932:	learn: 9.6733874	total: 4m 48s	remaining: 20.7s
933:	learn: 9.6733837	total: 4m 49s	remaining: 20.4s
934:	learn: 9.6732719	total: 4m 49s	remaining: 20.1s
935:	learn: 9.6732569	total: 4m 49s	remaining: 19.8s
936:	learn: 9.6731367	total: 4m 49s	remaining: 19.5s
937:	learn: 9.6727125	total: 4m 50s	remaining: 19.2s
938:	learn: 9.6722518	total: 4m 50s	remaining: 1

FileNotFoundError: [Errno 2] No such file or directory: 'test_dataset_train.csv'

In [83]:
X_test['PATIENT_ID_COUNT'] = y_pred_solution.astype(int)
display (X_test.head(10))
display (train.head(10))

for i in range(len(train['MKB_CODE'])):
    if train['MKB_CODE'][i] == 'Z02.7':
        print('train', train.loc[i])
        print('')
for i in range(len(X_test['MKB_CODE'])):
    if X_test['MKB_CODE'][i] == 'Z02.7':
        print('itog', X_test.loc[i])
        print('')

Unnamed: 0,PATIENT_SEX,MKB_CODE,ADRES,VISIT_MONTH,AGE_CATEGORY,PATIENT_ID_COUNT
1233178,0,Z02.7,Полесск,4,young,3
339954,0,H52.1,Черняховск,4,children,10
2205282,1,Z71.8,Железнодорожный,4,elderly,1
347487,0,H60.4,Калининградская,5,young,0
849477,0,M54.1,Черняховск,10,elderly,4
825791,0,M42.1,Славск,8,middleage,3
776134,0,M15.9,Калининград,12,middleage,34
399266,0,I21,Калининград,5,old,3
1947811,1,N11.0,Калининград,6,centenarians,1
667560,0,K61.0,Балтийск,11,middleage,1


Unnamed: 0,PATIENT_SEX,MKB_CODE,ADRES,VISIT_MONTH_YEAR,AGE_CATEGORY,PATIENT_ID_COUNT
0,0,A00.0,Гурьевск,8.21,young,1
1,0,A00.0,Калининград,3.2,children,1
2,0,A00,Гусев,3.19,children,1
3,0,A00,Калининград,1.22,children,1
4,0,A00,Калининград,2.18,children,1
5,0,A00,Калининград,3.22,children,4
6,0,A00,Калининград,3.22,elderly,1
7,0,A00,Калининград,3.22,middleage,1
8,0,A00,Калининград,3.22,young,3
9,0,A00,Калининград,7.18,young,1


train PATIENT_SEX                     0
MKB_CODE                    Z02.7
ADRES               Багратионовск
VISIT_MONTH_YEAR            01.18
AGE_CATEGORY         centenarians
PATIENT_ID_COUNT                2
Name: 1228328, dtype: object

train PATIENT_SEX                     0
MKB_CODE                    Z02.7
ADRES               Багратионовск
VISIT_MONTH_YEAR            01.18
AGE_CATEGORY              elderly
PATIENT_ID_COUNT                5
Name: 1228329, dtype: object

train PATIENT_SEX                     0
MKB_CODE                    Z02.7
ADRES               Багратионовск
VISIT_MONTH_YEAR            01.18
AGE_CATEGORY            middleage
PATIENT_ID_COUNT                2
Name: 1228330, dtype: object

train PATIENT_SEX                     0
MKB_CODE                    Z02.7
ADRES               Багратионовск
VISIT_MONTH_YEAR            01.18
AGE_CATEGORY                  old
PATIENT_ID_COUNT                5
Name: 1228331, dtype: object

train PATIENT_SEX                     0


train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Балтийск
VISIT_MONTH_YEAR       02.20
AGE_CATEGORY             old
PATIENT_ID_COUNT           7
Name: 1228595, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Балтийск
VISIT_MONTH_YEAR       02.20
AGE_CATEGORY           young
PATIENT_ID_COUNT          31
Name: 1228596, dtype: object

train PATIENT_SEX                    0
MKB_CODE                   Z02.7
ADRES                   Балтийск
VISIT_MONTH_YEAR           02.21
AGE_CATEGORY        centenarians
PATIENT_ID_COUNT               3
Name: 1228597, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Балтийск
VISIT_MONTH_YEAR       02.21
AGE_CATEGORY        children
PATIENT_ID_COUNT          22
Name: 1228598, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Балтийск
VISIT_MONTH_YEAR       02.21
AGE_CATEGORY  

train PATIENT_SEX                    0
MKB_CODE                   Z02.7
ADRES               Волочаевское
VISIT_MONTH_YEAR           10.18
AGE_CATEGORY            children
PATIENT_ID_COUNT               1
Name: 1229202, dtype: object

train PATIENT_SEX                    0
MKB_CODE                   Z02.7
ADRES               Волочаевское
VISIT_MONTH_YEAR           10.19
AGE_CATEGORY               young
PATIENT_ID_COUNT               1
Name: 1229203, dtype: object

train PATIENT_SEX                    0
MKB_CODE                   Z02.7
ADRES               Волочаевское
VISIT_MONTH_YEAR           11.18
AGE_CATEGORY           middleage
PATIENT_ID_COUNT               1
Name: 1229204, dtype: object

train PATIENT_SEX                    0
MKB_CODE                   Z02.7
ADRES               Волочаевское
VISIT_MONTH_YEAR           11.19
AGE_CATEGORY           middleage
PATIENT_ID_COUNT               2
Name: 1229205, dtype: object

train PATIENT_SEX                    0
MKB_CODE                 

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Гурьевск
VISIT_MONTH_YEAR       02.21
AGE_CATEGORY         elderly
PATIENT_ID_COUNT          14
Name: 1229540, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES                Гурьевск
VISIT_MONTH_YEAR        02.21
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            9
Name: 1229541, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Гурьевск
VISIT_MONTH_YEAR       02.21
AGE_CATEGORY             old
PATIENT_ID_COUNT           4
Name: 1229542, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Гурьевск
VISIT_MONTH_YEAR       02.21
AGE_CATEGORY           young
PATIENT_ID_COUNT          46
Name: 1229543, dtype: object

train PATIENT_SEX                    0
MKB_CODE                   Z02.7
ADRES                   Гурьевск
VISIT_MONTH_YEAR           02.22
AGE_CATEGORY    

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES                   Гусев
VISIT_MONTH_YEAR        01.20
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            2
Name: 1229798, dtype: object

train PATIENT_SEX             0
MKB_CODE            Z02.7
ADRES               Гусев
VISIT_MONTH_YEAR    01.20
AGE_CATEGORY        young
PATIENT_ID_COUNT        9
Name: 1229799, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES                  Гусев
VISIT_MONTH_YEAR       01.22
AGE_CATEGORY        children
PATIENT_ID_COUNT           2
Name: 1229800, dtype: object

train PATIENT_SEX               0
MKB_CODE              Z02.7
ADRES                 Гусев
VISIT_MONTH_YEAR      01.22
AGE_CATEGORY        elderly
PATIENT_ID_COUNT          1
Name: 1229801, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES                   Гусев
VISIT_MONTH_YEAR        01.22
AGE_CATEGORY        middleage
PATIENT_ID_COUNT      

train PATIENT_SEX               0
MKB_CODE              Z02.7
ADRES               Залесье
VISIT_MONTH_YEAR      11.19
AGE_CATEGORY          young
PATIENT_ID_COUNT          1
Name: 1230225, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES                 Залесье
VISIT_MONTH_YEAR        12.19
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            1
Name: 1230226, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Заливино
VISIT_MONTH_YEAR       01.18
AGE_CATEGORY        children
PATIENT_ID_COUNT           3
Name: 1230227, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Заливино
VISIT_MONTH_YEAR       02.18
AGE_CATEGORY           young
PATIENT_ID_COUNT           1
Name: 1230228, dtype: object

train PATIENT_SEX                    0
MKB_CODE                   Z02.7
ADRES                   Заливино
VISIT_MONTH_YEAR           03.19
AGE_CATEGORY        ce

train PATIENT_SEX                   0
MKB_CODE                  Z02.7
ADRES               Калининград
VISIT_MONTH_YEAR          04.19
AGE_CATEGORY            elderly
PATIENT_ID_COUNT            209
Name: 1230716, dtype: object

train PATIENT_SEX                   0
MKB_CODE                  Z02.7
ADRES               Калининград
VISIT_MONTH_YEAR          04.19
AGE_CATEGORY          middleage
PATIENT_ID_COUNT            202
Name: 1230717, dtype: object

train PATIENT_SEX                   0
MKB_CODE                  Z02.7
ADRES               Калининград
VISIT_MONTH_YEAR          04.19
AGE_CATEGORY                old
PATIENT_ID_COUNT             78
Name: 1230718, dtype: object

train PATIENT_SEX                   0
MKB_CODE                  Z02.7
ADRES               Калининград
VISIT_MONTH_YEAR          04.19
AGE_CATEGORY              young
PATIENT_ID_COUNT            389
Name: 1230719, dtype: object

train PATIENT_SEX                    0
MKB_CODE                   Z02.7
ADRES           

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Кострово
VISIT_MONTH_YEAR       01.19
AGE_CATEGORY           young
PATIENT_ID_COUNT           2
Name: 1231167, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Кострово
VISIT_MONTH_YEAR       02.18
AGE_CATEGORY        children
PATIENT_ID_COUNT           1
Name: 1231168, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Кострово
VISIT_MONTH_YEAR       02.18
AGE_CATEGORY         elderly
PATIENT_ID_COUNT           1
Name: 1231169, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES                Кострово
VISIT_MONTH_YEAR        02.18
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            1
Name: 1231170, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Кострово
VISIT_MONTH_YEAR       02.19
AGE_CATEGORY        children
PAT

train PATIENT_SEX               0
MKB_CODE              Z02.7
ADRES               Луговое
VISIT_MONTH_YEAR      04.19
AGE_CATEGORY        elderly
PATIENT_ID_COUNT          1
Name: 1231498, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES                 Луговое
VISIT_MONTH_YEAR        04.19
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            1
Name: 1231499, dtype: object

train PATIENT_SEX               0
MKB_CODE              Z02.7
ADRES               Луговое
VISIT_MONTH_YEAR      04.19
AGE_CATEGORY          young
PATIENT_ID_COUNT          1
Name: 1231500, dtype: object

train PATIENT_SEX               0
MKB_CODE              Z02.7
ADRES               Луговое
VISIT_MONTH_YEAR      05.18
AGE_CATEGORY        elderly
PATIENT_ID_COUNT          1
Name: 1231501, dtype: object

train PATIENT_SEX               0
MKB_CODE              Z02.7
ADRES               Луговое
VISIT_MONTH_YEAR      05.18
AGE_CATEGORY          young
PATIENT_ID_COUNT          

train PATIENT_SEX                        0
MKB_CODE                       Z02.7
ADRES               Малое Васильково
VISIT_MONTH_YEAR               11.18
AGE_CATEGORY                   young
PATIENT_ID_COUNT                   2
Name: 1231727, dtype: object

train PATIENT_SEX                        0
MKB_CODE                       Z02.7
ADRES               Малое Васильково
VISIT_MONTH_YEAR               11.19
AGE_CATEGORY                children
PATIENT_ID_COUNT                   5
Name: 1231728, dtype: object

train PATIENT_SEX                        0
MKB_CODE                       Z02.7
ADRES               Малое Васильково
VISIT_MONTH_YEAR               11.19
AGE_CATEGORY               middleage
PATIENT_ID_COUNT                   2
Name: 1231729, dtype: object

train PATIENT_SEX                        0
MKB_CODE                       Z02.7
ADRES               Малое Васильково
VISIT_MONTH_YEAR               11.19
AGE_CATEGORY                   young
PATIENT_ID_COUNT                   

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES               Матросово
VISIT_MONTH_YEAR        12.19
AGE_CATEGORY         children
PATIENT_ID_COUNT            5
Name: 1232027, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES               Матросово
VISIT_MONTH_YEAR        12.19
AGE_CATEGORY          elderly
PATIENT_ID_COUNT            1
Name: 1232028, dtype: object

train PATIENT_SEX                  0
MKB_CODE                 Z02.7
ADRES               Мельниково
VISIT_MONTH_YEAR         01.18
AGE_CATEGORY             young
PATIENT_ID_COUNT             1
Name: 1232029, dtype: object

train PATIENT_SEX                  0
MKB_CODE                 Z02.7
ADRES               Мельниково
VISIT_MONTH_YEAR         01.19
AGE_CATEGORY             young
PATIENT_ID_COUNT             1
Name: 1232030, dtype: object

train PATIENT_SEX                  0
MKB_CODE                 Z02.7
ADRES               Мельниково
VISIT_MONTH_YEAR         

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES                Орловка
VISIT_MONTH_YEAR       09.18
AGE_CATEGORY        children
PATIENT_ID_COUNT           3
Name: 1232671, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES                Орловка
VISIT_MONTH_YEAR       09.19
AGE_CATEGORY        children
PATIENT_ID_COUNT           2
Name: 1232672, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES                Орловка
VISIT_MONTH_YEAR       10.18
AGE_CATEGORY        children
PATIENT_ID_COUNT           4
Name: 1232673, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES                Орловка
VISIT_MONTH_YEAR       10.19
AGE_CATEGORY        children
PATIENT_ID_COUNT           5
Name: 1232674, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES                Орловка
VISIT_MONTH_YEAR       11.18
AGE_CATEGORY        children
PATIENT_I

train PATIENT_SEX                  0
MKB_CODE                 Z02.7
ADRES               Прибрежное
VISIT_MONTH_YEAR         02.18
AGE_CATEGORY          children
PATIENT_ID_COUNT             2
Name: 1233405, dtype: object

train PATIENT_SEX                  0
MKB_CODE                 Z02.7
ADRES               Прибрежное
VISIT_MONTH_YEAR         02.19
AGE_CATEGORY          children
PATIENT_ID_COUNT             1
Name: 1233406, dtype: object

train PATIENT_SEX                  0
MKB_CODE                 Z02.7
ADRES               Прибрежное
VISIT_MONTH_YEAR         03.18
AGE_CATEGORY          children
PATIENT_ID_COUNT             1
Name: 1233407, dtype: object

train PATIENT_SEX                  0
MKB_CODE                 Z02.7
ADRES               Прибрежное
VISIT_MONTH_YEAR         03.19
AGE_CATEGORY          children
PATIENT_ID_COUNT             1
Name: 1233408, dtype: object

train PATIENT_SEX                  0
MKB_CODE                 Z02.7
ADRES               Прибрежное
VISIT_MONTH_Y

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES               СТ Радуга
VISIT_MONTH_YEAR        09.18
AGE_CATEGORY          elderly
PATIENT_ID_COUNT            1
Name: 1234007, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES               СТ Радуга
VISIT_MONTH_YEAR        09.19
AGE_CATEGORY            young
PATIENT_ID_COUNT            1
Name: 1234008, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES               СТ Радуга
VISIT_MONTH_YEAR        11.18
AGE_CATEGORY         children
PATIENT_ID_COUNT            1
Name: 1234009, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES               СТ Радуга
VISIT_MONTH_YEAR        11.19
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            1
Name: 1234010, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES               СТ Радуга
VISIT_MONTH_YEAR        11.19
AGE_CATEGO

train PATIENT_SEX               0
MKB_CODE              Z02.7
ADRES               Светлый
VISIT_MONTH_YEAR      07.20
AGE_CATEGORY        elderly
PATIENT_ID_COUNT          4
Name: 1234340, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES                 Светлый
VISIT_MONTH_YEAR        07.20
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            6
Name: 1234341, dtype: object

train PATIENT_SEX               0
MKB_CODE              Z02.7
ADRES               Светлый
VISIT_MONTH_YEAR      07.20
AGE_CATEGORY            old
PATIENT_ID_COUNT          3
Name: 1234342, dtype: object

train PATIENT_SEX               0
MKB_CODE              Z02.7
ADRES               Светлый
VISIT_MONTH_YEAR      07.20
AGE_CATEGORY          young
PATIENT_ID_COUNT         19
Name: 1234343, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES                Светлый
VISIT_MONTH_YEAR       07.21
AGE_CATEGORY        children
PATIENT_ID_COUNT     

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES                 Славск
VISIT_MONTH_YEAR       10.21
AGE_CATEGORY        children
PATIENT_ID_COUNT          13
Name: 1234705, dtype: object

train PATIENT_SEX               0
MKB_CODE              Z02.7
ADRES                Славск
VISIT_MONTH_YEAR      10.21
AGE_CATEGORY        elderly
PATIENT_ID_COUNT          5
Name: 1234706, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES                  Славск
VISIT_MONTH_YEAR        10.21
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            6
Name: 1234707, dtype: object

train PATIENT_SEX              0
MKB_CODE             Z02.7
ADRES               Славск
VISIT_MONTH_YEAR     10.21
AGE_CATEGORY           old
PATIENT_ID_COUNT         2
Name: 1234708, dtype: object

train PATIENT_SEX              0
MKB_CODE             Z02.7
ADRES               Славск
VISIT_MONTH_YEAR     10.21
AGE_CATEGORY         young
PATIENT_ID_COUNT         4
Name

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Храброво
VISIT_MONTH_YEAR       04.18
AGE_CATEGORY           young
PATIENT_ID_COUNT           8
Name: 1235177, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Храброво
VISIT_MONTH_YEAR       04.19
AGE_CATEGORY        children
PATIENT_ID_COUNT          11
Name: 1235178, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Храброво
VISIT_MONTH_YEAR       04.19
AGE_CATEGORY         elderly
PATIENT_ID_COUNT           2
Name: 1235179, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES                Храброво
VISIT_MONTH_YEAR        04.19
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            1
Name: 1235180, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Храброво
VISIT_MONTH_YEAR       04.19
AGE_CATEGORY           young
PAT

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Янтарный
VISIT_MONTH_YEAR       10.18
AGE_CATEGORY           young
PATIENT_ID_COUNT           2
Name: 1235543, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Янтарный
VISIT_MONTH_YEAR       10.19
AGE_CATEGORY        children
PATIENT_ID_COUNT          22
Name: 1235544, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Янтарный
VISIT_MONTH_YEAR       10.19
AGE_CATEGORY         elderly
PATIENT_ID_COUNT           1
Name: 1235545, dtype: object

train PATIENT_SEX                 0
MKB_CODE                Z02.7
ADRES                Янтарный
VISIT_MONTH_YEAR        10.19
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            1
Name: 1235546, dtype: object

train PATIENT_SEX                0
MKB_CODE               Z02.7
ADRES               Янтарный
VISIT_MONTH_YEAR       10.19
AGE_CATEGORY           young
PAT

train PATIENT_SEX                     1
MKB_CODE                    Z02.7
ADRES               Багратионовск
VISIT_MONTH_YEAR            01.18
AGE_CATEGORY             children
PATIENT_ID_COUNT                1
Name: 2155266, dtype: object

train PATIENT_SEX                     1
MKB_CODE                    Z02.7
ADRES               Багратионовск
VISIT_MONTH_YEAR            01.18
AGE_CATEGORY              elderly
PATIENT_ID_COUNT                2
Name: 2155267, dtype: object

train PATIENT_SEX                     1
MKB_CODE                    Z02.7
ADRES               Багратионовск
VISIT_MONTH_YEAR            01.18
AGE_CATEGORY                young
PATIENT_ID_COUNT                2
Name: 2155268, dtype: object

train PATIENT_SEX                     1
MKB_CODE                    Z02.7
ADRES               Багратионовск
VISIT_MONTH_YEAR            01.19
AGE_CATEGORY              elderly
PATIENT_ID_COUNT                1
Name: 2155269, dtype: object

train PATIENT_SEX                     1


train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Балтийск
VISIT_MONTH_YEAR       12.18
AGE_CATEGORY         elderly
PATIENT_ID_COUNT           6
Name: 2155682, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES                Балтийск
VISIT_MONTH_YEAR        12.18
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            5
Name: 2155683, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Балтийск
VISIT_MONTH_YEAR       12.18
AGE_CATEGORY             old
PATIENT_ID_COUNT           2
Name: 2155684, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Балтийск
VISIT_MONTH_YEAR       12.18
AGE_CATEGORY           young
PATIENT_ID_COUNT           7
Name: 2155685, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Балтийск
VISIT_MONTH_YEAR       12.19
AGE_CATEGORY        children
PAT

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Гвардейск
VISIT_MONTH_YEAR        01.20
AGE_CATEGORY            young
PATIENT_ID_COUNT            5
Name: 2156023, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Гвардейск
VISIT_MONTH_YEAR        01.21
AGE_CATEGORY         children
PATIENT_ID_COUNT            1
Name: 2156024, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Гвардейск
VISIT_MONTH_YEAR        01.21
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            2
Name: 2156025, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Гвардейск
VISIT_MONTH_YEAR        01.21
AGE_CATEGORY            young
PATIENT_ID_COUNT            3
Name: 2156026, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Гвардейск
VISIT_MONTH_YEAR        01.22
AGE_CATEGO

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Гурьевск
VISIT_MONTH_YEAR       08.21
AGE_CATEGORY           young
PATIENT_ID_COUNT          38
Name: 2156336, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Гурьевск
VISIT_MONTH_YEAR       09.18
AGE_CATEGORY        children
PATIENT_ID_COUNT         244
Name: 2156337, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Гурьевск
VISIT_MONTH_YEAR       09.18
AGE_CATEGORY         elderly
PATIENT_ID_COUNT           1
Name: 2156338, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Гурьевск
VISIT_MONTH_YEAR       09.18
AGE_CATEGORY             old
PATIENT_ID_COUNT           1
Name: 2156339, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Гурьевск
VISIT_MONTH_YEAR       09.18
AGE_CATEGORY           young
PATIENT_I

train PATIENT_SEX               1
MKB_CODE              Z02.7
ADRES               Залесье
VISIT_MONTH_YEAR      12.19
AGE_CATEGORY        elderly
PATIENT_ID_COUNT          1
Name: 2156753, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Заливино
VISIT_MONTH_YEAR       01.18
AGE_CATEGORY        children
PATIENT_ID_COUNT           2
Name: 2156754, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Заливино
VISIT_MONTH_YEAR       04.18
AGE_CATEGORY        children
PATIENT_ID_COUNT           1
Name: 2156755, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Заливино
VISIT_MONTH_YEAR       05.18
AGE_CATEGORY        children
PATIENT_ID_COUNT           1
Name: 2156756, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Заливино
VISIT_MONTH_YEAR       06.19
AGE_CATEGORY         elderly
PATIENT_ID_COUN

train PATIENT_SEX               1
MKB_CODE              Z02.7
ADRES               Коврово
VISIT_MONTH_YEAR      12.19
AGE_CATEGORY          young
PATIENT_ID_COUNT          2
Name: 2157506, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Колосовка
VISIT_MONTH_YEAR        01.18
AGE_CATEGORY         children
PATIENT_ID_COUNT            2
Name: 2157507, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Колосовка
VISIT_MONTH_YEAR        01.19
AGE_CATEGORY          elderly
PATIENT_ID_COUNT            1
Name: 2157508, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Колосовка
VISIT_MONTH_YEAR        01.19
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            1
Name: 2157509, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Колосовка
VISIT_MONTH_YEAR        03.19
AGE_CATEGORY         c

train PATIENT_SEX               1
MKB_CODE              Z02.7
ADRES               Луговое
VISIT_MONTH_YEAR      05.19
AGE_CATEGORY          young
PATIENT_ID_COUNT          1
Name: 2157862, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES                Луговое
VISIT_MONTH_YEAR       06.18
AGE_CATEGORY        children
PATIENT_ID_COUNT           3
Name: 2157863, dtype: object

train PATIENT_SEX               1
MKB_CODE              Z02.7
ADRES               Луговое
VISIT_MONTH_YEAR      06.18
AGE_CATEGORY          young
PATIENT_ID_COUNT          1
Name: 2157864, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES                Луговое
VISIT_MONTH_YEAR       06.19
AGE_CATEGORY        children
PATIENT_ID_COUNT           3
Name: 2157865, dtype: object

train PATIENT_SEX               1
MKB_CODE              Z02.7
ADRES               Луговое
VISIT_MONTH_YEAR      06.19
AGE_CATEGORY        elderly
PATIENT_ID_COUNT          

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Матросово
VISIT_MONTH_YEAR        01.19
AGE_CATEGORY         children
PATIENT_ID_COUNT            6
Name: 2158211, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Матросово
VISIT_MONTH_YEAR        01.19
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            1
Name: 2158212, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Матросово
VISIT_MONTH_YEAR        02.18
AGE_CATEGORY         children
PATIENT_ID_COUNT            2
Name: 2158213, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Матросово
VISIT_MONTH_YEAR        02.18
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            2
Name: 2158214, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Матросово
VISIT_MONTH_YEAR        02.19
AGE_CATEGO

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Нестеров
VISIT_MONTH_YEAR       09.21
AGE_CATEGORY         elderly
PATIENT_ID_COUNT           1
Name: 2158568, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES                Нестеров
VISIT_MONTH_YEAR        09.21
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            1
Name: 2158569, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Нестеров
VISIT_MONTH_YEAR       09.21
AGE_CATEGORY           young
PATIENT_ID_COUNT           1
Name: 2158570, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Нестеров
VISIT_MONTH_YEAR       10.18
AGE_CATEGORY        children
PATIENT_ID_COUNT           9
Name: 2158571, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES               Нестеров
VISIT_MONTH_YEAR       10.18
AGE_CATEGORY         elderly
PAT

train PATIENT_SEX                    1
MKB_CODE                   Z02.7
ADRES               Переславское
VISIT_MONTH_YEAR           09.19
AGE_CATEGORY           middleage
PATIENT_ID_COUNT               3
Name: 2158902, dtype: object

train PATIENT_SEX                    1
MKB_CODE                   Z02.7
ADRES               Переславское
VISIT_MONTH_YEAR           09.19
AGE_CATEGORY                 old
PATIENT_ID_COUNT               1
Name: 2158903, dtype: object

train PATIENT_SEX                    1
MKB_CODE                   Z02.7
ADRES               Переславское
VISIT_MONTH_YEAR           09.19
AGE_CATEGORY               young
PATIENT_ID_COUNT               1
Name: 2158904, dtype: object

train PATIENT_SEX                    1
MKB_CODE                   Z02.7
ADRES               Переславское
VISIT_MONTH_YEAR           10.18
AGE_CATEGORY             elderly
PATIENT_ID_COUNT               1
Name: 2158905, dtype: object

train PATIENT_SEX                    1
MKB_CODE                 

train PATIENT_SEX                  1
MKB_CODE                 Z02.7
ADRES               Пионерский
VISIT_MONTH_YEAR         10.19
AGE_CATEGORY           elderly
PATIENT_ID_COUNT             3
Name: 2159125, dtype: object

train PATIENT_SEX                  1
MKB_CODE                 Z02.7
ADRES               Пионерский
VISIT_MONTH_YEAR         10.19
AGE_CATEGORY         middleage
PATIENT_ID_COUNT             2
Name: 2159126, dtype: object

train PATIENT_SEX                  1
MKB_CODE                 Z02.7
ADRES               Пионерский
VISIT_MONTH_YEAR         10.19
AGE_CATEGORY             young
PATIENT_ID_COUNT             1
Name: 2159127, dtype: object

train PATIENT_SEX                  1
MKB_CODE                 Z02.7
ADRES               Пионерский
VISIT_MONTH_YEAR         10.20
AGE_CATEGORY          children
PATIENT_ID_COUNT            72
Name: 2159128, dtype: object

train PATIENT_SEX                  1
MKB_CODE                 Z02.7
ADRES               Пионерский
VISIT_MONTH_Y

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Правдинск
VISIT_MONTH_YEAR        06.19
AGE_CATEGORY         children
PATIENT_ID_COUNT            1
Name: 2159379, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Правдинск
VISIT_MONTH_YEAR        06.19
AGE_CATEGORY            young
PATIENT_ID_COUNT            1
Name: 2159380, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Правдинск
VISIT_MONTH_YEAR        06.20
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            1
Name: 2159381, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Правдинск
VISIT_MONTH_YEAR        06.21
AGE_CATEGORY         children
PATIENT_ID_COUNT            1
Name: 2159382, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Правдинск
VISIT_MONTH_YEAR        06.21
AGE_CATEGO

train PATIENT_SEX                                1
MKB_CODE                               Z02.7
ADRES               СТ Искра ул. Тюльпановая
VISIT_MONTH_YEAR                       09.18
AGE_CATEGORY                        children
PATIENT_ID_COUNT                           3
Name: 2159868, dtype: object

train PATIENT_SEX                                1
MKB_CODE                               Z02.7
ADRES               СТ Искра ул. Тюльпановая
VISIT_MONTH_YEAR                       09.19
AGE_CATEGORY                        children
PATIENT_ID_COUNT                           5
Name: 2159869, dtype: object

train PATIENT_SEX                                1
MKB_CODE                               Z02.7
ADRES               СТ Искра ул. Тюльпановая
VISIT_MONTH_YEAR                       10.18
AGE_CATEGORY                        children
PATIENT_ID_COUNT                           3
Name: 2159870, dtype: object

train PATIENT_SEX                                1
MKB_CODE                       

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES                 Светлый
VISIT_MONTH_YEAR        02.20
AGE_CATEGORY        middleage
PATIENT_ID_COUNT            5
Name: 2160166, dtype: object

train PATIENT_SEX               1
MKB_CODE              Z02.7
ADRES               Светлый
VISIT_MONTH_YEAR      02.20
AGE_CATEGORY            old
PATIENT_ID_COUNT          1
Name: 2160167, dtype: object

train PATIENT_SEX               1
MKB_CODE              Z02.7
ADRES               Светлый
VISIT_MONTH_YEAR      02.20
AGE_CATEGORY          young
PATIENT_ID_COUNT         13
Name: 2160168, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES                Светлый
VISIT_MONTH_YEAR       02.21
AGE_CATEGORY        children
PATIENT_ID_COUNT          19
Name: 2160169, dtype: object

train PATIENT_SEX               1
MKB_CODE              Z02.7
ADRES               Светлый
VISIT_MONTH_YEAR      02.21
AGE_CATEGORY        elderly
PATIENT_ID_COUNT    

train PATIENT_SEX              1
MKB_CODE             Z02.7
ADRES               Славск
VISIT_MONTH_YEAR     09.20
AGE_CATEGORY         young
PATIENT_ID_COUNT        28
Name: 2160565, dtype: object

train PATIENT_SEX                    1
MKB_CODE                   Z02.7
ADRES                     Славск
VISIT_MONTH_YEAR           09.21
AGE_CATEGORY        centenarians
PATIENT_ID_COUNT               1
Name: 2160566, dtype: object

train PATIENT_SEX                1
MKB_CODE               Z02.7
ADRES                 Славск
VISIT_MONTH_YEAR       09.21
AGE_CATEGORY        children
PATIENT_ID_COUNT          11
Name: 2160567, dtype: object

train PATIENT_SEX               1
MKB_CODE              Z02.7
ADRES                Славск
VISIT_MONTH_YEAR      09.21
AGE_CATEGORY        elderly
PATIENT_ID_COUNT          8
Name: 2160568, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES                  Славск
VISIT_MONTH_YEAR        09.21
AGE_CATEGORY        middleag

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Шоссейное
VISIT_MONTH_YEAR        01.18
AGE_CATEGORY            young
PATIENT_ID_COUNT            1
Name: 2161171, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Шоссейное
VISIT_MONTH_YEAR        01.19
AGE_CATEGORY         children
PATIENT_ID_COUNT            6
Name: 2161172, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Шоссейное
VISIT_MONTH_YEAR        02.19
AGE_CATEGORY         children
PATIENT_ID_COUNT            1
Name: 2161173, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Шоссейное
VISIT_MONTH_YEAR        03.18
AGE_CATEGORY         children
PATIENT_ID_COUNT            2
Name: 2161174, dtype: object

train PATIENT_SEX                 1
MKB_CODE                Z02.7
ADRES               Шоссейное
VISIT_MONTH_YEAR        03.19
AGE_CATEGO

KeyError: 1

#### LGBRegressor

In [None]:
#def lgb_regr(features, target):
#    model = LGBMRegressor()
#    params = {
#        'boosting_type' : ['gbdt'],
#        'n_estimators' : [50, 100],
#        'max_depth' : [5, 7, 10],
#        'verbose' : [0],
#        'random_state' : [RND],
#        'force_col_wise': [True],
#        'num_leaves' : [127]}
#    grid_cv = GridSearchCV(estimator=model, param_grid=params, cv=3, n_jobs=-1, scoring='r2_score')
#    grid_cv.fit(features, target, categorical_feature=cats_cols)
#    print('Best hyperparams: ', grid_cv.best_params_)
#    print('RMSE score on train sample: {:.2f}'.format((-grid_cv.best_score_) ** 0.5))
#    return grid_cv.best_params_

In [None]:
#lgb_params = lgb_regr(X_train, y_train)

In [None]:
%%time
model = LGBMRegressor(n_estimators = 1000, random_state = RND)
model.fit(X_train, y_train, categorical_feature = COLUMN_XY)

In [None]:
%%time
#model_lgb = LGBMRegressor(**lgb_params)
#model_lgb.fit(X_train, y_train, eval_set=(X_test, y_test), verbose=10, eval_metric='rmse')
y_pred = abs(model.predict(X_test, categorical_feature = COLUMN_XY))
print("Значение метрики R2 на test: ", r2_score(y_test, y_pred))

In [None]:
y_pred = y_pred.astype(int)
print (y_pred.min(), y_pred.max(), y_pred.mean())

#### Keras

In [None]:
#model = keras.models.Sequential()
#model.add(keras.layers.Dense(units=1, input_dim=X_train.shape[1]))

In [None]:
#%%time
#model.compile(loss='r2_score', optimizer='sgd')
#model.fit(X_train, y_train) #, categorical_feature = COLUMN_XY)

In [None]:
#%%time
#y_pred = model.predict(X_test, categorical_feature = COLUMN_XY)

#### Итоговая модель

In [None]:
%%time
model = LGBMRegressor(n_estimators = 1000, random_state = RND)
model.fit(X, y, categorical_feature = COLUMN_XY)

In [None]:
y_pred = model.predict(test_category, categorical_feature = COLUMN_XY)

In [90]:
#catboost

#X = train_visit[['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH', 'AGE_CATEGORY']]
#y = train_visit[['PATIENT_ID_COUNT']]
X = train[['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY']]
y = train[['PATIENT_ID_COUNT']]

#pool_train_solution = Pool(X, y, cat_features = COLUMN_XY)
#pool_test_solution = Pool(test_visit, cat_features = COLUMN_XY)
pool_train_solution = Pool(X, y, cat_features = ['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY'])
pool_test_solution = Pool(test, cat_features = ['PATIENT_SEX', 'MKB_CODE', 'ADRES', 'VISIT_MONTH_YEAR', 'AGE_CATEGORY'])

model_solution = CatBoostRegressor(task_type='GPU')
model_solution.fit(pool_train_solution)
y_pred_solution = model_solution.predict(pool_test_solution)
y_pred_solution.astype(int)


for i in range(len(y_pred_solution)):
    if y_pred_solution[i] < 0:
        y_pred_solution[i] = 1

test['PATIENT_ID_COUNT'] = y_pred_solution.astype(int)
test.to_csv('sample_solution_sample.csv', sep=';', index=None)

Learning rate set to 0.110773
0:	learn: 17.8696539	total: 732ms	remaining: 12m 11s
1:	learn: 17.3710128	total: 1.29s	remaining: 10m 44s
2:	learn: 16.9738187	total: 1.93s	remaining: 10m 41s
3:	learn: 16.6237097	total: 2.49s	remaining: 10m 19s
4:	learn: 16.3178086	total: 3.11s	remaining: 10m 19s
5:	learn: 16.0674260	total: 3.76s	remaining: 10m 22s
6:	learn: 15.8473269	total: 4.47s	remaining: 10m 33s
7:	learn: 15.5662608	total: 5.28s	remaining: 10m 54s
8:	learn: 15.3245873	total: 6.09s	remaining: 11m 11s
9:	learn: 14.9065073	total: 6.83s	remaining: 11m 16s
10:	learn: 14.7272034	total: 7.65s	remaining: 11m 28s
11:	learn: 14.6091560	total: 8.4s	remaining: 11m 31s
12:	learn: 14.2885681	total: 9.11s	remaining: 11m 31s
13:	learn: 14.1766631	total: 10.1s	remaining: 11m 49s
14:	learn: 14.0839750	total: 10.9s	remaining: 11m 53s
15:	learn: 13.9983908	total: 11.6s	remaining: 11m 50s
16:	learn: 13.7632199	total: 12.2s	remaining: 11m 44s
17:	learn: 13.6942470	total: 12.9s	remaining: 11m 45s
18:	learn

151:	learn: 9.8937440	total: 1m 48s	remaining: 10m 6s
152:	learn: 9.8882312	total: 1m 49s	remaining: 10m 6s
153:	learn: 9.8846478	total: 1m 50s	remaining: 10m 5s
154:	learn: 9.8733624	total: 1m 50s	remaining: 10m 4s
155:	learn: 9.8648034	total: 1m 51s	remaining: 10m 3s
156:	learn: 9.8550011	total: 1m 52s	remaining: 10m 2s
157:	learn: 9.8480511	total: 1m 52s	remaining: 10m 1s
158:	learn: 9.8444994	total: 1m 53s	remaining: 10m
159:	learn: 9.8398088	total: 1m 54s	remaining: 9m 59s
160:	learn: 9.8267454	total: 1m 54s	remaining: 9m 58s
161:	learn: 9.8162208	total: 1m 55s	remaining: 9m 58s
162:	learn: 9.8049183	total: 1m 56s	remaining: 9m 57s
163:	learn: 9.8020120	total: 1m 56s	remaining: 9m 55s
164:	learn: 9.7950014	total: 1m 57s	remaining: 9m 55s
165:	learn: 9.7859811	total: 1m 58s	remaining: 9m 54s
166:	learn: 9.7708978	total: 1m 59s	remaining: 9m 53s
167:	learn: 9.7644450	total: 1m 59s	remaining: 9m 53s
168:	learn: 9.7586041	total: 2m	remaining: 9m 52s
169:	learn: 9.7551100	total: 2m 1s	

304:	learn: 9.1527693	total: 3m 38s	remaining: 8m 18s
305:	learn: 9.1515671	total: 3m 39s	remaining: 8m 18s
306:	learn: 9.1503247	total: 3m 40s	remaining: 8m 17s
307:	learn: 9.1475353	total: 3m 41s	remaining: 8m 17s
308:	learn: 9.1458246	total: 3m 42s	remaining: 8m 17s
309:	learn: 9.1448182	total: 3m 42s	remaining: 8m 15s
310:	learn: 9.1444121	total: 3m 43s	remaining: 8m 15s
311:	learn: 9.1393939	total: 3m 44s	remaining: 8m 14s
312:	learn: 9.1335836	total: 3m 44s	remaining: 8m 13s
313:	learn: 9.1324002	total: 3m 45s	remaining: 8m 12s
314:	learn: 9.1307522	total: 3m 46s	remaining: 8m 12s
315:	learn: 9.1225561	total: 3m 46s	remaining: 8m 11s
316:	learn: 9.1210210	total: 3m 47s	remaining: 8m 10s
317:	learn: 9.1206265	total: 3m 48s	remaining: 8m 9s
318:	learn: 9.1190604	total: 3m 49s	remaining: 8m 8s
319:	learn: 9.1166779	total: 3m 49s	remaining: 8m 8s
320:	learn: 9.1154094	total: 3m 50s	remaining: 8m 7s
321:	learn: 9.1131597	total: 3m 51s	remaining: 8m 6s
322:	learn: 9.1126370	total: 3m 5

458:	learn: 8.7775461	total: 5m 31s	remaining: 6m 30s
459:	learn: 8.7749966	total: 5m 31s	remaining: 6m 29s
460:	learn: 8.7720704	total: 5m 32s	remaining: 6m 28s
461:	learn: 8.7720667	total: 5m 33s	remaining: 6m 27s
462:	learn: 8.7706963	total: 5m 33s	remaining: 6m 27s
463:	learn: 8.7647337	total: 5m 34s	remaining: 6m 26s
464:	learn: 8.7642803	total: 5m 35s	remaining: 6m 25s
465:	learn: 8.7631210	total: 5m 36s	remaining: 6m 25s
466:	learn: 8.7608156	total: 5m 36s	remaining: 6m 24s
467:	learn: 8.7589509	total: 5m 37s	remaining: 6m 23s
468:	learn: 8.7569796	total: 5m 38s	remaining: 6m 23s
469:	learn: 8.7539354	total: 5m 39s	remaining: 6m 22s
470:	learn: 8.7470113	total: 5m 39s	remaining: 6m 21s
471:	learn: 8.7469232	total: 5m 40s	remaining: 6m 21s
472:	learn: 8.7463674	total: 5m 41s	remaining: 6m 20s
473:	learn: 8.7458377	total: 5m 42s	remaining: 6m 19s
474:	learn: 8.7449400	total: 5m 42s	remaining: 6m 18s
475:	learn: 8.7431291	total: 5m 43s	remaining: 6m 18s
476:	learn: 8.7421293	total:

612:	learn: 8.5399530	total: 7m 22s	remaining: 4m 39s
613:	learn: 8.5389993	total: 7m 23s	remaining: 4m 38s
614:	learn: 8.5384991	total: 7m 24s	remaining: 4m 37s
615:	learn: 8.5375148	total: 7m 24s	remaining: 4m 37s
616:	learn: 8.5349924	total: 7m 25s	remaining: 4m 36s
617:	learn: 8.5348224	total: 7m 26s	remaining: 4m 35s
618:	learn: 8.5340607	total: 7m 26s	remaining: 4m 34s
619:	learn: 8.5340145	total: 7m 27s	remaining: 4m 34s
620:	learn: 8.5320221	total: 7m 27s	remaining: 4m 33s
621:	learn: 8.5317091	total: 7m 28s	remaining: 4m 32s
622:	learn: 8.5312636	total: 7m 29s	remaining: 4m 31s
623:	learn: 8.5307609	total: 7m 30s	remaining: 4m 31s
624:	learn: 8.5296925	total: 7m 30s	remaining: 4m 30s
625:	learn: 8.5289214	total: 7m 31s	remaining: 4m 29s
626:	learn: 8.5283901	total: 7m 32s	remaining: 4m 29s
627:	learn: 8.5268231	total: 7m 33s	remaining: 4m 28s
628:	learn: 8.5257406	total: 7m 33s	remaining: 4m 27s
629:	learn: 8.5246818	total: 7m 34s	remaining: 4m 26s
630:	learn: 8.5242389	total:

766:	learn: 8.3528842	total: 9m 12s	remaining: 2m 47s
767:	learn: 8.3507771	total: 9m 12s	remaining: 2m 46s
768:	learn: 8.3505422	total: 9m 13s	remaining: 2m 46s
769:	learn: 8.3455302	total: 9m 13s	remaining: 2m 45s
770:	learn: 8.3453689	total: 9m 14s	remaining: 2m 44s
771:	learn: 8.3448593	total: 9m 15s	remaining: 2m 43s
772:	learn: 8.3447339	total: 9m 15s	remaining: 2m 43s
773:	learn: 8.3440078	total: 9m 16s	remaining: 2m 42s
774:	learn: 8.3438022	total: 9m 17s	remaining: 2m 41s
775:	learn: 8.3432166	total: 9m 17s	remaining: 2m 41s
776:	learn: 8.3419593	total: 9m 18s	remaining: 2m 40s
777:	learn: 8.3406285	total: 9m 19s	remaining: 2m 39s
778:	learn: 8.3406228	total: 9m 19s	remaining: 2m 38s
779:	learn: 8.3396854	total: 9m 20s	remaining: 2m 38s
780:	learn: 8.3395735	total: 9m 21s	remaining: 2m 37s
781:	learn: 8.3392167	total: 9m 22s	remaining: 2m 36s
782:	learn: 8.3387974	total: 9m 22s	remaining: 2m 36s
783:	learn: 8.3365566	total: 9m 23s	remaining: 2m 35s
784:	learn: 8.3362323	total:

918:	learn: 8.2206485	total: 11m	remaining: 58.2s
919:	learn: 8.2204196	total: 11m 1s	remaining: 57.5s
920:	learn: 8.2201752	total: 11m 2s	remaining: 56.8s
921:	learn: 8.2195174	total: 11m 2s	remaining: 56.1s
922:	learn: 8.2190431	total: 11m 3s	remaining: 55.4s
923:	learn: 8.2186402	total: 11m 4s	remaining: 54.7s
924:	learn: 8.2177664	total: 11m 5s	remaining: 53.9s
925:	learn: 8.2166987	total: 11m 6s	remaining: 53.2s
926:	learn: 8.2156780	total: 11m 7s	remaining: 52.5s
927:	learn: 8.2150048	total: 11m 7s	remaining: 51.8s
928:	learn: 8.2117923	total: 11m 8s	remaining: 51.1s
929:	learn: 8.2112061	total: 11m 9s	remaining: 50.4s
930:	learn: 8.2107199	total: 11m 9s	remaining: 49.6s
931:	learn: 8.2105374	total: 11m 10s	remaining: 48.9s
932:	learn: 8.2098567	total: 11m 11s	remaining: 48.2s
933:	learn: 8.2077324	total: 11m 12s	remaining: 47.5s
934:	learn: 8.2077134	total: 11m 12s	remaining: 46.8s
935:	learn: 8.2073849	total: 11m 13s	remaining: 46s
936:	learn: 8.2053966	total: 11m 14s	remaining

In [91]:
print (y_pred_solution.min(), y_pred_solution.max(), y_pred_solution.mean())

0.0002596776624841368 345.91598056577004 3.427430869321332


In [45]:
y_pred_solution = y_pred.astype(int)
print (y_pred_solution.min(), y_pred.max(), y_pred.mean())
print(len(X), len(y), len(test_visit), len(y_pred_solution))

NameError: name 'y_pred' is not defined

In [None]:
test_visit.info()

In [None]:
#Формируем sample_solution для отправки на платформу

test['PATIENT_ID_COUNT'] = y_pred_solution.astype(int)

In [None]:
test.info()

In [None]:
#Сохраняем в csv файл
 
test.to_csv('sample_solution_4.csv', sep=';', index=None)

In [92]:
train = pd.read_csv('train_dataset_train.csv', sep=';')
itog = pd.read_csv('sample_solution_sample.csv', sep=';')
display (itog.head(10))
display (train.head(10))


Unnamed: 0,PATIENT_SEX,MKB_CODE,ADRES,VISIT_MONTH_YEAR,AGE_CATEGORY,PATIENT_ID_COUNT
0,0,A00,Калининград,4.22,children,1
1,0,A00,Калининград,4.22,elderly,1
2,0,A00,Калининград,4.22,middleage,1
3,0,A00,Калининград,4.22,young,1
4,0,A01,Калининград,4.22,middleage,1
5,0,A02.0,Гурьевск,4.22,children,1
6,0,A02.0,Калининград,4.22,children,1
7,0,A02.0,Черняховск,4.22,children,1
8,0,A03.9,Калининград,4.22,children,1
9,0,A04.0,Черняховск,4.22,children,0


Unnamed: 0,PATIENT_SEX,MKB_CODE,ADRES,VISIT_MONTH_YEAR,AGE_CATEGORY,PATIENT_ID_COUNT
0,0,A00.0,Гурьевск,8.21,young,1
1,0,A00.0,Калининград,3.2,children,1
2,0,A00,Гусев,3.19,children,1
3,0,A00,Калининград,1.22,children,1
4,0,A00,Калининград,2.18,children,1
5,0,A00,Калининград,3.22,children,4
6,0,A00,Калининград,3.22,elderly,1
7,0,A00,Калининград,3.22,middleage,1
8,0,A00,Калининград,3.22,young,3
9,0,A00,Калининград,7.18,young,1


In [11]:

for i in range(len(train['MKB_CODE'])):
    if train['MKB_CODE'][i] == 'A00':
        print('train', train.loc[i])
        print('')
for i in range(len(itog['MKB_CODE'])):
    if itog['MKB_CODE'][i] == 'A00':
        print('itog', itog.loc[i])
        print('')

train PATIENT_SEX                0
MKB_CODE                 A00
ADRES                  Гусев
VISIT_MONTH_YEAR        3.19
AGE_CATEGORY        children
PATIENT_ID_COUNT           1
Name: 2, dtype: object

train PATIENT_SEX                   0
MKB_CODE                    A00
ADRES               Калининград
VISIT_MONTH_YEAR           1.22
AGE_CATEGORY           children
PATIENT_ID_COUNT              1
Name: 3, dtype: object

train PATIENT_SEX                   0
MKB_CODE                    A00
ADRES               Калининград
VISIT_MONTH_YEAR           2.18
AGE_CATEGORY           children
PATIENT_ID_COUNT              1
Name: 4, dtype: object

train PATIENT_SEX                   0
MKB_CODE                    A00
ADRES               Калининград
VISIT_MONTH_YEAR           3.22
AGE_CATEGORY           children
PATIENT_ID_COUNT              4
Name: 5, dtype: object

train PATIENT_SEX                   0
MKB_CODE                    A00
ADRES               Калининград
VISIT_MONTH_YEAR           3