## Загрузка необходимых библиотек

In [None]:
import pandas as pd
from pandas import MultiIndex, Int16Dtype
import numpy as np
import glob
import datetime
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from catboost import Pool, CatBoostRegressor
import xgboost as xgb

from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.preprocessing import StandardScaler, MinMaxScaler

## 1. Создание исходного набора данных

### Объединение данных по федеральным округам в единый набор

In [None]:
## Build the combined dataset
import warnings

folder = 'Datasets'
sets = []

# Read every per-district file ("*fo.csv") and collect the frames.
# sorted() makes the concatenation order (and therefore which duplicate is
# kept by drop_duplicates below) independent of the file-system listing order.
for f in sorted(glob.glob("{}/*fo.csv".format(folder))):
    try:
        sample = pd.read_csv(f, header=0, sep=';')
    except (OSError, UnicodeDecodeError,
            pd.errors.EmptyDataError, pd.errors.ParserError) as err:
        # Still best-effort (the file is skipped), but the skip is reported
        # instead of silently losing a whole input file.
        warnings.warn("Skipping unreadable file {}: {}".format(f, err))
        continue
    sets.append(sample)

if not sets:
    # pd.concat([]) would otherwise fail with "No objects to concatenate".
    raise ValueError("No readable '*fo.csv' files found in '{}'".format(folder))

weather = pd.concat(sets, ignore_index=True)
weather.drop_duplicates(keep='first', inplace=True, ignore_index=True)  # drop fully duplicated rows
weather['date'] = pd.to_datetime(weather['date'])  # parse the column as datetime
weather['week_num'] = weather['date'].dt.isocalendar().week  # ISO week number of each date

### Заполнение пустых значений

In [None]:
# Mean of every imputable indicator per station ('meteoid') and week number.
# The columns are selected BEFORE aggregating: only these 17 series are
# averaged, and columns that cannot be averaged never reach .mean()
# (in pandas >= 2.0 GroupBy.mean raises TypeError on non-numeric columns).
mean_cols = ['min_air_temp', 'max_air_temp', 'avg_air_temp',
             'min_temp', 'max_temp',
             'min_dew_point_temp', 'max_dew_point_temp', 'avg_dew_point_temp',
             'min_po_press', 'max_po_press', 'avg_po_press',
             'min_p_press', 'max_p_press', 'avg_p_press',
             'min_wind_speed', 'max_wind_speed', 'avg_wind_speed']

# reset_index() turns the two group keys back into ordinary leading columns.
means = weather.groupby(['meteoid', 'week_num'])[mean_cols].mean().reset_index()

In [None]:
# Mark every column of `means` with an "m_" prefix, then give the two merge
# keys their plain names back. `drop_col` keeps the full prefixed list because
# the helper columns are removed by name later on.
drop_col = [f'm_{name}' for name in means.columns]
means.columns = drop_col
means.rename(
    columns={"m_meteoid": "meteoid", "m_week_num": "week_num"},
    inplace=True,
)
means.head()

In [None]:
# Attach the per-station, per-week mean columns ("m_*") to every observation.
weather = weather.merge(means, how='left', on=['meteoid', 'week_num'])

In [None]:
## Filling NaN values
# Air temperature
# - gaps are replaced by the mean for the same station and week ("m_*" column)
# - min_temp / max_temp gaps are derived from the already filled air temperature
for stat in ('min', 'max', 'avg'):
    field = f'{stat}_air_temp'
    gaps = weather[field].isna()
    weather.loc[gaps, field] = weather.loc[gaps, f'm_{field}']

for stat in ('min', 'max'):
    field = f'{stat}_temp'
    gaps = weather[field].isna()
    weather.loc[gaps, field] = weather.loc[gaps, f'{stat}_air_temp'] * 1.05

In [None]:
## Filling NaN values
# Atmospheric pressure
# - gaps are replaced by the mean for the same station and week ("m_*" column)
for field in ('min_po_press', 'max_po_press', 'avg_po_press',
              'min_p_press', 'max_p_press', 'avg_p_press'):
    gaps = weather[field].isna()
    weather.loc[gaps, field] = weather.loc[gaps, 'm_' + field]

# Baric tendency: gaps are filled with 0.
for field in ('min_baric_tendency', 'max_baric_tendency', 'avg_baric_tendency'):
    weather.loc[weather[field].isna(), field] = 0

In [None]:
## Filling NaN values
# Dew point temperature
# - first estimated from air temperature and relative humidity
# - remaining gaps are replaced by the mean for the same station and week
def _dew_point_from_humidity(stat):
    """Estimate `<stat>_dew_point_temp` for the rows where it is missing.

    Uses air_temp - (1 - humidity / 100) / 0.05 on the rows whose dew point
    is NaN; the result is NaN wherever an input is NaN.
    """
    gaps = weather[f'{stat}_dew_point_temp'].isna()
    dryness = 1 - (weather.loc[gaps, f'{stat}_u_humidity'] / 100)
    return weather.loc[gaps, f'{stat}_air_temp'] - (dryness / 0.05)


t_dew_min = _dew_point_from_humidity('min')
t_dew_max = _dew_point_from_humidity('max')
t_dew_avg = _dew_point_from_humidity('avg')

for stat, estimate in (('min', t_dew_min), ('max', t_dew_max), ('avg', t_dew_avg)):
    field = f'{stat}_dew_point_temp'

    # 1) humidity-based estimate (assignment aligns on the row index)
    weather.loc[weather[field].isna(), field] = estimate

    # 2) fall back to the per-station weekly mean for whatever is still missing
    gaps = weather[field].isna()
    weather.loc[gaps, field] = weather.loc[gaps, f'm_{field}']

    # 3) every negative dew point value is set to 0
    weather.loc[weather[field] < 0, field] = 0

In [None]:
## Filling NaN values
# Relative humidity
# - estimated from air temperature and dew point temperature
def _humidity_from_dew_point(stat):
    """Estimate `<stat>_u_humidity` for the rows where it is missing.

    Uses (1 - 0.05 * (air_temp - dew_point_temp)) * 100 on the rows whose
    humidity is NaN; the result is NaN wherever an input is NaN.
    """
    gaps = weather[f'{stat}_u_humidity'].isna()
    spread = weather.loc[gaps, f'{stat}_air_temp'] - weather.loc[gaps, f'{stat}_dew_point_temp']
    return (1 - 0.05 * spread) * 100


hum_min = _humidity_from_dew_point('min')
hum_max = _humidity_from_dew_point('max')
hum_avg = _humidity_from_dew_point('avg')

for stat, estimate in (('min', hum_min), ('max', hum_max), ('avg', hum_avg)):
    field = f'{stat}_u_humidity'

    # 1) estimate from temperature and dew point (assignment aligns on the row index)
    weather.loc[weather[field].isna(), field] = estimate

    # 2) anything that could not be estimated becomes 0
    weather.loc[weather[field].isna(), field] = 0

    # 3) every negative relative humidity value is set to 0
    weather.loc[weather[field] < 0, field] = 0

In [None]:
## Filling NaN values
# Wind speed
# - gaps are replaced by the mean for the same station and week ("m_*" column)
for field in ('min_wind_speed', 'max_wind_speed', 'avg_wind_speed'):
    gaps = weather[field].isna()
    weather.loc[gaps, field] = weather.loc[gaps, 'm_' + field]

# Wind gusts ("before" and "between" series): gaps are filled with 0.
for suffix in ('wind_gust_before', 'wind_gust_between'):
    for stat in ('min', 'max', 'avg'):
        field = f'{stat}_{suffix}'
        weather.loc[weather[field].isna(), field] = 0

In [None]:
## Filling NaN values
# Ground temperature
# - gaps are derived from the matching air temperature times a fixed factor
ground_factors = (('max', 1.11), ('min', 0.90), ('avg', 0.98))

for stat, factor in ground_factors:
    field = f'{stat}_ground_temp'
    gaps = weather[field].isna()
    weather.loc[gaps, field] = weather.loc[gaps, f'{stat}_air_temp'] * factor

In [None]:
## Filling NaN values
# Precipitation
# - gaps in all three precipitation columns are filled with 0
for field in ('sum_precipitation', 'avg_precipitation', 'avg_precipitation_time'):
    weather.loc[weather[field].isna(), field] = 0

### Удаление лишних "рабочих" столбцов и сохранение данных в файл

In [None]:
## Drop the helper columns
# drop_col is expected to still hold the "m_"-prefixed names built from
# means.columns; its first two entries ('m_meteoid', 'm_week_num') are not
# columns of `weather`, so the slice [2:] leaves exactly the "m_*" mean columns.
weather.drop(columns=drop_col[2:], inplace=True)

In [None]:
## Dataset summary (columns, dtypes, non-null counts)
weather.info()

In [None]:
## Save the data to a file (both variants are currently disabled)
# (csv format)
#file_csv = pd.DataFrame(weather).to_csv('Datasets/weather_full.csv', sep=';', index=False)

# (excel format)
#file_excel = pd.DataFrame(weather).to_excel('Datasets/weather_full.xlsx')

In [None]:
del sets

## 2. Создание набора данных для обучения предиктивной модели

In [None]:
## Чтение данных
#weather = pd.read_csv('Datasets/Weather.csv', header=0, sep=';')

In [None]:
## Order the rows by station and date and renumber the index from 0
weather = (
    weather
    .sort_values(by=['meteoid', 'date'], ascending=True)
    .reset_index(drop=True)  # discard the old index instead of adding and dropping an 'index' column
)
weather.head()

In [None]:
## Drop columns that are not needed for the model dataset
# columns to remove (this rebinds drop_col, replacing the earlier "m_*" list)
drop_col = ['min_p_press', 'max_p_press', 'avg_p_press', 'min_visibility_range', 'max_visibility_range', 'avg_visibility_range']
# remove them in place
weather.drop(columns=drop_col, inplace=True)

In [None]:
## Dataset summary (columns, dtypes, non-null counts)
weather.info()

In [None]:
## Build one sample per station and year
# For every (station, year) pair the "season" is the block of rows whose index
# labels run from the first row with min_ground_temp >= 8.0 up to 21 labels
# further (at most 22 rows). Each season is flattened into one long list:
# all values of its first row, then all values of its second row, and so on.
season_data = []

for m in weather['meteoid'].unique():
    for y in weather['years'].unique():
        ds = weather.loc[(weather['years'] == y) & (weather['meteoid'] == m)]
        warm_idx = ds.loc[(ds['min_ground_temp'] >= 8.)].index
        if len(warm_idx) == 0:
            # No row reaches the threshold (this also covers a station with no
            # rows for that year). Skip explicitly so that a `season` left
            # over from a previous iteration is never reused.
            continue

        i_start = int(warm_idx.min())
        i_end = i_start + 21
        season = ds.loc[i_start:i_end]  # label-based slice, both ends inclusive

        # Flatten the season row by row and store the result exactly once
        # (appending inside the row loop would store the same list object
        # once per row).
        elem = []
        for row in season.values[:22]:
            elem += list(row)
        season_data.append(elem)

In [None]:
## Assemble the dataset used to train and test the predictive model
# Column names: every current column of `weather`, repeated for weeks 1..22
# in the form "w<week>_<column>" (week-major order, matching the flattened rows).
col = [
    'w{}_{}'.format(wk, name)
    for wk in range(1, 23)
    for name in weather.columns
]

# Wide frame for the ML model: one row per flattened season.
weather = pd.DataFrame(season_data, columns=col)
weather.drop_duplicates(keep='first', inplace=True, ignore_index=True)
weather.tail()

In [None]:
del season_data

In [None]:
## Remove the non-informative per-week service columns
# Weeks 2..21: date, year, station id and week number are all dropped.
drop_col = [
    'w{}_{}'.format(wk, name)
    for wk in range(2, 22)
    for name in ('date', 'years', 'meteoid', 'week_num')
]

# Week 1 keeps only its week number and week 22 keeps everything but its date;
# those survivors are renamed below.
edge_cols = ['w1_date', 'w1_years', 'w1_meteoid', 'w22_date']
weather.drop(columns=drop_col + edge_cols, inplace=True)

weather.rename(
    columns={
        "w1_week_num": "week_start",
        "w22_week_num": "week_end",
        "w22_years": "year",
        "w22_meteoid": "meteoid",
    },
    inplace=True,
)

In [None]:
## Dataset summary (columns, dtypes, non-null counts)
weather.info()

In [None]:
weather.tail()

In [None]:
## Save the model dataset to a file
# (csv format, ';' separator, without the index column)
# NOTE(review): to_csv returns None when a path is given, so file_csv is
# presumably not meant to be used afterwards - confirm before relying on it.
file_csv = weather.to_csv('Datasets/weather_data.csv', sep=';', index=False)

## 3. Обучение предиктивной модели

In [1]:
## Загрузка необходимых библиотек
import pandas as pd
from pandas import MultiIndex, Int16Dtype
import numpy as np
import glob
import datetime
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from catboost import CatBoostRegressor, Pool, cv
import xgboost as xgb

from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.preprocessing import StandardScaler, MinMaxScaler

  from pandas import MultiIndex, Int64Index


In [2]:
## Read the input data from files (all three are ';'-separated with a header row)
weather = pd.read_csv('Datasets/weather_data.csv', header=0, sep=';')
settl = pd.read_csv('Datasets/Settlement_Id.csv', header=0, sep=';')
target = pd.read_csv('Datasets/Target.csv', header=0, sep=';')

In [3]:
## Attach weather-station ids to the frame holding the target variable
# digit_id -> meteoid pairs taken from the settlement table, without repeats
meteo = settl[['digit_id', 'meteoid']].drop_duplicates()

target = (
    target[['digit_id', 'year', 'category', 'yield']]
    .merge(meteo, how='left', on=['digit_id'])  # rows without a matching digit_id keep NaN meteoid
    .drop_duplicates(keep='first', ignore_index=True)
)

In [4]:
## Drop the lookup frames that are no longer referenced
del settl
del meteo

In [5]:
## Build the training frame
# digit_id was only needed to look up the station id.
target = target.drop(columns=['digit_id'])

# Right join: every target row is kept; weather features are matched by
# (year, meteoid) and stay NaN where no season exists for that pair.
data = (
    weather
    .merge(target, how='right', on=['year', 'meteoid'])
    .drop_duplicates(keep='first', ignore_index=True)
)

In [6]:
del weather

**Что делаем:** <br>
- разбить данные на train и test (test: year=2021) <br>
- назначить label (параметр "yield") <br>
- удалить лишние столбцы ('digit_id', 'year', 'meteoid') <br>
- ??? нормализовать данные (StandardScaler) <br>
- форматировать данные с помощью Pool <br>

In [7]:
## Split the data into train and test
# Rows of year 2021 form the test set; every other row goes to train.
held_out = data['year'] == 2021

train_label = data.loc[~held_out, 'yield']
test_label = data.loc[held_out, 'yield']

# Features: everything except the split key, the station id and the label.
service_cols = ['year', 'meteoid', 'yield']
train = data.loc[~held_out].drop(columns=service_cols)
test = data.loc[held_out].drop(columns=service_cols)

In [8]:
# Names of the categorical feature columns passed to CatBoost.
category = ['category']

# Training pool: features plus the 'yield' label.
train_data = Pool(data=train,
                  label= train_label,
                  cat_features=category)

# Test pool: features only - no label is attached here, test_label is kept
# separately.
test_data = Pool(data=test,
                 cat_features=category)

In [25]:
# Baseline model: RMSE loss, 5000 iterations, all other parameters left at
# their defaults. Training is silent (verbose=False) with the plot widget enabled.
cat = CatBoostRegressor(iterations=5000, loss_function='RMSE')
cat.fit(train_data, verbose=False, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

<catboost.core.CatBoostRegressor at 0x2c8fe3fac10>

In [27]:
# 3-fold cross-validation of the baseline configuration on the training data.
# The pool is rebuilt here so the cell can be run on its own once `train`,
# `train_label` and `category` exist.
train_data = Pool(data=train,
                  label=train_label,
                  cat_features=category)

params = {"iterations": 5000,
          "loss_function": 'RMSE',
          "verbose": False}

# `plot` is a boolean switch; pass True rather than the string "True", which
# only worked because any non-empty string is truthy.
scores = cv(train_data,
            params,
            fold_count=3,
            plot=True)


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Training on fold [0/3]

bestTest = 4.310993085
bestIteration = 2865

Training on fold [1/3]

bestTest = 4.275437712
bestIteration = 2571

Training on fold [2/3]

bestTest = 4.275863891
bestIteration = 2897



In [10]:
# Coarse hyper-parameter search: 4 * 4 * 4 * 3 = 192 combinations, each
# evaluated with 3-fold cross-validation on the training pool.
model = CatBoostRegressor(iterations=3000, loss_function='RMSE', verbose=False) #, boosting_type='Ordered')

# NOTE(review): no grow_policy is set on `model`; CatBoost documents
# 'max_leaves' as applying to the Lossguide policy only - confirm that this
# axis of the grid has any effect here.
grid = {'depth': [4, 6, 10, 15],
        'l2_leaf_reg': [1, 3, 5, 7],
        'learning_rate': [0.025, 0.05, 0.075, 0.1],
        'max_leaves': [50, 64, 100]
       }

grid_result = model.grid_search(grid,
                                train_data,
                                cv=3,
                                plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))


bestTest = 4.284136717
bestIteration = 2954

0:	loss: 4.2841367	best: 4.2841367 (0)	total: 10m 33s	remaining: 1d 9h 38m 4s

bestTest = 4.27857164
bestIteration = 1396

1:	loss: 4.2785716	best: 4.2785716 (1)	total: 21m 50s	remaining: 1d 10h 34m 47s

bestTest = 4.28677876
bestIteration = 1028

2:	loss: 4.2867788	best: 4.2785716 (1)	total: 32m 49s	remaining: 1d 10h 28m 5s

bestTest = 4.281994157
bestIteration = 729

3:	loss: 4.2819942	best: 4.2785716 (1)	total: 42m 25s	remaining: 1d 9h 13m 55s

bestTest = 4.284150748
bestIteration = 2999

4:	loss: 4.2841507	best: 4.2785716 (1)	total: 54m 23s	remaining: 1d 9h 54m 18s

bestTest = 4.28233531
bestIteration = 1787

5:	loss: 4.2823353	best: 4.2785716 (1)	total: 1h 4m 58s	remaining: 1d 9h 34m 5s

bestTest = 4.290433963
bestIteration = 1191

6:	loss: 4.2904340	best: 4.2785716 (1)	total: 1h 16m 11s	remaining: 1d 9h 33m 42s

bestTest = 4.290399659
bestIteration = 940

7:	loss: 4.2903997	best: 4.2785716 (1)	total: 1h 27m 43s	remaining: 1d 9h 37m 31

In [12]:
# Finer hyper-parameter search over narrower ranges: 3 * 3 * 3 * 3 = 81
# combinations, each evaluated with 3-fold cross-validation.
model2 = CatBoostRegressor(iterations=3000, loss_function='RMSE', verbose=False) #, boosting_type='Ordered')
# NOTE(review): as in the coarse search, 'max_leaves' is documented as a
# Lossguide-only parameter and no grow_policy is set - confirm it has an effect.
grid2 = {'depth': [5, 6, 7],
        'l2_leaf_reg': [0.75, 1, 1.25],
        'learning_rate': [0.04, 0.05, 0.06],
        'max_leaves': [63, 64, 65]
       }

grid_result2 = model2.grid_search(grid2,
                                train_data,
                                cv=3,
                                plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))


bestTest = 4.280293677
bestIteration = 1657

0:	loss: 4.2802937	best: 4.2802937 (0)	total: 9m 42s	remaining: 12h 56m 17s

bestTest = 4.283499996
bestIteration = 1496

1:	loss: 4.2835000	best: 4.2802937 (0)	total: 19m 56s	remaining: 13h 7m 29s

bestTest = 4.283779254
bestIteration = 1265

2:	loss: 4.2837793	best: 4.2802937 (0)	total: 30m 34s	remaining: 13h 14m 51s

bestTest = 4.278388806
bestIteration = 1902

3:	loss: 4.2783888	best: 4.2783888 (3)	total: 40m 10s	remaining: 12h 53m 15s

bestTest = 4.27857164
bestIteration = 1396

4:	loss: 4.2785716	best: 4.2783888 (3)	total: 50m 53s	remaining: 12h 53m 26s

bestTest = 4.281986587
bestIteration = 1099

5:	loss: 4.2819866	best: 4.2783888 (3)	total: 1h 1m 58s	remaining: 12h 54m 35s

bestTest = 4.280016739
bestIteration = 1849

6:	loss: 4.2800167	best: 4.2783888 (3)	total: 1h 12m	remaining: 12h 41m 10s

bestTest = 4.286722293
bestIteration = 1445

7:	loss: 4.2867223	best: 4.2783888 (3)	total: 1h 22m 6s	remaining: 12h 29m 12s

bestTest = 4.28

In [34]:
cat.get_all_params()

{'nan_mode': 'Min',
 'eval_metric': 'RMSE',
 'combinations_ctr': ['Borders:CtrBorderCount=15:CtrBorderType=Uniform:TargetBorderCount=1:TargetBorderType=MinEntropy:Prior=0/1:Prior=0.5/1:Prior=1/1',
  'Counter:CtrBorderCount=15:CtrBorderType=Uniform:Prior=0/1'],
 'iterations': 1000,
 'sampling_frequency': 'PerTree',
 'fold_permutation_block': 0,
 'leaf_estimation_method': 'Newton',
 'counter_calc_method': 'SkipTest',
 'grow_policy': 'SymmetricTree',
 'penalties_coefficient': 1,
 'boosting_type': 'Plain',
 'model_shrink_mode': 'Constant',
 'feature_border_type': 'GreedyLogSum',
 'ctr_leaf_count_limit': 18446744073709551615,
 'bayesian_matrix_reg': 0.10000000149011612,
 'one_hot_max_size': 2,
 'force_unit_auto_pair_weights': False,
 'l2_leaf_reg': 3,
 'random_strength': 1,
 'rsm': 1,
 'boost_from_average': True,
 'max_ctr_complexity': 4,
 'model_size_reg': 0.5,
 'simple_ctr': ['Borders:CtrBorderCount=15:CtrBorderType=Uniform:TargetBorderCount=1:TargetBorderType=MinEntropy:Prior=0/1:Prior=0

In [21]:
test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4306 entries, 14 to 52628
Columns: 1037 entries, w1_min_air_temp to category
dtypes: float64(1036), object(1)
memory usage: 34.1+ MB


In [23]:
train.tail()

Unnamed: 0,w1_min_air_temp,w1_max_air_temp,w1_avg_air_temp,w1_min_temp,w1_max_temp,w1_min_dew_point_temp,w1_max_dew_point_temp,w1_avg_dew_point_temp,w1_min_ground_temp,w1_max_ground_temp,...,w22_avg_height_clouds,w22_avg_cm_clouds,w22_avg_ch_clouds,w22_sum_precipitation,w22_avg_precipitation,w22_avg_precipitation_time,w22_avg_soil_surface,w22_avg_soil_surface_withsnow,week_end,category
52623,15.1,32.3,23.708929,14.4,33.4,8.1,19.2,12.708929,11.0,19.0,...,803.571429,1.339286,1.089286,9.45,0.726923,12.0,0.0,1.035714,47.0,СХО
52624,13.8,32.6,23.867857,13.6,33.0,8.0,19.3,13.233929,12.0,18.0,...,1102.678571,4.035714,1.0,8.45,0.603571,12.0,0.0,0.928571,47.0,СХО
52625,12.4,23.5,18.126786,12.0,24.7,6.5,16.9,11.132143,9.0,15.0,...,514.545455,1.290909,1.072727,12.5,0.892857,12.0,0.0,0.327273,47.0,Хозяйства всех категорий
52626,11.1,23.0,16.746429,10.4,23.9,6.1,13.6,10.348214,10.0,15.0,...,1363.392857,3.696429,1.0,0.75,0.053571,12.0,0.0,0.160714,45.0,Хозяйства всех категорий
52629,13.8,32.6,23.867857,13.6,33.0,8.0,19.3,13.233929,12.0,18.0,...,1102.678571,4.035714,1.0,8.45,0.603571,12.0,0.0,0.928571,47.0,Хозяйства всех категорий


In [10]:
#data.columns[-60:]