In [1]:
import os
import pickle
import cfgrib
import numpy as np
import pandas as pd
import geopandas as gpd
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from catboost import Pool, CatBoostClassifier
from OSMPythonTools.nominatim import Nominatim
from OSMPythonTools.overpass import overpassQueryBuilder, Overpass

# modules from the repository https://github.com/sberbank-ai/no_fire_with_ai_aij2021
import helpers, preprocessing, features_generation, prepare_train
from solution import FEATURES

import warnings
# Notebook-wide settings: silence every warning and make matplotlib figures
# default to a 16x8 size.
warnings.simplefilter(action="ignore")
plt.rcParams.update({"figure.figsize": (16, 8)})

In [2]:
# Load the training and validation tables. The first CSV column is dropped
# from both frames (presumably a saved row index -- TODO confirm).
train, val = (
    pd.read_csv(csv_path).iloc[:, 1:]
    for csv_path in ('train.csv', 'val.csv')
)

In [3]:
# Positional indices, within the FEATURES column order, of the features that
# are passed to CatBoost as categorical. Candidate names that are not part of
# FEATURES are dropped by the intersection.
feature_columns = train[FEATURES].columns
cat_features = [
    feature_columns.get_loc(name)
    for name in feature_columns.intersection(
        ['month', 'day', 'weekofyear', 'dayofweek', 'place']
    )
]
cat_features

[201, 203, 204, 205, 206]

In [4]:
def get_multiclass_target(df, n_days=8):
    """Collapse the per-day fire flags into a single multiclass label.

    Every row receives the smallest ``i`` in ``1..n_days`` for which
    ``infire_day_{i} == 1``; rows where no such column equals 1 (including
    rows holding only NaN) receive ``0``.

    Args:
        df: DataFrame holding the columns ``infire_day_1`` ...
            ``infire_day_{n_days}``. It is not modified.
        n_days: Number of day columns to scan. Defaults to 8.

    Returns:
        A float64 Series named ``multiclass`` that shares ``df``'s index.
        An empty ``df`` yields an empty Series instead of raising.

    Raises:
        KeyError: If one of the expected ``infire_day_{i}`` columns is absent.
    """
    # Start from the "no fire" class and build the label column directly,
    # rather than copying and NaN-filling the whole input frame.
    multiclass = pd.Series(0.0, index=df.index, name='multiclass')
    # Walk from the last day down to the first so that the earliest flagged
    # day is written last and therefore wins.
    for day in range(n_days, 0, -1):
        multiclass = multiclass.mask(df[f'infire_day_{day}'] == 1, float(day))
    return multiclass

In [5]:
# Cut the eight target columns out of each table by position (columns 11..18).
# get_multiclass_target reads them by name as infire_day_1 ... infire_day_8.
train_targets, val_targets = (
    frame.iloc[:, 11:11 + 8] for frame in (train, val)
)
# One multiclass label per row, derived from the daily flags.
train_target_mc, val_target_mc = map(
    get_multiclass_target, (train_targets, val_targets)
)

In [6]:
# Replace every NaN with 0, in place, in both tables.
for frame in (train, val):
    frame.fillna(0, inplace=True)

In [7]:
# Multiclass model: predicts the label produced by get_multiclass_target,
# with balanced class weights and the validation pool as the eval set.
model_mc = CatBoostClassifier(
    iterations=100,
    random_seed=8,
    eval_metric='MultiClass',
    auto_class_weights="Balanced",
)

train_dataset_mc = Pool(
    data=train[FEATURES], label=train_target_mc, cat_features=cat_features
)
eval_dataset_mc = Pool(
    data=val[FEATURES], label=val_target_mc, cat_features=cat_features
)

# Kept as the last expression so the cell still displays the fitted model.
model_mc.fit(train_dataset_mc, eval_set=eval_dataset_mc, verbose=False)

<catboost.core.CatBoostClassifier at 0x7f09cc7c69d0>

In [8]:
def _to_cumulative_targets(targets):
    """Make every non-zero flag "sticky" along the row.

    Zeros are turned into NaN so that a forward fill across the columns
    (axis=1) carries the last non-zero value to every later column; cells
    that are still NaN afterwards (nothing non-zero to their left) go back
    to 0, and the result is cast to int.

    Uses ``DataFrame.ffill`` rather than the deprecated
    ``fillna(method="ffill")``; the two are equivalent.

    Args:
        targets: DataFrame of per-day flags, one column per day.

    Returns:
        A new int DataFrame with the same shape, index and columns.
    """
    return targets.replace(0, np.nan).ffill(axis=1).fillna(0).astype(int)


train_targets = _to_cumulative_targets(train_targets)
val_targets = _to_cumulative_targets(val_targets)

In [9]:
# Train one binary classifier per target column (8 in total). Column i of the
# target frames is the label, and each model gets its own seed (i + 1).
models = []
for i in range(8):
    model = CatBoostClassifier(
        iterations=100,
        random_seed=i + 1,
        eval_metric='F1',
        auto_class_weights="Balanced",
    )
    train_dataset = Pool(
        data=train[FEATURES],
        label=train_targets.iloc[:, i],
        cat_features=cat_features,
    )
    eval_dataset = Pool(
        data=val[FEATURES],
        label=val_targets.iloc[:, i],
        cat_features=cat_features,
    )
    model.fit(train_dataset, eval_set=eval_dataset, verbose=False)
    models.append(model)

In [13]:
# Persist every per-day model and the multiclass model as pickle files under
# models/. makedirs(exist_ok=True) creates the folder when it is missing and
# is a no-op when it already exists, so no separate existence check is needed.
os.makedirs("models/", exist_ok=True)

for idx, model in enumerate(models):
    # Files are numbered from 1: models[0] -> model_1_day.pkl, and so on.
    path_to_model = f"models/model_{idx+1}_day.pkl"
    with open(path_to_model, 'wb') as f:
        pickle.dump(model, f)

with open("models/model_mc.pkl", 'wb') as f:
    pickle.dump(model_mc, f)

In [11]:
# TODO: score the trained models with helpers.competition_metric(...); the call was left without arguments.