# 06_IDAO_Fit_models

* обучение и сохранение моделей для каждого параметра отдельно

In [1]:
import sys
import warnings
warnings.filterwarnings('ignore')
import time

from sklearn.externals import joblib

import matplotlib.pylab as plt
import pandas as pd
import numpy as np
import os.path

from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from sklearn.preprocessing import MinMaxScaler

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import lightgbm as ltb



Выбираем параметр:

In [16]:
# Name of the target column to model in this run; it is also embedded in the
# saved model paths built by create_model.
param = 'x'

Выбираем значимые признаки, полученные на этапе feature engineering:

In [17]:
# Feature columns fed to the model for the selected parameter.
features_for_param = [
    # raw simulated state
    'x_sim', 'y_sim', 'z_sim',
    'Vx_sim', 'Vy_sim', 'Vz_sim',
    # pairwise combinations of simulated positions
    'sum_x_y', 'sum_x_z', 'sum_y_z',
    'dif_x_y', 'dif_x_z', 'dif_y_z',
    'mul_x_y', 'mul_x_z',
    # pairwise combinations of simulated velocities
    'sum_Vx_Vy', 'sum_Vx_Vz', 'sum_Vy_Vz',
    'dif_Vx_Vy',
    'mul_Vx_Vy',
    # position ratio
    'div_x_y',
]

# LightGBM regressor template; create_model refits this same object for every satellite.
model_LTB = ltb.LGBMRegressor(
    n_estimators=3000,
    max_depth=13,
    learning_rate=0.3,
    random_state=42,
    silent=True,
)

In [4]:
def smape(satellite_predicted_values, satellite_true_values):
    """Mean of |pred - true| / (|pred| + |true|), computed pointwise.

    Parameters
    ----------
    satellite_predicted_values : array-like
        Predicted values.
    satellite_true_values : array-like
        Reference values; must be broadcastable against the predictions.

    Returns
    -------
    float
        The mean of the pointwise ratios. The formula is symmetric in its two
        arguments, so the argument order does not affect the result.

    Notes
    -----
    Inputs are converted with ``np.asarray`` and therefore compared
    positionally. A term whose prediction and true value are both zero has a
    zero denominator; it is counted as zero error instead of producing NaN
    and poisoning the whole mean.
    """
    predicted = np.asarray(satellite_predicted_values, dtype=float)
    actual = np.asarray(satellite_true_values, dtype=float)

    abs_error = np.abs(predicted - actual)
    scale = np.abs(predicted) + np.abs(actual)

    # Divide only where the denominator is non-zero; the remaining entries keep
    # the 0.0 they were initialised with (|pred| + |true| == 0 implies both are 0).
    ratio = np.divide(abs_error, scale,
                      out=np.zeros_like(abs_error),
                      where=scale != 0)
    return np.mean(ratio)

In [5]:
# SMAPE is an error measure (lower is better). With greater_is_better=False
# scikit-learn negates the value, so that "higher score is better" holds for
# GridSearchCV / cross_val_score; reported scores are therefore negative SMAPE.
smape_score = make_scorer(smape, greater_is_better=False)

In [6]:
# Training data (the 'Unnamed: 0' column written by a previous to_csv is discarded)
df_train = pd.read_csv('data/Track 1/train_final.csv').drop(columns=['Unnamed: 0'])
# Test data, same treatment
df_test = pd.read_csv('data/Track 1/test_final.csv').drop(columns=['Unnamed: 0'])

In [7]:
# Target column layout: identifiers, true state, then simulated state.
neworder = [
    'id', 'sat_id',
    'x', 'y', 'z', 'Vx', 'Vy', 'Vz',
    'x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim',
]
# Drop the time columns, reorder, and store satellite ids as integers.
df_train = (
    df_train
    .drop(columns=['time', 'date'])
    .reindex(columns=neworder)
    .astype({'sat_id': 'int64'})
)

In [8]:
# Preview the first three rows of the reordered training frame.
df_train.head(3)

Unnamed: 0,id,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim
0,1819.0,1,10390.313089,-2796.458271,3179.562085,2.520477,6.14993,-2.827599,10405.813755,-2771.18076,3166.926302,2.508879,6.152996,-2.826227
1,,1,10428.328529,-2424.698448,2989.854516,2.348307,6.134658,-2.847816,10443.297921,-2399.492986,2977.491684,2.337052,6.137385,-2.846235
2,,1,10466.34397,-2052.938626,2800.146946,2.176136,6.119386,-2.868033,10480.782087,-2027.805212,2788.057066,2.165225,6.121774,-2.866242


In [9]:
# Collect the distinct satellite ids that appear in the test set.
sat_id_test = list(set(df_test['sat_id'].astype('int64')))
# Only the ids are needed from here on, so release the test frame.
del df_test

In [18]:
def _add_pairwise_features(df_sat):
    """Return a copy of ``df_sat`` extended with pairwise combinations of the ``*_sim`` columns.

    For every pair among (x, y, z) the sum, difference, product and quotient of
    the corresponding ``*_sim`` columns are added; for every pair among
    (Vx, Vy, Vz) the sum, difference and product are added. New columns are
    named ``<op>_<first>_<second>``, e.g. ``sum_x_y`` or ``mul_Vx_Vz``.
    """
    # Work on a copy: the input is usually a filtered slice of a larger frame,
    # and assigning columns on such a slice triggers SettingWithCopyWarning.
    enriched = df_sat.copy()

    combine = {
        'sum': lambda left, right: left + right,
        'dif': lambda left, right: left - right,
        'mul': lambda left, right: left * right,
        # NOTE(review): a zero denominator yields inf/NaN here; values are passed on unchanged.
        'div': lambda left, right: left / right,
    }
    feature_groups = (
        ((('x', 'y'), ('x', 'z'), ('y', 'z')), ('sum', 'dif', 'mul', 'div')),
        ((('Vx', 'Vy'), ('Vx', 'Vz'), ('Vy', 'Vz')), ('sum', 'dif', 'mul')),
    )
    for pairs, prefixes in feature_groups:
        for prefix in prefixes:
            for first, second in pairs:
                column = prefix + '_' + first + '_' + second
                enriched[column] = combine[prefix](enriched[first + '_sim'],
                                                   enriched[second + '_sim'])
    return enriched


def create_model(df, list_sat_id, list_of_features, model_LTB):
    """Fit and save one model per satellite for the module-level target ``param``.

    Parameters
    ----------
    df : pandas.DataFrame
        Training data containing ``sat_id``, the ``*_sim`` columns and the
        target column named by the global ``param``.
    list_sat_id : iterable
        Satellite ids to build models for.
    list_of_features : list of str
        Feature columns passed to the model (raw and/or derived pairwise features).
    model_LTB : estimator
        Object exposing ``fit(X, y)``. The same object is refit for each
        satellite and serialised with joblib after every fit.

    Notes
    -----
    Models are written to ``models/minute_<param>/lgb_<param>_<sat_id>.pkl``.
    A satellite whose file already exists is skipped, so an interrupted run can
    be resumed. Progress is reported with ``print``.
    """
    model_dir = 'models/minute_' + param
    # joblib.dump does not create missing directories.
    os.makedirs(model_dir, exist_ok=True)

    for sat_id in list_sat_id:
        # Path of the model file for this satellite.
        model_path = model_dir + '/lgb_' + param + '_' + str(sat_id) + '.pkl'

        # Skip satellites whose model was created earlier.
        if os.path.exists(model_path):
            print('model', sat_id, 'already created')
            continue

        # Use the frame passed by the caller (not the global df_train).
        df_one_sat = _add_pairwise_features(df.loc[df['sat_id'] == sat_id])

        features_train = df_one_sat[list_of_features]
        # 1-d target: avoids the column-vector conversion warning raised for (n, 1) frames.
        target_train = df_one_sat[param]

        model_LTB.fit(features_train, target_train)
        joblib.dump(model_LTB, model_path)
        print('model', sat_id, 'created')
        

In [19]:
create_model(df_train, sat_id_test, features_for_param, model_LTB)

model 1 created
model 514 created
model 2 created
model 3 created
model 4 created
model 6 created
model 515 created
model 516 created
model 9 created
model 517 created
model 519 created
model 522 created
model 523 created
model 524 created
model 16 created
model 528 created
model 20 created
model 22 created
model 24 created
model 25 created
model 26 created
model 27 created
model 28 created
model 29 created
model 536 created
model 537 created
model 32 created
model 539 created
model 34 created
model 35 created
model 36 created
model 37 created
model 38 created
model 39 created
model 40 created
model 41 created
model 42 created
model 547 created
model 44 created
model 45 created
model 549 created
model 550 created
model 558 created
model 49 created
model 51 created
model 52 created
model 53 created
model 54 created
model 563 created
model 565 created
model 57 created
model 566 created
model 59 created
model 567 created
model 569 created
model 572 created
model 63 created
model 64 create