In [6]:
from sklearn.ensemble import ExtraTreesRegressor, BaggingRegressor, RandomForestRegressor
from lightgbm import LGBMRegressor
from sklearn.linear_model import LinearRegression

In [7]:
import pandas as pd 
y_df = pd.read_csv('/home/oleg/Documents/matmod_challeng/y.csv')
y_df.columns[3:]

Index(['BRAT', 'DEGT', 'DELFN', 'DELN1', 'DELVSV', 'DPOIL', 'EGTC', 'EGTHDM',
       'EGTHDM_D', 'GEGTMC', 'GN2MC', 'GPCN25', 'GWFM', 'PCN12', 'PCN12I',
       'PCN1AR', 'PCN1BR', 'PCN1K', 'PCN2C', 'SLOATL', 'SLOATL_D', 'VSVNOM',
       'WBE', 'WBI', 'WFMP', 'ZPCN25_D', 'ZT49_D', 'ZTLA_D', 'ZTNAC_D',
       'ZWF36_D'],
      dtype='object')

In [8]:


fields_and_models = { 'BRAT': LGBMRegressor,
                    'DEGT': ExtraTreesRegressor,
                    'DELFN': LGBMRegressor,
                    'DELN1': BaggingRegressor,
                    'DELVSV': LGBMRegressor,
                    'DPOIL': LGBMRegressor,
                    'EGTC': LGBMRegressor,
                    'EGTHDM': ExtraTreesRegressor,
                    'EGTHDM_D': ExtraTreesRegressor,
                    'GEGTMC': LGBMRegressor,
                    'GN2MC': LGBMRegressor,
                    'GPCN25': ExtraTreesRegressor,
                    'GWFM': ExtraTreesRegressor,
                    'PCN12': ExtraTreesRegressor,
                    'PCN12I': LGBMRegressor,
                    'PCN1AR': LGBMRegressor,
                    'PCN1BR': LGBMRegressor,
                    'PCN1K': ExtraTreesRegressor,
                    'PCN2C': LGBMRegressor,
                    'SLOATL': ExtraTreesRegressor,
                    'SLOATL_D': ExtraTreesRegressor,
                    'WBE': LinearRegression,
                    'WBI':  LGBMRegressor,
                    'WFMP': LGBMRegressor,
                    'ZPCN25_D': ExtraTreesRegressor,
                    'ZT49_D': ExtraTreesRegressor,
                    'ZTLA_D': ExtraTreesRegressor,
                    'ZTNAC_D': ExtraTreesRegressor,
                    'ZWF36_D': RandomForestRegressor }   # field: model

In [9]:
import pandas as pd
import pickle


def create_model(x_path: str, savepath: str):
    x_df = pd.read_csv(x_path)
    y_df = pd.read_csv('/home/oleg/Documents/matmod_challeng/y.csv')
    merged_df = pd.merge(x_df, y_df, on=["engine_id", "flight_datetime", "flight_phase"])

    merged_df = merged_df.dropna(axis=1, how='all')
    y_params = y_df.columns[3:]


    params_to_predict = []


    for val in y_params:
        if val in merged_df:
            params_to_predict.append(val)
    
    for predicted_param in params_to_predict:    
        
        # init df
        merged_df = pd.merge(x_df, y_df, on=["engine_id", "flight_datetime", "flight_phase"])

        # df w/o y params
        params_to_delete = y_params.drop(predicted_param)
        merged_df = merged_df.drop(params_to_delete, axis=1)

        merged_df = merged_df.dropna(how='all', axis=1)

        # cleaned df
        to_rm = [  "flight_datetime", 'engine_id', 'aircraft_id', 'flight_phase', 'engine_position', 'number_blades', 'engine_family', 'engine_type', 'manufacturer', 'aircraft_family', 'aircraft_grp', 'ac_manufacturer', 'aircraft_type', ]
        
        if 'Unnamed: 0' in merged_df.columns:
            merged_df = merged_df.drop(['Unnamed: 0'], axis=1)
        
        merged_df = merged_df.drop(to_rm, axis=1)
        merged_df = merged_df.loc[:,merged_df.apply(pd.Series.nunique) != 1]


        if predicted_param in merged_df.columns:
            columns_w_train_data = merged_df.columns.drop([predicted_param])

            x = merged_df[columns_w_train_data]
            y = merged_df[predicted_param]

            x_train = x.fillna(0)
            y_train = y.fillna(0)

            # select model
            model = fields_and_models[predicted_param]()
            model.fit(x_train, y_train)


            savepath_cp = savepath + '_' + predicted_param + '.pickle'
            pickle.dump(model, open( savepath_cp, "wb"))



In [10]:
import pandas as pd
import re
from pathlib import Path


orig_df = pd.read_csv('X.csv')

engine_types = orig_df['engine_type'].unique()
flight_phases = orig_df['flight_phase'].unique()
n1_modifiers = orig_df['n1_modifier'].unique()



print('------------- split by eng, phase, n1')
# split by eng, phase, n1
for engine_type in engine_types:
    for flight_phase in flight_phases:
        for n1_modifier in n1_modifiers:
            engine_type = re.sub(r'[/]', '', engine_type)
            filename = str(engine_type) + '_' + str(n1_modifier) + '_' + str(flight_phase)

            x_dir = '/home/oleg/Documents/matmod_challeng/splitted_x_data/by_eng_phase_n1/' + filename
            
            my_file = Path(x_dir)
            if my_file.is_file():
                model_savepath = '/home/oleg/Documents/matmod_challeng/splitted_models/' + x_dir.split('/')[-2] + '/' + x_dir.split('/')[-1] 

                create_model(x_dir, model_savepath )
                


print('------------- split by phase')
# split by phase

for flight_phase in flight_phases:
    filename = str(flight_phase)

    x_dir = '/home/oleg/Documents/matmod_challeng/splitted_x_data/by_phase/' + filename
    
    my_file = Path(x_dir)
    if my_file.is_file():
        model_savepath = '/home/oleg/Documents/matmod_challeng/splitted_models/' + x_dir.split('/')[-2] +  '/' + x_dir.split('/')[-1]  

        create_model(x_dir, model_savepath)


print('------------- general')
# general

filename = 'general'

x_dir = '/home/oleg/Documents/matmod_challeng/splitted_x_data/general/' + filename

my_file = Path(x_dir)
if my_file.is_file():
    model_savepath = '/home/oleg/Documents/matmod_challeng/splitted_models/' + x_dir.split('/')[-2] +  '/' + x_dir.split('/')[-1]  

    create_model(x_dir, model_savepath)



------------- split by eng, phase, n1
------------- split by phase
------------- general
