In [1]:
from my_models_pipline import ModelsPipline
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
import create_dataset_functions as f
import numpy as np
import pandas as pd
from collections import defaultdict
import pickle


class MainNavigation:
    def __init__(self, models_grid = None, param_grids = None, agg_func = None):
        if models_grid is not None:
            self.models_grid = models_grid
        else:
            self.models_grid = {'CatBoost': lambda: CatBoostClassifier(),
                                # 'RandomForest' : lambda: RandomForestClassifier()
                                }
        if param_grids is not None:
            self.param_grids = param_grids
        else:
            catboost_param_grid = {
                'depth': [4, 6],
                'iterations': [100],
                }

            # catboost_param_grid = {
            #     'depth': [4, 6],
            #     'learning_rate': [0.05],
            #     'iterations': [1000, 1500],
            #     'early_stopping_rounds': [50, 100]
            #     }


            # random_forest_param_grid = {
            #     'n_estimators': [100],
            #     'max_depth': [10, 20],
            #     'min_samples_split': [5]
            #     }

            self.param_grids = {
                            'CatBoost': catboost_param_grid,
                            # 'RandomForest': random_forest_param_grid
                            }
                
        if agg_func is not None:
            self.features = agg_func
        else: 
            self.features = [
                ("Range", lambda x: x.max() - x.min()),  # Размах
                ('Q1', lambda x: np.percentile(x, 25) if len(x) != 0 else np.nan),  # Первый квартиль (Q1)
                ("Q3", lambda x: np.percentile(x, 75) if len(x) != 0 else np.nan),  # Третий квартиль (Q3)
                ("IQR", lambda x: (np.percentile(x, 75) - np.percentile(x, 25)) if len(x) != 0 else np.nan),  # Межквартильный размах (IQR)
                ("Autocorr", lambda x: x.autocorr() if len(x) > 1 else np.nan),  # Автокорреляция
                ("Skewness", lambda x: x.skew() if len(x) != 0 else np.nan),  # Скос
                ("Kurtosis", lambda x: x.kurt() if len(x) != 0 else np.nan),  # Куртозис
                ("Mean", lambda x: x.mean() if len(x) != 0 else np.nan),  # Среднее значение
                ("Median", lambda x: x.median() if len(x) != 0 else np.nan),  # Медиана
                ("Std", lambda x: x.std() if len(x) != 0 else np.nan),  # Стандартное отклонение
                ("Min", lambda x: x.min() if len(x) != 0 else np.nan),  # Минимум
                ("Max", lambda x: x.max() if len(x) != 0 else np.nan)  # Максимум
                ]
        self.grid_data = defaultdict()
        self.trained_model_name = None

    def create_dataset(self, data_type = 'rolling', train_path = 'train.parquet', test_path = 'train.parquet'):
        if data_type == 'rolling':
            id_convert_ts_train, id_convert_ts_test = f.convert_rolling_time_series(train_path=train_path, 
                                                                                    test_path= test_path)
        else:
            id_convert_ts_train, id_convert_ts_test = f.convert_time_series(train_path=train_path, 
                                                                            test_path= test_path)
        
        train_data = f.create_dataset(self.features, id_ts= id_convert_ts_train)
        train_data_file = f'intervals_dataframe{len(self.grid_data)}.csv'
        train_data_name = f'{data_type}_Intervals_12Stats_{len(self.grid_data)}'
        train_data.to_csv(train_data_file)
        self.grid_data[train_data_name] = train_data_file

        test_data = f.create_dataset(self.features, id_ts= id_convert_ts_test)
        test_data_file = 'test_data.csv'
        test_data.to_csv(test_data_file)
        self.grid_data['Test'] = train_data_file


    def run_model_selection(self):
        if len(self.grid_data) == 0:
            self.grid_data = {
                'rolling_Intervals_12Stats_0': 'intervals_dataframe0.csv',
                'Test': 'test_data.csv'
            }
        model_selector = ModelsPipline(
                                    models = self.models_grid, 
                                    param_grids= self.param_grids,
                                    dataframes_path = self.grid_data)
        model_selector.run()
        results = pd.DataFrame(model_selector.results)
        print(results)
        results = results.sort_values(by='score')
        self.trained_model_name = model_selector.final_model_fit_save(results.iloc[0,:0])
        return results
        
    def get_submission(self):
        with open(self.trained_model_name, 'rb') as file:
            model = pickle.load(file)
        submission = model.predict(self.grid_data['Test'])
        submission.to_csv('submissions.csv')
        return submission

In [2]:
nagivation = MainNavigation()
# nagivation.create_dataset()
nagivation.run_model_selection()
nagivation.get_submission()

  0%|          | 0/2 [00:00<?, ?it/s]

intervals_dataframe0.csv




Learning rate set to 0.47469
0:	learn: 0.5622458	total: 166ms	remaining: 16.5s
1:	learn: 0.5169527	total: 180ms	remaining: 8.82s
2:	learn: 0.4954448	total: 192ms	remaining: 6.22s
3:	learn: 0.4825047	total: 204ms	remaining: 4.89s
4:	learn: 0.4726146	total: 217ms	remaining: 4.12s
5:	learn: 0.4659951	total: 234ms	remaining: 3.66s
6:	learn: 0.4582833	total: 249ms	remaining: 3.3s
7:	learn: 0.4522430	total: 263ms	remaining: 3.03s
8:	learn: 0.4458535	total: 277ms	remaining: 2.8s
9:	learn: 0.4400648	total: 289ms	remaining: 2.6s
10:	learn: 0.4363297	total: 301ms	remaining: 2.43s
11:	learn: 0.4319404	total: 313ms	remaining: 2.29s
12:	learn: 0.4294170	total: 325ms	remaining: 2.17s
13:	learn: 0.4250636	total: 337ms	remaining: 2.07s
14:	learn: 0.4230035	total: 349ms	remaining: 1.98s
15:	learn: 0.4211167	total: 362ms	remaining: 1.9s
16:	learn: 0.4191905	total: 376ms	remaining: 1.84s
17:	learn: 0.4174596	total: 389ms	remaining: 1.77s
18:	learn: 0.4148404	total: 404ms	remaining: 1.72s
19:	learn: 0.413




91:	learn: 0.3477329	total: 1.27s	remaining: 111ms
92:	learn: 0.3473104	total: 1.29s	remaining: 97.1ms
93:	learn: 0.3466747	total: 1.31s	remaining: 83.4ms
94:	learn: 0.3460946	total: 1.32s	remaining: 69.7ms
95:	learn: 0.3457633	total: 1.34s	remaining: 55.8ms
96:	learn: 0.3454040	total: 1.35s	remaining: 41.8ms
97:	learn: 0.3449754	total: 1.36s	remaining: 27.8ms
98:	learn: 0.3445304	total: 1.38s	remaining: 13.9ms
99:	learn: 0.3439541	total: 1.4s	remaining: 0us


100%|██████████| 1/1 [00:20<00:00, 20.21s/it][A
 50%|█████     | 1/2 [00:21<00:21, 21.34s/it]

test_data.csv




Learning rate set to 0.47469
0:	learn: 0.5580566	total: 14.3ms	remaining: 1.42s
1:	learn: 0.5162546	total: 27.6ms	remaining: 1.35s
2:	learn: 0.4993501	total: 39.9ms	remaining: 1.29s
3:	learn: 0.4863152	total: 52.3ms	remaining: 1.25s
4:	learn: 0.4748169	total: 65.1ms	remaining: 1.24s
5:	learn: 0.4652014	total: 78.9ms	remaining: 1.24s
6:	learn: 0.4578807	total: 90.9ms	remaining: 1.21s
7:	learn: 0.4532094	total: 102ms	remaining: 1.17s
8:	learn: 0.4475100	total: 115ms	remaining: 1.16s
9:	learn: 0.4430933	total: 131ms	remaining: 1.18s
10:	learn: 0.4394084	total: 143ms	remaining: 1.16s
11:	learn: 0.4348377	total: 157ms	remaining: 1.15s
12:	learn: 0.4317669	total: 171ms	remaining: 1.15s
13:	learn: 0.4295894	total: 184ms	remaining: 1.13s
14:	learn: 0.4271994	total: 195ms	remaining: 1.11s
15:	learn: 0.4245471	total: 209ms	remaining: 1.09s
16:	learn: 0.4219604	total: 221ms	remaining: 1.08s
17:	learn: 0.4200853	total: 239ms	remaining: 1.09s
18:	learn: 0.4172277	total: 253ms	remaining: 1.08s
19:	l

100%|██████████| 1/1 [00:16<00:00, 16.59s/it]
100%|██████████| 2/2 [00:39<00:00, 19.51s/it]

97:	learn: 0.3469044	total: 1.27s	remaining: 25.9ms
98:	learn: 0.3465469	total: 1.29s	remaining: 13ms
99:	learn: 0.3461167	total: 1.3s	remaining: 0us
                       dataset  \
0  rolling_Intervals_12Stats_0   
1                         Test   

                                               model     score  
0  (CatBoost, trained_CatBoost_on_rolling_Interva...  0.759304  
1           (CatBoost, trained_CatBoost_on_Test.pkl)  0.759247  





TypeError: unhashable type: 'Series'