# Сложные функции

# Генератор конфигураций по типу GridSearch

In [16]:
def config_generator(new_config, config):
    '''
    Генератор конфигурации модели на базе словаря с новыми настройками
    '''
    from itertools import product
    
    # Словарь с изменениями превращаем в структуру: [full_key, value]
    # [
    #  [[key11, key12..], value1],
    #  [[key21, key22..], value2],
    # ..]
    
    dict_to_list = {}
    def ReadNewDict(new_dict, parents):
        if not isinstance(new_dict, dict) or len(new_dict) == 0:
            dict_to_list[parents] = new_dict
            return None

        for curr_parent in new_dict:
            ReadNewDict(new_dict[curr_parent], parents + (curr_parent, ))
            
    ReadNewDict(new_config, ())
    
    
    
    # По full_key спускаемся через temp_config до value и меняем его
    # Пока value выступает либо сам элемент, либо первый элемент list/tuple
    
    for full_key, value in list(dict_to_list.items()):
        temp_config = config
        
        for key in full_key[:-1]:
            if key in temp_config:
                temp_config = temp_config[key]
            else:
                temp_config[key] = {}
                temp_config = temp_config[key]
                
        if isinstance(value, list) or isinstance(value, tuple):
            assert len(value), 'Передан пустой список'
            temp_config[full_key[-1]] = value[0]
        else:
            temp_config[full_key[-1]] = value
            del dict_to_list[full_key]
            
    
    
    # Генератор пробегает по всем комбинациям элементов config
    
    for values in product(*dict_to_list.values()):
        for ind, full_key in enumerate(dict_to_list):
            temp_config = config
        
            for key in full_key[:-1]:
                temp_config = temp_config[key]
                    
            temp_config[full_key[-1]] = values[ind]
        yield config

In [17]:
my_dict = {"test_len": [1, 3],
           "prediction_len": 0,
           "training": {"epochs": [100, 200], "batch_size": 10}}

config = {"test_len": 6,
          "prediction_len": 0,
          "factors": False,
          "training": {"epochs": 100, "batch_size": 32}}
    
from pprint import pprint
for i in config_generator(my_dict, config):
    pprint(i)

{'factors': False,
 'prediction_len': 0,
 'test_len': 1,
 'training': {'batch_size': 10, 'epochs': 100}}
{'factors': False,
 'prediction_len': 0,
 'test_len': 1,
 'training': {'batch_size': 10, 'epochs': 200}}
{'factors': False,
 'prediction_len': 0,
 'test_len': 3,
 'training': {'batch_size': 10, 'epochs': 100}}
{'factors': False,
 'prediction_len': 0,
 'test_len': 3,
 'training': {'batch_size': 10, 'epochs': 200}}


In [44]:
class CustomGridSearch:
    '''
    Кастомный аналог GridSearch
    '''
    def __init__(self, model, config, X, y, metric='MSE', test_size=0.2, data_tranform=None, random_state=42):
        import numpy as np
        from sklearn.model_selection import train_test_split
        import warnings
        warnings.simplefilter('ignore')
        
        assert isinstance(config, dict), 'ConfigTypeError'
        assert isinstance(X, np.array), 'XTypeError'
        assert isinstance(y, np.array), 'yTypeError'
        
        self.model = model
        self.config = config
        
        if data_tranform:
            X, y = data_tranform(X, y)
            
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, 
                                                                                test_size=test_size, 
                                                                                random_state=random_state)
        self.metric = metric
        self.config_history = np.array([])
        self.score_history = np.array([])
        
        
        
    
    def score(self, preds, true, metric):
        if metric == 'MSE':
            return np.sum((preds - true) ** 2) / true.shape[0]
    
    
    def fit(self):
        for params in config_generator(self.config, self.model.__dict__):
            self.model.__dict__ = params
            self.model.fit(self.X_train, self.y_train)
            preds = self.model.predict(self.X_test)
            score = self.score(preds, self.y_test, self.metric)
            self.score_history = np.hstack([self.score_history, score])
            self.config_history = np.hstack([self.config_history, params])
            
    
    def best_score(self):
        return self.score_history.max()
    
    
    def best_parameters(self):
        return self.config_history[self.score_history.argmax()]

In [46]:
import numpy as np
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()
X = np.random.randint(1, 100, (20, 3))
y = np.random.randint(1, 100, 20)
config = {'class_weight': [None, 'balanced'],
          'C': np.linspace(0.1, 1, 10).tolist()}

gs = CustomGridSearch(lr, config, X, y)

gs.fit()
gs.best_score()
gs.best_parameters()

{'penalty': 'l2',
 'dual': False,
 'tol': 0.0001,
 'C': 1.0,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'class_weight': 'balanced',
 'random_state': None,
 'solver': 'lbfgs',
 'max_iter': 100,
 'multi_class': 'auto',
 'verbose': 0,
 'warm_start': False,
 'n_jobs': None,
 'l1_ratio': None,
 'classes_': array([ 9, 10, 15, 19, 27, 36, 37, 44, 65, 68, 86, 95, 96, 97]),
 'coef_': array([[ 0.07322156,  0.08688119,  0.00916795],
        [ 0.32576247, -0.58379819,  0.36673088],
        [ 0.68695088, -0.35820082, -0.54917821],
        [-0.13411722, -0.1721953 ,  0.45890131],
        [-0.65530586,  0.61302338, -0.33947975],
        [-0.43896203,  0.11265482,  0.33417167],
        [-0.87049366,  0.33539638,  0.17941639],
        [-0.1887969 ,  0.30887698, -0.12424014],
        [ 0.2710205 ,  0.08125079, -0.44491176],
        [ 0.50848605, -0.37294943,  0.11974476],
        [ 0.05008093, -0.07351841,  0.25890365],
        [ 0.14475121, -0.30693139,  0.3881396 ],
        [ 0.35839549, -0.124