In [2]:
import pathlib
import os
from pathlib import Path
import sys
 
# Prepend the parent directory of the current working directory to sys.path so that
# the `src.*` imports used in later cells resolve (os.path.abspath("") is the cwd).
# NOTE(review): presumably the notebook lives one level below the project root — confirm.
if sys.argv:
    sys.path.insert(0, str(pathlib.Path(os.path.dirname(os.path.abspath(""))).resolve()))
%load_ext autoreload
%autoreload 2

In [3]:
from src.models.IModelUplift import IModelUplift

Descnn.py

In [29]:
import torch
import torch.nn as nn
import math

def init_weights(m):
    """Initialise an ``nn.Linear`` layer in place; any other module is left untouched.

    Weights are drawn from a normal distribution with mean 0 and
    std = 1 / sqrt(in_features); the bias, when the layer has one, is zeroed.
    Designed to be passed to ``nn.Module.apply``.

    Args:
        m: a module visited by ``apply``.
    """
    if not isinstance(m, nn.Linear):
        return

    stdv = 1 / math.sqrt(m.weight.size(1))
    torch.nn.init.normal_(m.weight, mean=0.0, std=stdv)
    # torch.nn.init.xavier_uniform_(m.weight)

    # nn.Linear(..., bias=False) stores bias as None; skip it instead of
    # failing with AttributeError on `None.data`.
    if m.bias is not None:
        torch.nn.init.zeros_(m.bias)

def safe_sqrt(x):
    """Numerically safe version of PyTorch sqrt.

    The input is first limited to the range [1e-9, 1e+9], so negative or zero
    entries yield sqrt(1e-9) rather than NaN / 0.
    """
    bounded = torch.clamp(x, min=1e-9, max=1e+9)
    return bounded.sqrt()

class ShareNetwork(nn.Module):
    """Shared encoder producing the representation fed to every downstream head.

    The stack is three Linear+ELU stages with dropout after the first two.
    When ``cfg['BatchNorm1d'] == 'true'`` the input is batch-normalised first
    and one more dropout follows the last ELU; otherwise the stack ends on
    the ELU. With ``cfg['normalization'] == 'divide'`` each output row is
    divided by its (clipped) L2 norm.
    """

    def __init__(self, input_dim, share_dim, base_dim, cfg, device):
        super().__init__()
        with_bn = cfg.get('BatchNorm1d', 'false') == 'true'
        drop_p = cfg.get('do_rate', 0.2)

        # Layers are appended in the same order as the two explicit variants,
        # so Sequential indices (and state_dict keys) are unchanged.
        stack = []
        if with_bn:
            stack.append(nn.BatchNorm1d(input_dim))
        stack.extend([
            nn.Linear(input_dim, share_dim),
            nn.ELU(),
            nn.Dropout(p=drop_p),
            nn.Linear(share_dim, share_dim),
            nn.ELU(),
            nn.Dropout(p=drop_p),
            nn.Linear(share_dim, base_dim),
            nn.ELU(),
        ])
        if with_bn:
            stack.append(nn.Dropout(p=drop_p))

        self.DNN = nn.Sequential(*stack)
        self.DNN.apply(init_weights)
        self.cfg = cfg
        self.device = device
        self.to(device)

    def forward(self, x):
        """Encode ``x`` (moved to ``self.device``) and optionally L2-normalise each row."""
        h_rep = self.DNN(x.to(self.device))
        if self.cfg.get('normalization', 'none') != "divide":
            return 1.0 * h_rep

        squared_norm = torch.sum(torch.square(h_rep), dim=1, keepdim=True)
        return h_rep / safe_sqrt(squared_norm)


class BaseModel(nn.Module):
    """Three-stage MLP trunk (Linear -> ELU -> Dropout, width ``base_dim``) shared by all heads."""

    def __init__(self, base_dim, cfg):
        super().__init__()
        drop_p = cfg.get('do_rate', 0.2)

        stages = []
        for _ in range(3):
            stages += [nn.Linear(base_dim, base_dim), nn.ELU(), nn.Dropout(p=drop_p)]

        self.DNN = nn.Sequential(*stages)
        self.DNN.apply(init_weights)

    def forward(self, x):
        """Return the trunk activations for ``x``."""
        return self.DNN(x)


class PrpsyNetwork(nn.Module):
    """Propensity head: BaseModel trunk followed by a single-output linear layer.

    ``forward`` returns the raw logit; the sigmoid is applied by the caller.
    """

    def __init__(self, base_dim, cfg):
        super().__init__()
        self.baseModel = BaseModel(base_dim, cfg)
        self.logitLayer = nn.Linear(base_dim, 1)
        self.logitLayer.apply(init_weights)
        self.sigmoid = nn.Sigmoid()

    def forward(self, inputs):
        """Return the propensity logit for ``inputs``."""
        hidden = self.baseModel(inputs)
        return self.logitLayer(hidden)


class Mu0Network(nn.Module):
    """Control-outcome head: BaseModel trunk followed by a single-output linear layer.

    ``forward`` returns the raw (un-activated) output of the linear layer.
    """

    def __init__(self, base_dim, cfg):
        super().__init__()
        self.baseModel = BaseModel(base_dim, cfg)
        self.logitLayer = nn.Linear(base_dim, 1)
        self.logitLayer.apply(init_weights)
        # Activation modules are registered but not used in forward().
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()

    def forward(self, inputs):
        """Return the control-arm logit for ``inputs``."""
        hidden = self.baseModel(inputs)
        return self.logitLayer(hidden)


class Mu1Network(nn.Module):
    """Treatment-outcome head: BaseModel trunk followed by a single-output linear layer.

    ``forward`` returns the raw (un-activated) output of the linear layer.
    """

    def __init__(self, base_dim, cfg):
        super().__init__()
        self.baseModel = BaseModel(base_dim, cfg)
        self.logitLayer = nn.Linear(base_dim, 1)
        self.logitLayer.apply(init_weights)
        # Activation modules are registered but not used in forward().
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()

    def forward(self, inputs):
        """Return the treatment-arm logit for ``inputs``."""
        hidden = self.baseModel(inputs)
        return self.logitLayer(hidden)


class TauNetwork(nn.Module):
    """Pseudo-tau head: BaseModel trunk followed by a single-output linear layer.

    ``forward`` returns the raw output of the linear layer.
    """

    def __init__(self, base_dim, cfg):
        super().__init__()
        self.baseModel = BaseModel(base_dim, cfg)
        self.logitLayer = nn.Linear(base_dim, 1)
        self.logitLayer.apply(init_weights)
        # Registered but not applied in forward().
        self.tanh = nn.Tanh()

    def forward(self, inputs):
        """Return the pseudo-tau logit for ``inputs``."""
        hidden = self.baseModel(inputs)
        return self.logitLayer(hidden)


class DESCN(nn.Module):
    """DESCN (Deep End-to-end Stochastic Causal Network).

    A shared encoder feeds four heads: propensity, mu1 (treated outcome),
    mu0 (control outcome) and pseudo-tau.

    Args:
        input_dim: number of input features.
        share_dim: hidden width of the shared encoder.
        base_dim: output width of the shared encoder and width of every head.
        do_rate: dropout probability used throughout.
        device: device the module is moved to and inputs are sent to.
        batch_norm: prepend BatchNorm1d to the shared encoder when true.
        normalization: "divide" to L2-normalise the shared representation,
            anything else to leave it unchanged.
    """

    def __init__(self, input_dim, share_dim, base_dim, do_rate, device, batch_norm=False, normalization="none"):
        super(DESCN, self).__init__()
        # Model configuration, in the string-keyed form the sub-networks read.
        cfg = {
            'do_rate': do_rate,
            'BatchNorm1d': 'true' if batch_norm else 'false',
            'normalization': normalization
        }

        # Model components
        self.shareNetwork = ShareNetwork(input_dim, share_dim, base_dim, cfg, device)
        self.prpsy_network = PrpsyNetwork(base_dim, cfg)
        self.mu1_network = Mu1Network(base_dim, cfg)
        self.mu0_network = Mu0Network(base_dim, cfg)
        self.tau_network = TauNetwork(base_dim, cfg)

        self.cfg = cfg
        self.device = device
        self.to(device)

    def forward(self, inputs):
        """Run all heads on ``inputs`` and return every intermediate quantity.

        Returns:
            dict with the raw head outputs (``p_prpsy_logit``, ``mu1_logit``,
            ``mu0_logit``, ``tau_logit``), their sigmoid-activated versions
            (``p_prpsy`` clipped to [0.001, 0.999], ``p_mu1``/``p_h1``,
            ``p_mu0``/``p_h0``), the entire-space products ``p_estr`` and
            ``p_escr``, the shared representation ``shared_h``, and
            ``uplift`` / ``y1`` / ``y0`` which are built from the raw
            mu1/mu0 head outputs.
        """
        shared_h = self.shareNetwork(inputs)

        # propensity output_logit
        p_prpsy_logit = self.prpsy_network(shared_h)
        p_prpsy = torch.clip(torch.sigmoid(p_prpsy_logit), 0.001, 0.999)

        # logit for mu1, mu0
        mu1_logit = self.mu1_network(shared_h)
        mu0_logit = self.mu0_network(shared_h)

        # pseudo tau
        tau_logit = self.tau_network(shared_h)

        # The original called `sigmod2`, which is neither defined nor imported
        # in this notebook (NameError on the first forward pass). A plain
        # sigmoid is used instead.
        # NOTE(review): confirm this matches the intended `sigmod2` helper.
        p_mu1 = torch.sigmoid(mu1_logit)
        p_mu0 = torch.sigmoid(mu0_logit)
        p_h1 = p_mu1  # Refer to the naming in TARnet/CFR
        p_h0 = p_mu0  # Refer to the naming in TARnet/CFR

        # entire space
        p_estr = torch.mul(p_prpsy, p_h1)
        p_i_prpsy = 1 - p_prpsy
        p_escr = torch.mul(p_i_prpsy, p_h0)

        # Uplift (treatment effect) is the difference of the raw head outputs.
        uplift = mu1_logit - mu0_logit

        return {
            'p_prpsy_logit': p_prpsy_logit,
            'p_estr': p_estr,
            'p_escr': p_escr,
            'tau_logit': tau_logit,
            'mu1_logit': mu1_logit,
            'mu0_logit': mu0_logit,
            'p_prpsy': p_prpsy,
            'p_mu1': p_mu1,
            'p_mu0': p_mu0,
            'p_h1': p_h1,
            'p_h0': p_h0,
            'shared_h': shared_h,
            'uplift': uplift,
            'y1': mu1_logit,
            'y0': mu0_logit
        }

### INNUpliftModeling

In [8]:
from src.metric import get_auuc_v2

In [15]:
import os
import json
import time
import torch
import numpy as np
import pandas as pd
from abc import ABC, abstractmethod
from typing import Dict, List, Union, Optional, Tuple
from torch.utils.data import DataLoader
from src.models.IModelUplift import IModelUplift
from src.datasets import TorchDataset

class INNUpliftModeling(IModelUplift):
    """
    Base class for neural-network uplift models.

    Subclasses supply the architecture (`_initialize_model`, which must set
    `self.model`) and the training objective (`_compute_loss`). This class
    provides optimizer/scheduler setup, a training loop with a held-out
    validation split and early stopping, evaluation, prediction, checkpoint
    save/load, inference timing and random config generation.
    """

    def __init__(self, config_json=None, from_load=False, path=None):
        """
        Build a fresh model from a config, or restore one from a checkpoint.

        Args:
            config_json: model configuration, either a JSON string or an
                already-parsed dict.
            from_load: when true, restore the model from `path` instead of
                building it from `config_json`.
            path: checkpoint path; required when `from_load` is true.

        Raises:
            ValueError: if the config (fresh model) or path (load) is missing.
        """
        super().__init__(config_json, from_load, path)

        if from_load:
            if path is None:
                raise ValueError("No config or model paths while contstructing model.")
            self.load(path)
            return

        if config_json is None:
            raise ValueError("No config while contstructing model.")

        self.config = json.loads(config_json) if isinstance(config_json, str) else config_json
        self.model = None
        self.device = self._resolve_device()
        self._initialize_model()
        self._setup_optimizer_and_scheduler()

    def _resolve_device(self):
        """Pick CUDA when it is available and `use_gpu` (default True) allows it, else CPU."""
        use_cuda = torch.cuda.is_available() and self.config.get('use_gpu', True)
        return torch.device('cuda' if use_cuda else 'cpu')

    def _initialize_model(self):
        """Build the network and assign it to `self.model`. Must be overridden."""
        raise NotImplementedError("Subclasses must implement _initialize_model().")

    def _setup_optimizer_and_scheduler(self):
        """
        Create `self.optimizer` and `self.scheduler` from the config.

        Reads `config['optimizer']` (name, lr, weight_decay, momentum) and
        `config['scheduler']` (name plus scheduler-specific options).

        Raises:
            ValueError: if the scheduler name is not supported.
        """
        optimizer_config = self.config.get('optimizer', {})
        optimizer_name = optimizer_config.get('name', 'Adam')
        lr = optimizer_config.get('lr', 0.001)
        weight_decay = optimizer_config.get('weight_decay', 0.0)

        if optimizer_name == 'Adam':
            self.optimizer = torch.optim.Adam(
                self.model.parameters(),
                lr=lr,
                weight_decay=weight_decay
            )
        elif optimizer_name == 'SGD':
            momentum = optimizer_config.get('momentum', 0.9)
            self.optimizer = torch.optim.SGD(
                self.model.parameters(),
                lr=lr,
                momentum=momentum,
                weight_decay=weight_decay
            )
        else:
            # 'AdamW' and any unrecognised optimizer name both end up here:
            # unknown names fall back to AdamW rather than raising.
            self.optimizer = torch.optim.AdamW(
                self.model.parameters(),
                lr=lr,
                weight_decay=weight_decay
            )

        scheduler_config = self.config.get('scheduler', {})
        scheduler_name = scheduler_config.get('name')

        if scheduler_name == 'ReduceLROnPlateau':
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer,
                mode=scheduler_config.get('mode', 'min'),
                factor=scheduler_config.get('factor', 0.1),
                patience=scheduler_config.get('patience', 10),
                verbose=scheduler_config.get('verbose', True)
            )
        elif scheduler_name == 'CosineAnnealingLR':
            self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                self.optimizer,
                T_max=scheduler_config.get('T_max', 100),
                eta_min=scheduler_config.get('eta_min', 0)
            )
        elif scheduler_name is None:
            self.scheduler = None
        else:
            raise ValueError(f"Unsupported scheduler: {scheduler_name}")

    def _get_data_loader(self, X: TorchDataset, batch_size=None, shuffle=False):
        """
        Wrap a dataset in a DataLoader.

        Args:
            X: dataset to iterate over.
            batch_size: batch size; defaults to `config['batch_size']` (32).
            shuffle: whether to shuffle every epoch.
        """
        if batch_size is None:
            batch_size = self.config.get('batch_size', 32)

        return DataLoader(
            X,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=self.config.get('num_workers', 0)
        )

    def _forward_pass(self, features, treatment=None):
        """
        Run the network on a batch of features.

        `treatment` is accepted so that subclasses whose network consumes the
        treatment indicator can override this hook; the default ignores it.
        """
        return self.model(features)

    def _compute_loss(self, outputs, outcome, treatment):
        """Return the scalar training loss for one batch. Must be overridden."""
        raise NotImplementedError("Subclasses must implement _compute_loss().")

    def fit(self, X_train: TorchDataset):
        """
        Train the model, holding out 20% of `X_train` for validation.

        Supports gradient accumulation (`gradient_accumulation_steps`), LR
        scheduling and early stopping on validation loss or AUUC
        (`early_stopping_criterion`: 'loss' or 'auuc').

        Returns:
            dict of per-epoch lists: epoch, train_loss, val_loss, val_auuc,
            learning_rate.

        Raises:
            ValueError: if the dataset is too small to produce non-empty
                train and validation splits.
        """
        n_total = len(X_train)
        train_size = int(0.8 * n_total)
        val_size = n_total - train_size
        if train_size == 0 or val_size == 0:
            raise ValueError(
                f"Need at least 2 samples to build train/validation splits, got {n_total}."
            )
        train_subset, val_subset = torch.utils.data.random_split(X_train, [train_size, val_size])

        epochs = self.config.get('epochs', 20)
        batch_size = self.config.get('batch_size', 32)
        early_stopping_patience = self.config.get('early_stopping_patience', 10)
        accumulation_steps = self.config.get('gradient_accumulation_steps', 1)

        train_loader = self._get_data_loader(train_subset, batch_size, shuffle=True)
        val_loader = self._get_data_loader(val_subset, batch_size, shuffle=False)
        num_train_batches = len(train_loader)
        progress_every = max(1, num_train_batches // 10)

        best_val_loss = float('inf')
        best_val_auuc = float('-inf')
        early_stopping_criterion = self.config.get('early_stopping_criterion', 'loss')  # 'loss' or 'auuc'
        patience_counter = 0
        # Stays None until the monitored metric improves at least once
        # (e.g. it never does when the metric is NaN from the first epoch).
        best_model_state = None

        history = {
            'epoch': [],
            'train_loss': [],
            'val_loss': [],
            'val_auuc': [],
            'learning_rate': []
        }

        for epoch in range(epochs):
            epoch_loss = 0.0
            num_batches = 0

            self.model.train()
            # Gradients are cleared once per accumulation window, not once per
            # batch; clearing per batch would discard the accumulated gradients.
            self.optimizer.zero_grad()

            for batch_idx, batch in enumerate(train_loader):
                features, treatment, outcome = batch
                # Model outputs live on self.device; targets must match.
                treatment = treatment.to(self.device)
                outcome = outcome.to(self.device)

                outputs = self._forward_pass(features, treatment)

                loss = self._compute_loss(outputs, outcome, treatment) / accumulation_steps
                loss.backward()

                # Undo the accumulation scaling so the logged loss is per-batch.
                epoch_loss += loss.item() * accumulation_steps
                num_batches += 1

                is_last_batch = (batch_idx + 1) == num_train_batches
                if (batch_idx + 1) % accumulation_steps == 0 or is_last_batch:
                    self.optimizer.step()
                    self.optimizer.zero_grad()

                if (batch_idx + 1) % progress_every == 0:
                    progress = (batch_idx + 1) / num_train_batches * 100
                    current_loss = epoch_loss / num_batches
                    print(f"Epoch {epoch+1}/{epochs} - {progress:.1f}% - Loss: {current_loss:.4f}")

            avg_train_loss = epoch_loss / num_batches

            # ---- validation ----
            val_loss, val_auuc = self._evaluate(val_loader)

            if self.scheduler is not None:
                if isinstance(self.scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                    self.scheduler.step(val_loss)
                else:
                    self.scheduler.step()

            current_lr = self.optimizer.param_groups[0]['lr']
            history['epoch'].append(epoch + 1)
            history['train_loss'].append(avg_train_loss)
            history['val_loss'].append(val_loss)
            history['val_auuc'].append(val_auuc)
            history['learning_rate'].append(current_lr)

            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {avg_train_loss:.4f}, "
                  f"Val Loss: {val_loss:.4f}, Val AUUC: {val_auuc:.4f}, "
                  f"LR: {current_lr:.6f}")

            if early_stopping_criterion == 'loss':
                improved = val_loss < best_val_loss
            elif early_stopping_criterion == 'auuc':
                improved = val_auuc > best_val_auuc
            else:
                improved = False

            if improved:
                if early_stopping_criterion == 'loss':
                    best_val_loss = val_loss
                else:
                    best_val_auuc = val_auuc
                patience_counter = 0
                best_model_state = {
                    name: tensor.detach().clone()
                    for name, tensor in self.model.state_dict().items()
                }
            else:
                patience_counter += 1
                if patience_counter >= early_stopping_patience:
                    print(f"Early stopping triggered after {epoch+1} epochs")
                    if best_model_state is not None:
                        self.model.load_state_dict(best_model_state)
                    break

        # NOTE(review): the best weights are restored only when early stopping
        # fires; a run that uses all epochs keeps the last-epoch weights.
        return history

    def _evaluate(self, data_loader):
        """
        Compute mean loss and AUUC over a validation/test loader.

        Returns:
            (avg_loss, auuc); both are NaN when the loader yields no batches.
        """
        self.model.eval()

        total_loss = 0.0
        num_batches = 0

        all_uplift_scores = []
        all_treatments = []
        all_outcomes = []

        with torch.no_grad():
            for batch in data_loader:
                features, treatment, outcome = batch
                treatment = treatment.to(self.device)
                outcome = outcome.to(self.device)

                outputs = self._forward_pass(features, treatment)
                loss = self._compute_loss(outputs, outcome, treatment)
                total_loss += loss.item()
                num_batches += 1

                # Collect per-sample data for the AUUC computation.
                all_uplift_scores.append(outputs['uplift'].cpu())
                all_treatments.append(treatment.cpu())
                all_outcomes.append(outcome.cpu())

        if num_batches == 0:
            return float('nan'), float('nan')

        avg_loss = total_loss / num_batches

        uplift_scores = torch.cat(all_uplift_scores, dim=0).numpy().flatten()
        treatments = torch.cat(all_treatments, dim=0).numpy().flatten()
        outcomes = torch.cat(all_outcomes, dim=0).numpy().flatten()
        auuc = get_auuc_v2(uplift_scores, treatments, outcomes)

        return avg_loss, auuc

    def predict(self, X: TorchDataset):
        """
        Predict potential outcomes and uplift scores.

        Returns:
            dict with NumPy arrays under 'y0', 'y1' and 'uplift'.
        """
        self.model.eval()
        batch_size = self.config.get('inference_batch_size', 32)
        data_loader = self._get_data_loader(X, batch_size, shuffle=False)

        y0_list, y1_list, uplift_list = [], [], []

        with torch.no_grad():
            for batch in data_loader:
                features, treatment, _ = batch

                outputs = self.model(features)

                y0_list.append(outputs['y0'].cpu())
                y1_list.append(outputs['y1'].cpu())
                uplift_list.append(outputs['uplift'].cpu())
        y0 = torch.cat(y0_list, dim=0).numpy()
        y1 = torch.cat(y1_list, dim=0).numpy()
        uplift = torch.cat(uplift_list, dim=0).numpy()

        return {
            'y0': y0,
            'y1': y1,
            'uplift': uplift
        }

    def predict_light(self, X: TorchDataset):
        """
        Lightweight prediction (no values returned).

        Currently a no-op: the inference loop below is disabled and kept for
        reference only.
        """
        # self.model.eval()

        # batch_size = self.config.get('inference_batch_size', 64)
        # data_loader = self._prepare_data_loader(X, batch_size, shuffle=False)

        # with torch.no_grad():
        #     for batch in data_loader:
        #         features, treatment, _ = batch
        #         _ = self.model(features)
        pass

    def save(self, path):
        """
        Save model weights, optimizer state, scheduler state (if any) and
        the config to `path`.
        """
        # A bare filename has an empty dirname, and os.makedirs('') raises.
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)

        save_data = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'config': self.config
        }

        if self.scheduler is not None:
            save_data['scheduler_state_dict'] = self.scheduler.state_dict()

        torch.save(save_data, path)

    def load(self, path):
        """
        Restore config, model, optimizer and scheduler from a checkpoint.

        Raises:
            FileNotFoundError: if `path` does not exist.
        """
        if not os.path.exists(path):
            raise FileNotFoundError(f"Model file not found: {path}")

        # Load storages on CPU first; the model is moved to self.device when built.
        checkpoint = torch.load(path, map_location=lambda storage, loc: storage)
        self.config = checkpoint['config']

        self.device = self._resolve_device()

        self._initialize_model()
        self.model.load_state_dict(checkpoint['model_state_dict'])

        self._setup_optimizer_and_scheduler()
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

        if 'scheduler_state_dict' in checkpoint and self.scheduler is not None:
            self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

    def measure_inference_time(self, data, batch_size, max_size=None):
        """
        Measure the mean per-sample inference time in milliseconds.

        A random subset of at most `max_size` samples is run through the
        model batch by batch; each batch's wall time is divided by the number
        of samples in that batch and the per-batch values are averaged.

        Args:
            data: dataset yielding (features, treatment, outcome) items.
            batch_size: inference batch size (32 when None).
            max_size: maximum number of samples to time (5000 when None).

        Returns:
            Mean time per sample in ms, or NaN when `data` is empty.
        """
        # The arguments used to be overwritten unconditionally with 5000 / 32;
        # those values are now only the fallbacks.
        if max_size is None:
            max_size = 5000
        if batch_size is None:
            batch_size = 32

        indices = torch.randperm(len(data))[:max_size].tolist()
        subset_data = torch.utils.data.Subset(data, indices)
        data_loader = self._get_data_loader(subset_data, batch_size, shuffle=False)

        self.model.eval()
        on_cuda = torch.device(self.device).type == 'cuda'

        inference_times = []
        with torch.no_grad():
            for batch in data_loader:
                features = batch[0]

                # CUDA kernels run asynchronously; synchronise so the timer
                # covers the actual computation.
                if on_cuda:
                    torch.cuda.synchronize()
                start_time = time.perf_counter()
                self._forward_pass(features)
                if on_cuda:
                    torch.cuda.synchronize()
                end_time = time.perf_counter()

                # The last batch may be smaller than batch_size.
                inference_times.append((end_time - start_time) * 1000 / len(features))

        if not inference_times:
            return float('nan')
        return np.mean(inference_times)

    @staticmethod
    def generate_config(count, **params):
        """
        Generate a set of randomised configurations.

        For each entry of `params`: a list is treated as a set of candidate
        values (one is drawn per config), a 2-tuple as a `(min, max)` range
        (integer if both bounds are ints, uniform float otherwise), and any
        other value is copied as is.

        Args:
            count: number of configurations to generate.
            **params: overrides and search ranges merged over the defaults.

        Returns:
            List of independent config dicts.
        """
        import copy

        base_config = {
            'batch_size': 64,
            'epochs': 100,
            'early_stopping_patience': 10,
            'optimizer': {
                'name': 'Adam',
                'lr': 0.001,
                'weight_decay': 0.0001
            },
            'scheduler': {
                'name': 'ReduceLROnPlateau',
                'patience': 5,
                'factor': 0.5
            },
            'use_gpu': True,
            'num_workers': 2,
            'inference_batch_size': 128
        }

        # Merge the caller's parameters; a list contributes its first element
        # as the baseline value.
        for key, value in params.items():
            base_config[key] = value[0] if isinstance(value, list) else value

        configs = []
        for _ in range(count):
            # Deep copy so the nested optimizer/scheduler dicts are not shared
            # between the generated configs.
            config = copy.deepcopy(base_config)

            for key, value in params.items():
                if isinstance(value, list):
                    # Draw by index so the chosen element keeps its native
                    # Python type; np.random.choice returns NumPy scalars
                    # (e.g. np.int64 / np.bool_), which are not JSON-serialisable.
                    picked = value[int(np.random.randint(len(value)))]
                    config[key] = copy.deepcopy(picked)
                elif isinstance(value, tuple) and len(value) == 2:
                    min_val, max_val = value
                    if isinstance(min_val, int) and isinstance(max_val, int):
                        config[key] = int(np.random.randint(min_val, max_val + 1))
                    else:
                        config[key] = float(np.random.uniform(min_val, max_val))

            configs.append(config)

        return configs


DESCNUplift

In [62]:
class DESCNUpliftModel(INNUpliftModeling):
    """
    DESCN implementation of the neural uplift-model base class.
    """

    def _initialize_model(self):
        """
        Build the DESCN network from the config and store it in `self.model`.

        Raises:
            ValueError: if `input_dim` is missing from the config.
        """
        input_dim = self.config.get('input_dim')
        share_dim = self.config.get('share_dim', 128)
        base_dim = self.config.get('base_dim', 64)
        do_rate = self.config.get('do_rate', 0.2)
        batch_norm = self.config.get('batch_norm', False)
        normalization = self.config.get('normalization', 'none')

        # input_dim has no sensible default; it must come from the data.
        if input_dim is None:
            raise ValueError("input_dim must be specified in the config")

        self.model = DESCN(
            input_dim=input_dim,
            share_dim=share_dim,
            base_dim=base_dim,
            do_rate=do_rate,
            device=self.device,
            batch_norm=batch_norm,
            normalization=normalization
        )

    def _compute_loss(self, outputs, outcome, treatment):
        """
        Compute the DESCN training loss.

        The loss is a weighted sum of: MSE between the factual head output
        and the observed outcome, binary cross-entropy of the propensity
        logit against the treatment indicator, and (optionally) MSE between
        `tau_logit` and `self.tau_true`.

        Args:
            outputs: dict returned by the model's forward pass.
            outcome: observed target, one value per sample.
            treatment: treatment indicator (1 = treated, 0 = control), one
                value per sample.

        Returns:
            Scalar loss tensor.
        """
        # `F` was never imported in this notebook; bind it locally.
        F = torch.nn.functional

        mu1_logit = outputs['mu1_logit']
        mu0_logit = outputs['mu0_logit']
        p_prpsy_logit = outputs['p_prpsy_logit']

        # Weights of the individual loss components.
        factual_loss_weight = self.config.get('factual_loss_weight', 1.0)
        propensity_loss_weight = self.config.get('propensity_loss_weight', 0.1)
        tau_loss_weight = self.config.get('tau_loss_weight', 0.1)

        # Align targets with the head outputs: same device, same float dtype
        # (BCE-with-logits rejects integer targets) and an explicit column
        # shape so batches of size 1 and (N,) / (N, 1) inputs all work.
        treatment = treatment.to(device=mu1_logit.device, dtype=mu1_logit.dtype).reshape(-1, 1)
        outcome = outcome.to(device=mu1_logit.device, dtype=mu1_logit.dtype).reshape(-1, 1)

        # Masks selecting the treated and control samples.
        treatment_mask = (treatment == 1).to(mu1_logit.dtype)
        control_mask = (treatment == 0).to(mu1_logit.dtype)

        # Factual loss: MSE on the head that matches the observed arm.
        y_pred = treatment_mask * mu1_logit.reshape(-1, 1) + control_mask * mu0_logit.reshape(-1, 1)
        factual_loss = F.mse_loss(y_pred, outcome)

        # Propensity loss: predict the treatment assignment.
        propensity_loss = F.binary_cross_entropy_with_logits(
            p_prpsy_logit.reshape(-1, 1),
            treatment
        )

        # Treatment-effect loss, only when a ground-truth tau is attached.
        if self.config.get('use_tau_loss', False) and hasattr(self, 'tau_true'):
            tau_loss = F.mse_loss(outputs['tau_logit'], self.tau_true)
        else:
            tau_loss = factual_loss.new_zeros(())

        total_loss = (
            factual_loss_weight * factual_loss +
            propensity_loss_weight * propensity_loss +
            tau_loss_weight * tau_loss
        )

        return total_loss

    def _process_prediction_outputs(self, outputs):
        """
        Select the prediction-relevant fields from the model outputs.

        Args:
            outputs: dict returned by the model's forward pass.

        Returns:
            dict with 'y0', 'y1', 'uplift' and 'propensity'.
        """
        return {
            'y0': outputs['mu0_logit'],
            'y1': outputs['mu1_logit'],
            'uplift': outputs['uplift'],
            'propensity': outputs['p_prpsy']
        }

    @staticmethod
    def generate_config(count, **params):
        """
        Generate random configurations for the DESCN model.

        Args:
            count: number of configurations.
            **params: overrides for the DESCN defaults below; lists are
                treated as candidate values by the base generator.

        Returns:
            List of config dicts.
        """
        # DESCN-specific defaults and search spaces.
        descn_params = {
            'input_dim': 100,             # must be set to match the data
            'share_dim': [256, 256],      # candidate widths of the shared layers
            'base_dim': [256],            # candidate widths of the head layers
            'do_rate': [0.1, 0.2, 0.3],   # candidate dropout rates
            'batch_norm': [True, False],  # whether to use BatchNorm
            'normalization': ['none', 'divide'],  # representation normalisation
            'factual_loss_weight': [0.8, 1.0, 1.2],      # factual loss weight
            'propensity_loss_weight': [0.05, 0.1, 0.2],  # propensity loss weight
            'tau_loss_weight': [0.05, 0.1, 0.2],         # tau loss weight (if used)
            'gradient_accumulation_steps' : 2
        }

        # Caller-supplied parameters take precedence over the defaults.
        descn_params.update(params)

        return INNUpliftModeling.generate_config(count, **descn_params)

    def num_params(self):
        """Return the number of trainable parameters of `self.model`."""
        return sum(p.numel() for p in self.model.parameters() if p.requires_grad)

In [59]:
# Sample three random DESCN configurations and print each one for inspection.
# `kek` is reused by a later cell to build a model from the first config.
kek = DESCNUpliftModel.generate_config(count=3)
for a in kek:
    print(a)

{'batch_size': 64, 'epochs': 100, 'early_stopping_patience': 10, 'optimizer': {'name': 'Adam', 'lr': 0.001, 'weight_decay': 0.0001}, 'scheduler': {'name': 'ReduceLROnPlateau', 'patience': 5, 'factor': 0.5}, 'use_gpu': True, 'num_workers': 2, 'inference_batch_size': 128, 'input_dim': 100, 'share_dim': 256, 'base_dim': 256, 'do_rate': 0.2, 'batch_norm': False, 'normalization': 'none', 'factual_loss_weight': 0.8, 'propensity_loss_weight': 0.1, 'tau_loss_weight': 0.1}
{'batch_size': 64, 'epochs': 100, 'early_stopping_patience': 10, 'optimizer': {'name': 'Adam', 'lr': 0.001, 'weight_decay': 0.0001}, 'scheduler': {'name': 'ReduceLROnPlateau', 'patience': 5, 'factor': 0.5}, 'use_gpu': True, 'num_workers': 2, 'inference_batch_size': 128, 'input_dim': 100, 'share_dim': 256, 'base_dim': 256, 'do_rate': 0.1, 'batch_norm': False, 'normalization': 'none', 'factual_loss_weight': 0.8, 'propensity_loss_weight': 0.2, 'tau_loss_weight': 0.05}
{'batch_size': 64, 'epochs': 100, 'early_stopping_patience': 

In [22]:
from src.utils import get_paths_train_test, train_test_model
from src.factory import SModelFactory

Failed to import duecredit due to No module named 'duecredit'


In [None]:
class DESCNUpliftModelFactory(IFactory):
    """Factory bundling a DESCN uplift model with its train and test datasets."""

    @staticmethod
    def create(config_json, train_path, test_path):
        """Return ``(model, train, test)`` built from the config and the two dataset paths."""
        return (
            DESCNUpliftModel(config_json),
            NumpyDataset(train_path),
            NumpyDataset(test_path),
        )

In [60]:
# Build a DESCN uplift model from the first sampled configuration.
model = DESCNUpliftModel(kek[0])



In [61]:
# Trainable-parameter count via the model's helper method.
model.num_params()

947972

In [52]:
# Same count computed inline from the underlying torch module.
# NOTE(review): the recorded output differs from the helper's output above and the
# execution counts are out of order — presumably the two cells ran against different
# model instances; re-run on a fresh kernel to confirm.
sum([p.numel() for p in model.model.parameters() if p.requires_grad])

930564

In [45]:
# NOTE(review): without parentheses this evaluates to the bound method, yet the
# recorded output shows a generator object, i.e. the result of `parameters()` —
# the saved output looks stale; confirm which form is intended.
model.model.parameters

<generator object Module.parameters at 0x7f2b6df1f3e0>

In [42]:
# Show the configuration dict stored on the model.
model.config

{'batch_size': 64,
 'epochs': 100,
 'early_stopping_patience': 10,
 'optimizer': {'name': 'Adam', 'lr': 0.001, 'weight_decay': 0.0001},
 'scheduler': {'name': 'ReduceLROnPlateau', 'patience': 5, 'factor': 0.5},
 'use_gpu': True,
 'num_workers': 2,
 'inference_batch_size': 128,
 'input_dim': 32,
 'share_dim': 128,
 'base_dim': 64,
 'do_rate': 0.2,
 'batch_norm': True,
 'normalization': 'divide',
 'factual_loss_weight': 1.2,
 'propensity_loss_weight': 0.1,
 'tau_loss_weight': 0.1}

In [39]:
# Show the module tree of the underlying DESCN network.
model.model

DESCN(
  (shareNetwork): ShareNetwork(
    (DNN): Sequential(
      (0): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Linear(in_features=32, out_features=128, bias=True)
      (2): ELU(alpha=1.0)
      (3): Dropout(p=0.2, inplace=False)
      (4): Linear(in_features=128, out_features=128, bias=True)
      (5): ELU(alpha=1.0)
      (6): Dropout(p=0.2, inplace=False)
      (7): Linear(in_features=128, out_features=64, bias=True)
      (8): ELU(alpha=1.0)
      (9): Dropout(p=0.2, inplace=False)
    )
  )
  (prpsy_network): PrpsyNetwork(
    (baseModel): BaseModel(
      (DNN): Sequential(
        (0): Linear(in_features=64, out_features=64, bias=True)
        (1): ELU(alpha=1.0)
        (2): Dropout(p=0.2, inplace=False)
        (3): Linear(in_features=64, out_features=64, bias=True)
        (4): ELU(alpha=1.0)
        (5): Dropout(p=0.2, inplace=False)
        (6): Linear(in_features=64, out_features=64, bias=True)
        (7): ELU(alpha=1.0

In [64]:
# Display the class object; the `__main__` prefix in the output shows it is the
# notebook-defined class.
DESCNUpliftModel

__main__.DESCNUpliftModel

In [None]:
from src.utils import get_paths_train_test, train_test_model
from src.factory import XModelFactory
from src.configs_generation import generate_random_configs_xmodel
from tqdm import tqdm
# NOTE(review): `parameters` is not defined anywhere in the visible notebook; this
# cell fails on a fresh kernel unless it is defined elsewhere.
configs = generate_random_configs_xmodel(parameters, count=1)
# Dataset selection passed to get_paths_train_test below.
ds_name = 'lazada'
features_percent = 50
factory = XModelFactory
config = configs[0]
# batch_size / max_size are only assigned here; nothing in this cell consumes them.
batch_size=32
max_size=100000
train_path, test_path = get_paths_train_test(ds_name=ds_name, features_percent=features_percent)
# Rebinds `model`, replacing the DESCN model created in an earlier cell.
model, train, test = factory.create(config, train_path, test_path)