Это чтобы мы не путались. Сами значения инициализируются в функции __init__ (class DQN) и при создании агента (class DQN_go).

*in_size* = 6 

*hid_size* = 64

*out_size* = 1

*l_rate* = 0.001

*gamma* = 0.99

*epsilon* = 1 (начальное значение при запуске обучения; далее убывает до *epsilon_min*)

*epsilon_decay* = 0.995

*epsilon_min* = 0.05

In [None]:
pip install torch

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import copy
import random
from wellplan.builder import PlanBuilder, Plan, WellPlanContext
from wellplan.services.cost import CostFunction
from wellplan.services import ArpsDeclineProductionProfile
from wellplan.data.file.loader import ExcelWellLoader
import pandas as pd
import datetime
from wellplan.services.team_manager import DistanceTeamMovement
from datetime import timedelta


Загружаем таблички

In [None]:
# Well list source. The alternative workbook is kept for reference:
# loader = ExcelWellLoader(file_path = r'C:\Users\Анна\Downloads\Sirius_GazpromNeft-main\well-plan-optimization-main\data\Ввод ЭБ 1+11 Обезлич_v3.xlsx')
loader = ExcelWellLoader(
    file_path=r'C:\Users\Анна\Downloads\Sirius_GazpromNeft-main\well-plan-optimization-main\data\Ввод ЭБ эксперимент.xlsx'
)
wells = loader.load()

# Production profile service; handed to the agent when it is created.
production_profile = ArpsDeclineProductionProfile()

# Coordinates table: the first row is treated as the header and the
# columns are renamed to cluster / x / y / z.
coordinates = pd.read_excel(
    r'C:\Users\Анна\Downloads\Sirius_GazpromNeft-main\well-plan-optimization-main\data\База перспективного ПФ (30.01.2025).xlsx',
    names=['cluster', 'x', 'y', 'z'],
    header=0,
)

# Team movement model built from one dict per coordinates row.
move = DistanceTeamMovement.from_dicts(
    coordinates.to_dict(orient='records')
)



Класс модели

In [None]:
from wellplan.services.cost import BaseCapex, BaseOpex, NPV

In [None]:
from wellplan.services.production import ProductionProfile

In [None]:
# Import the datetime class under its own alias. A later cell executes
# `from datetime import datetime`, which rebinds the bare name `datetime`
# from the module to the class; `datetime.datetime.now()` would then raise
# AttributeError if this cell were run again. The alias works in both states.
from datetime import datetime as _datetime_cls

# Capital expenditure model: build cost per metre keyed by well type,
# plus a single equipment cost figure.
capex = BaseCapex(
    build_cost_per_metr={
        "ГС+ГРП": 25300,
        "ННС+ГРП": 12900,
        "МЗС": 27300,
        "МЗС+ГРП": 28300,
        "ГС": 23300,
    },
    equipment_cost=2500000,
)

# Operating expenditure model: per-tonne oil and water costs and
# yearly repair / maintenance costs.
opex = BaseOpex(
    oil_cost_per_tone=109.9,
    water_cost_per_tone=48.6,
    repair_per_year=3093900,
    maintain_per_year=2336200,
)

# NPV cost function combining the two expenditure models above.
# The project start date is the moment this cell is executed.
npv = NPV(
    oil_price_per_tone=13896,
    project_start_date=_datetime_cls.now(),
    capex_cost=capex,
    opex_cost=opex,
    discount_rate=0.125,
)


In [None]:
class DQN(nn.Module, CostFunction):
    """Small fully connected network that scores a (plan, candidate) pair.

    The network has two hidden ReLU layers and a linear output layer. It is
    also a ``CostFunction``: ``compute`` writes the network output into
    ``action.cost``.

    NOTE(review): the ``CostFunction`` base is presumably what allows the
    plan builder to accept this model as its ``cost_function`` - confirm
    against ``PlanBuilder``.
    """

    def __init__(
        self,
        in_size: int = 6,
        hid_size: int = 64,
        out_size: int = 1,
        gamma: float = 0.99,
        l_rate: float = 0.001,
        npv: NPV = None,
        profiller = None
    ):
        """Create the layers, the Adam optimizer and the MSE loss.

        Args:
            in_size: Width of the input vector; must match the number of
                features produced by ``a_s_tensor`` (six).
            hid_size: Width of both hidden layers.
            out_size: Width of the output layer.
            gamma: Stored on the instance; not used by the methods of
                this class.
            l_rate: Learning rate of the Adam optimizer.
            npv: Object whose ``compute(state, action)`` is used by
                ``update``; ``update`` cannot work while this is ``None``.
            profiller: Stored on the instance; not used by the methods of
                this class.
        """
        super().__init__()

        self.l1 = nn.Linear(in_size, hid_size)
        self.l2 = nn.Linear(hid_size, hid_size)

        self.out = nn.Linear(hid_size, out_size)

        self.npv = npv
        self.gamma = gamma
        self.profiller = profiller

        self.optimizer = optim.Adam(self.parameters(), lr=l_rate)
        self.criterion = nn.MSELoss()

    def a_s_tensor(self, state: Plan, action: WellPlanContext) -> torch.Tensor:
        """Build the six-element feature vector for a (state, action) pair.

        Returns:
            A 1-D float32 tensor: well oil rate, liquid rate, length, number
            of well tasks, number of well plans already in the state, and
            the state's total profit.
        """
        features = [
            action.well.oil_rate,
            action.well.liq_rate,
            action.well.length,
            len(action.well.tasks),
            len(state.well_plans),
            state.total_profit(),
        ]

        # Explicit dtype keeps the input float32 (the dtype of the layer
        # weights) regardless of whether the features are ints or floats.
        return torch.tensor(features, dtype=torch.float32)

    def forward(self, x):
        """Pass ``x`` through both hidden ReLU layers and the output layer."""
        x = torch.relu(self.l1(x))
        x = torch.relu(self.l2(x))
        return self.out(x)

    def compute(self, state: Plan, action: WellPlanContext) -> WellPlanContext:
        """Score ``action`` in ``state`` and store the score in ``action.cost``.

        The feature vector is turned into a batch of one and evaluated
        without gradient tracking.

        Returns:
            The same ``action`` object, with ``cost`` set.
        """
        x = self.a_s_tensor(state, action).unsqueeze(0)

        with torch.no_grad():
            # Call the module itself rather than .forward() so that any
            # registered hooks are honoured.
            action.cost = self(x).item()

        return action

    def update(self, state: Plan, action: WellPlanContext) -> None:
        """Run one gradient step towards the observed reward.

        The reward is the total profit of a deep copy of ``state`` with
        ``action`` added, minus ``self.npv.compute(state, action)``. The
        network output for (state, action) is regressed onto that reward
        with MSE loss. ``state`` itself is not modified.

        NOTE(review): this assumes ``self.npv.compute`` returns a number.
        If it returns the context object (as ``DQN.compute`` does), the
        subtraction below fails - confirm against ``NPV``.
        """
        state_tensor = self.a_s_tensor(state, action).unsqueeze(0)

        prev_npv = self.npv.compute(state, action)

        # Work on a copy so the caller's plan is left untouched.
        new_state = copy.deepcopy(state)
        new_state.add_context(action)

        reward = new_state.total_profit() - prev_npv

        # Shape (1, 1) to match the network output. The explicit float32
        # avoids a Long/Double target (int or numpy-float64 reward) being
        # compared against a float32 prediction in the loss.
        reward_tensor = torch.tensor([[reward]], dtype=torch.float32)

        q_value = self(state_tensor)

        loss = self.criterion(q_value, reward_tensor)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()


In [None]:
from typing import Optional
from datetime import datetime
from loguru import logger
from wellplan.core.well import Well

from wellplan.core.team import TeamPool

from wellplan.services.risk_strategy import RiskStrategy
from wellplan.services.cost import CostFunction
from wellplan.services.infrastructure import Infrastructure, SimpleInfrastructure
from wellplan.services.team_manager import BaseTeamManager
from wellplan.services.production import ProductionProfile, LinearProductionProfile
from wellplan.services.constraint import ConstraintManager, Constraint
from wellplan.services.team_manager import TeamManager

Класс составления итогового плана

In [None]:
class DQN_go(PlanBuilder):
    """Plan builder that picks wells epsilon-greedily using a ``DQN`` model.

    NOTE(review): nothing in this class calls ``self.model.update``, so
    unless ``PlanBuilder`` does, the network weights are never trained by
    ``compile`` - confirm whether a training step is missing.
    """

    def __init__(self, start, end, in_size, l_rate, epsilon, epsilon_decay, epsilon_min, npv, profiller):
        """Create the model and the exploration schedule.

        Args:
            start: Start of the planning window.
            end: End of the planning window.
            in_size: Accepted for interface compatibility. It is not
                forwarded to the model because ``DQN.a_s_tensor`` always
                builds six features, which is the model's default input
                width.
            l_rate: Learning rate of the model's optimizer.
            epsilon: Initial probability of choosing a random candidate.
            epsilon_decay: Factor applied to ``epsilon`` after every
                selection.
            epsilon_min: Lower bound for ``epsilon``.
            npv: Passed through to ``DQN``.
            profiller: Passed through to ``DQN``.
        """
        # Forward l_rate: the optimizer stepped in DQN.update is the one
        # owned by the model, so the rate must be set there to take effect.
        self.model = DQN(l_rate=l_rate, npv=npv, profiller=profiller)
        # Expose the model's optimizer instead of creating a second Adam
        # instance over the same parameters.
        self.optimizer = self.model.optimizer

        super().__init__(
            start=start,
            end=end,
            cost_function=self.model
        )

        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min

    def _select_best_candidate(self, plan: Plan, candidates: list[WellPlanContext]) -> WellPlanContext:
        """Pick a candidate epsilon-greedily.

        With probability ``self.epsilon`` a random candidate is returned.
        Otherwise every candidate is scored with the cost function and the
        first one with the highest score is returned.

        Returns:
            The chosen candidate, or ``None`` when no candidate scored
            above negative infinity.
        """
        # Exploration branch: skip scoring entirely.
        if self.epsilon > random.random():
            return random.choice(candidates)

        best_value = float('-inf')
        best_candidate = None

        for candidate in candidates:
            value = self.cost_function.compute(plan, candidate).cost

            # Strict comparison keeps the earliest candidate on ties.
            if best_value < value:
                best_candidate = candidate
                best_value = value

        return best_candidate

    def compile(
        self,
        wells: list[Well],
        manager: BaseTeamManager,
        risk_strategy: Optional[RiskStrategy] = None,
        keep_order: bool = False,
        cluster_ordered: bool = True,

    ) -> Plan:
        """Build a plan by repeatedly selecting one candidate at a time.

        Args:
            wells: Wells to schedule; the list is copied, not mutated.
            manager: Team manager that candidates are assigned through.
            risk_strategy: Optional strategy applied to each chosen
                candidate.
            keep_order: Not used by this implementation.
            cluster_ordered: Forwarded to ``_filter_candidates``.

        Returns:
            The plan containing every candidate that was selected.
        """
        plan = Plan()
        self.remaining_wells = wells.copy()

        current_start = self.start

        while self.remaining_wells and current_start < self.end:
            candidates = self._build_contexts(manager, current_start)
            if not candidates:
                break

            candidates = self._filter_candidates(candidates, plan, risk_strategy, cluster_ordered)

            if not candidates:
                # Nothing passes the filter for this year: jump to 1 January
                # of the period end reported by the constraints, or of the
                # following year when none is reported.
                next_year = self._constraints.get_period_end(current_start.year)
                logger.info(f"No candidates for {current_start.year}, moving to {next_year or current_start.year+1}")

                current_start = datetime((next_year or current_start.year+1), 1, 1)
                continue

            best_candidate = self._select_best_candidate(
                plan, candidates
            )

            # Decay exploration after every selection, never below the floor.
            self.epsilon = max(self.epsilon * self.epsilon_decay, self.epsilon_min)

            # NOTE(review): for a randomly chosen candidate the cost has not
            # been computed by this method yet, so the logged value may be
            # stale - confirm.
            logger.debug(f"Best candidate: {best_candidate.well.name}, cost: {best_candidate.cost}")
            manager.assign(best_candidate)

            self.remaining_wells.remove(best_candidate.well)

            if risk_strategy:
                risk_strategy.define_risk(best_candidate)

            # Make sure the chosen candidate carries a cost before it is
            # stored in the plan.
            self.cost_function.compute(plan, best_candidate)

            plan.add_context(best_candidate)

        return plan

Запуск обучения

In [10]:
# Agent with a planning window that opens now and closes 25 * 365 days later.
# Exploration starts fully random (epsilon = 1) and decays towards 0.05.
agent = DQN_go(
    start=datetime.now(),
    end=datetime.now() + timedelta(days=25 * 365),
    in_size=6,
    l_rate=0.001,
    epsilon=1,
    epsilon_decay=0.995,
    epsilon_min=0.05,
    npv=npv,
    profiller=production_profile,
)

# Plans produced by the episodes are collected here.
plans = []

In [None]:
n_episods = 3000

# Column-oriented log of every episode; turned into a DataFrame afterwards.
results = {'episod': [], 'id': [], 'target_value': [], 'wells': []}

for episod in range(n_episods):
    # Each episode starts from a fresh pool: one 'ГС' team and one 'ГРП' team.
    team_pool = TeamPool()
    team_pool.add_teams(['ГС'], num_teams=1)
    team_pool.add_teams(['ГРП'], num_teams=1)

    manager = TeamManager(team_pool=team_pool)
    plan = agent.compile(wells=wells.copy(), manager=manager)

    # Names of the wells in the plan, as strings.
    wells_list = [str(ctx.well.name) for ctx in plan.well_plans]

    episode_row = {
        'episod': episod,
        'id': plan.id,
        'target_value': plan.total_profit(),
        'wells': wells_list,
    }
    for column, value in episode_row.items():
        results[column].append(value)

    plans.append(plan)




[32m2025-07-12 12:20:43.359[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mcompile[0m:[36m72[0m - [34m[1mBest candidate: 658, cost: -169.36460876464844[0m
[32m2025-07-12 12:20:43.364[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mcompile[0m:[36m72[0m - [34m[1mBest candidate: 6912, cost: -191.0658416748047[0m
[32m2025-07-12 12:20:43.368[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mcompile[0m:[36m72[0m - [34m[1mBest candidate: 1705, cost: -199.3600311279297[0m
[32m2025-07-12 12:20:43.371[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mcompile[0m:[36m72[0m - [34m[1mBest candidate: 4528, cost: -191.83299255371094[0m
[32m2025-07-12 12:20:43.373[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mcompile[0m:[36m72[0m - [34m[1mBest candidate: 6743, cost: -200.7330322265625[0m
[32m2025-07-12 12:20:43.375[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mcompile[0m:[36m72[0m - [34m[1mBest candidate: 5611, cost: -171.515869140

In [None]:
# One row per episode, built from the lists collected in `results`.
df = pd.DataFrame(results)

In [None]:
# Highest target_value first: sort ascending, reverse the row order, then
# renumber the rows (reset_index keeps the old index as an 'index' column).
df = df.sort_values(by='target_value').iloc[::-1].reset_index()
df

In [None]:
# Print every collected plan, unpacked as separate arguments to print().
print(*plans)