In [2]:
import numpy as np
import pandas as pd

from typing import List, Literal
from datetime import datetime, timedelta
from tqdm.notebook import tqdm

from dataclasses import dataclass
from abc import ABC, abstractmethod

In [81]:
@dataclass
class Provider:
    """Parameters of a single payment provider at one point in time.

    Instances are created from rows of the providers table. The routing
    metrics in this notebook treat ``conversion`` as the probability that
    a payment sent to the provider succeeds.
    """

    id: int
    commission: float       # provider fee (presumably a fraction of the amount — TODO confirm)
    conversion: float       # success probability used by the expected-value metrics
    min_sum: float          # smallest payment amount the provider accepts
    max_sum: float          # largest payment amount the provider accepts
    limit_max: float        # NOTE(review): looks like an upper turnover limit — confirm
    limit_min: float        # NOTE(review): looks like a lower turnover limit — confirm
    processing_time: float  # average processing time (units are not visible here)
    currency: str           # currency code, e.g. "RUB"

    total_amount: int = 0   # running total; nothing in the visible code updates it

    def __repr__(self):
        """Show the instance as the string form of its attribute dict."""
        return str(self.__dict__)

@dataclass
class Payment:
    """One incoming payment to be routed.

    Fields are positional, so an instance can be built by unpacking a
    row whose values come in this exact order.
    """

    time: pd.Timestamp  # moment the payment arrived
    amount: float       # payment amount, expressed in ``currency``
    currency: str       # currency code of the payment
    payment: str        # payment identifier
    token: str          # token attached to the payment

    def __repr__(self):
        """Show the instance as the string form of its attribute dict."""
        return str(vars(self))

In [83]:
def compute_expected_processing_time(chain: List[Provider]) -> float:
    """Expected processing time of a provider chain tried in order.

    Provider ``k`` is reached only if every earlier provider failed, so the
    chance of succeeding exactly at ``k`` is
    ``prod(1 - conversion_i for i < k) * conversion_k``. That chance is
    weighted by the time accumulated over providers ``0..k``.

    The outcome where every provider fails adds nothing to the result.
    An empty chain yields ``0.0``.
    """
    # Probability that the payment succeeds exactly at each position.
    success_at = np.array([
        np.prod([1 - earlier.conversion for earlier in chain[:pos]]) * current.conversion
        for pos, current in enumerate(chain)
    ])

    # Time spent up to and including each position.
    elapsed = np.array([
        np.sum([tried.processing_time for tried in chain[:pos + 1]])
        for pos in range(len(chain))
    ])

    return success_at.dot(elapsed)


def compute_expected_conversion(chain: List[Provider]) -> float:
    """Probability that at least one provider in the chain succeeds.

    Computed as one minus the product of the individual failure
    probabilities. An empty chain gives ``0.0``.
    """
    failure_chances = [1 - provider.conversion for provider in chain]
    all_fail = np.prod(failure_chances)
    return 1 - all_fail


def compute_expected_commission(chain: List[Provider]) -> float:
    """Expected commission of a provider chain tried in order.

    Each provider's commission is weighted by the probability that the
    payment succeeds exactly at that provider, i.e. all earlier providers
    failed and this one converted. The all-fail outcome contributes zero.
    An empty chain yields ``0.0``.
    """
    # Probability that the payment succeeds exactly at each position.
    success_at = np.array([
        np.prod([1 - earlier.conversion for earlier in chain[:pos]]) * current.conversion
        for pos, current in enumerate(chain)
    ])
    fees = np.array([provider.commission for provider in chain])

    return success_at.dot(fees)

In [84]:
class BaseStrategy(ABC):
    """Common interface for provider-ordering strategies.

    Deriving from ``ABC`` is what makes ``@abstractmethod`` effective:
    the base class, and any subclass that does not implement
    ``optimize``, cannot be instantiated.
    """

    def __init__(self):
        pass

    @abstractmethod
    def optimize(self, providers: List[Provider]) -> List[Provider]:
        """
        Return the providers in the order they should be tried.
        """
        raise NotImplementedError
    

class GreedyStrategy(BaseStrategy):
    """Order providers by a single attribute, best value first.

    For ``commission`` and ``processing_time`` a smaller value is better,
    so providers are sorted ascending. For ``conversion`` a larger value
    is better, so providers are sorted descending.
    """

    # Attributes for which the largest value should be tried first.
    _HIGHER_IS_BETTER = frozenset({"conversion"})

    def __init__(self, by: Literal["commission", "processing_time", "conversion"] = "commission"):
        """Remember the provider attribute used as the sort key."""
        super().__init__()

        self.by = by

    def optimize(self, providers: List[Provider]) -> List[Provider]:
        """Return a new list of ``providers`` sorted by ``self.by``.

        The input list is not modified. The sort is stable, so providers
        with equal keys keep their incoming relative order.
        """
        return sorted(
            providers,
            key=lambda provider: getattr(provider, self.by),
            reverse=self.by in self._HIGHER_IS_BETTER,
        )

In [4]:
# Payments: parse the event timestamp and order rows chronologically.
payments = pd.read_csv("data/payments_1.csv")
payments["eventTimeRes"] = pd.to_datetime(payments["eventTimeRes"])
payments = payments.sort_values(by="eventTimeRes")


# Provider snapshots: parse timestamps, order by time, keep the last row
# for each (TIME, ID) pair and renumber the index from zero.
providers = pd.read_csv("data/providers_1.csv")
providers["TIME"] = pd.to_datetime(providers["TIME"])
providers = (
    providers
    .sort_values(by="TIME")
    .drop_duplicates(subset=["TIME", "ID"], keep="last")
    .reset_index(drop=True)
)

# Exchange rates: map each destination currency code to its rate.
currencies = {
    row["destination"]: row["rate"]
    for _, row in pd.read_csv("data/ex_rates.csv").iterrows()
}

Unnamed: 0,rate,destination
0,0.586865,AZN
1,1.041829,EUR
2,0.128461,HKD
3,0.000712,KRW
4,0.65,AUD
5,0.048951,MXN
6,0.263785,PEN
7,0.009587,RUB
8,0.172383,BRL
9,0.006461,JPY


In [97]:
class Simulator:
    """Replay payments against provider snapshots and score the routing.

    For every payment the simulator selects the provider snapshots that
    can accept it, asks the strategy to order them, and records the
    expected commission, conversion and processing time of that order.
    """

    def __init__(self, payments: pd.DataFrame, providers: pd.DataFrame, strategy: BaseStrategy = GreedyStrategy()):
        """Store the payments frame, the providers frame and the strategy."""
        self.payments = payments
        self.providers = providers

        self.strategy = strategy

    def simulate(self, verbose: bool = True):
        """Route every payment in ``self.payments`` and return the history.

        Args:
            verbose: show a tqdm progress bar when true.

        Returns:
            A list with one dict per payment, holding the ``payment``, the
            ordered ``providers`` and a ``metrics`` dict. When no provider
            is available the provider list is empty and all metrics are 0.
        """
        history = []

        # Iterate the frame given to the constructor, not a notebook-level
        # global, so the simulator works on whatever data it was built with.
        rows = self.payments.iterrows()
        for _, row in tqdm(rows, total=len(self.payments), disable=not verbose):
            # Positional unpacking: the column order of the payments frame
            # must match the field order of Payment.
            payment = Payment(*row)

            available_providers = self._get_available_providers(payment)

            if len(available_providers) > 0:
                optimized_providers = self.strategy.optimize(available_providers)

                metrics = {
                    "expected_commission": compute_expected_commission(optimized_providers),
                    "expected_conversion": compute_expected_conversion(optimized_providers),
                    "expected_processing_time": compute_expected_processing_time(optimized_providers),
                }
            else:
                optimized_providers = []

                metrics = {
                    "expected_commission": 0,
                    "expected_conversion": 0,
                    "expected_processing_time": 0,
                }

            history.append({
                "payment": payment,
                "providers": optimized_providers,
                "metrics": metrics,
            })

        return history

    def _get_available_providers(self, payment: Payment) -> List[Provider]:
        """Return the providers able to process ``payment``.

        A snapshot row qualifies when the payment time lies within one
        hour after the row's ``TIME``, the currencies match, and the
        amount is inside ``[MIN_SUM, MAX_SUM]``. At most one row per
        provider ``ID`` is returned.
        """
        snapshots = self.providers

        eligible = (
            (payment.time >= snapshots.TIME) &
            (payment.time <= (snapshots.TIME + timedelta(hours=1))) &
            (payment.amount >= snapshots.MIN_SUM) &
            (payment.currency == snapshots.CURRENCY) &
            (payment.amount <= snapshots.MAX_SUM)
        )

        # drop_duplicates keeps the first matching row per ID, in the row
        # order of self.providers, which already guarantees unique IDs.
        # NOTE(review): if the frame is sorted by TIME ascending this is the
        # oldest snapshot inside the window — confirm that is intended.
        candidates = snapshots[eligible].drop_duplicates("ID")

        return [
            Provider(
                id=provider.ID,
                commission=provider.COMMISSION,
                conversion=provider.CONVERSION,
                processing_time=provider.AVG_TIME,
                limit_min=provider.LIMIT_MIN,
                min_sum=provider.MIN_SUM,
                max_sum=provider.MAX_SUM,
                limit_max=provider.LIMIT_MAX,
                currency=provider.CURRENCY,
            )
            for _, provider in candidates.iterrows()
        ]

In [98]:
# Build the simulator on the loaded frames; no strategy is passed, so the constructor default applies.
sim = Simulator(payments=payments, providers=providers)

In [99]:
# Run the simulation with the progress bar shown; `history` receives one record per payment.
history = sim.simulate(verbose=True)

  0%|          | 0/193978 [00:00<?, ?it/s]

In [107]:
# Inspect one record (index 1): its payment, the ordered providers and the metrics.
history[1]

{'payment': {'time': Timestamp('2024-11-24 21:00:01'), 'amount': 945.9, 'currency': 'RUB', 'payment': 'c9a03cbbe592f930d52416b049bc1d1a', 'token': '336d5ebc5436534e61d16e63ddfca327'},
 'providers': [{'id': 2, 'commission': 0.03, 'conversion': 0.5, 'min_sum': 400.0, 'max_sum': 8500.0, 'limit_max': 13400000.0, 'limit_min': 51000.0, 'processing_time': 18.0, 'currency': 'RUB', 'total_amount': 0},
  {'id': 5, 'commission': 0.03, 'conversion': 0.6, 'min_sum': 600.0, 'max_sum': 4500.0, 'limit_max': 28000000.0, 'limit_min': 31000.0, 'processing_time': 14.0, 'currency': 'RUB', 'total_amount': 0},
  {'id': 3, 'commission': 0.045, 'conversion': 0.5, 'min_sum': 900.0, 'max_sum': 6000.0, 'limit_max': 19800000.0, 'limit_min': 61000.0, 'processing_time': 16.0, 'currency': 'RUB', 'total_amount': 0},
  {'id': 4, 'commission': 0.05, 'conversion': 0.7, 'min_sum': 300.0, 'max_sum': 6500.0, 'limit_max': 23600000.0, 'limit_min': 81000.0, 'processing_time': 26.0, 'currency': 'RUB', 'total_amount': 0}],
 'met

In [5]:
import pandas as pd

In [6]:
# Load a results table from the parent directory (presumably routing output from another run — TODO confirm).
df = pd.read_csv("../outputs.csv")

In [10]:
# Ten most frequent values of the `flow` column, most common first.
df["flow"].value_counts().head(10)

2-3-5-4    37445
2-3-4-5     6388
13          4165
2           3946
2-3         3623
3-4-2-5     3157
0-1         3018
16-15       2421
2-3-5       2215
1-0         1721
Name: flow, dtype: int64