In [6]:
import itertools
import numpy as np
import pandas as pd

from typing import List
from datetime import datetime, timedelta
from tqdm.notebook import tqdm


In [7]:
class Provider:

    def __init__(self, id: int, commission: float, conversion: float, processing_time: float):
        self.id = id
        
        self.commission = commission
        self.conversion = conversion
        self.processing_time = processing_time

    def __repr__(self):
        return f"P{self.id}"


In [8]:
payments = pd.read_csv("data/payments_1.csv")

payments.eventTimeRes = pd.to_datetime(payments.eventTimeRes)
payments = payments.sort_values(by="eventTimeRes")


providers = pd.read_csv("data/providers_1.csv")
providers.TIME = pd.to_datetime(providers.TIME)

providers = providers.sort_values(by="TIME")
providers = providers.drop_duplicates(subset=["TIME", "ID"], keep="last")

providers.index = range(len(providers))


In [9]:
def get_available_providers(transaction: pd.Series) -> List[Provider]:
    mask = (providers.CURRENCY == transaction["cur"]) & \
           (transaction["amount"] >= providers.MIN_SUM) & \
           (transaction["amount"] <= providers.MAX_SUM)
    
    available_providers = providers.copy()[mask].drop_duplicates("ID")
    
    providers_objects: List[Provider] = []
    for k, row in available_providers.drop_duplicates(subset=["ID"]).iterrows():
       providers_objects.append(
              Provider(
              id=k + 1,
              commission=row["COMMISSION"],
              conversion=row["CONVERSION"],
              processing_time=row["AVG_TIME"]
              )
       )

    return providers_objects


In [10]:
def compute_expected_processing_time(chain: List[Provider]) -> float:
    probas, values = [], []

    for k in range(len(chain)):
        multiplication = np.prod([1 - provider.conversion for provider in chain[:k]])

        probas.append(multiplication * chain[k].conversion)

        values.append(np.sum([provider.processing_time for provider in chain[:k + 1]]))

    probas, values = np.array(probas), np.array(values)

    return probas.dot(values)


def compute_expected_conversion(chain: List[Provider]) -> float:
    return 1 - np.prod([1 - provider.conversion for provider in chain])


def compute_expected_commission(chain: List[Provider]) -> float:
    probas, values = [], []

    for k in range(len(chain)):
        multiplication = np.prod([1 - provider.conversion for provider in chain[:k]])

        probas.append(multiplication * chain[k].conversion)
        values.append(chain[k].commission)

    probas, values = np.array(probas), np.array(values)

    return probas.dot(values)


In [None]:
all_available_providers = []

for k in tqdm(range(len(payments))):
    available_providers = get_available_providers(payments.iloc[k])

    all_available_providers.append(available_providers)


  0%|          | 0/193978 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [12]:
providers


Unnamed: 0,TIME,ID,CONVERSION,AVG_TIME,MIN_SUM,MAX_SUM,LIMIT_MIN,LIMIT_MAX,LIMIT_BY_CARD,COMMISSION,CURRENCY
0,2024-11-24 21:00:00,0,0.50,18.0,700.0,5500.0,61000.0,12900000.0,-,0.035,UZS
1,2024-11-24 21:00:00,26,0.65,26.0,700.0,6000.0,31000.0,18600000.0,-,0.045,MYR
2,2024-11-24 21:00:00,27,0.75,18.0,100.0,4500.0,21000.0,5000000.0,-,0.040,MYR
3,2024-11-24 21:00:00,28,0.65,22.0,200.0,9000.0,61000.0,20900000.0,-,0.045,MYR
4,2024-11-24 21:00:00,29,0.65,14.0,100.0,9000.0,81000.0,14700000.0,-,0.030,USD
...,...,...,...,...,...,...,...,...,...,...,...
446,2024-11-25 21:00:00,2,0.55,8.0,800.0,8000.0,81000.0,16500000.0,-,0.027,RUB
447,2024-11-25 21:00:00,0,0.60,14.0,400.0,6500.0,1000.0,5000000.0,-,0.035,UZS
448,2024-11-25 21:00:00,38,0.50,24.0,800.0,4500.0,21000.0,32700000.0,-,0.035,GHS
449,2024-11-25 21:00:00,14,0.50,22.0,500.0,5000.0,1000.0,18900000.0,-,0.045,BRL


In [51]:
from dataclasses import dataclass

class Provider:

    def __init__(self, id: int, commission: float, conversion: float, processing_time: float):
        self.id = id
        
        self.commission = commission
        self.conversion = conversion
        self.processing_time = processing_time

    def __repr__(self):
        return f"P{self.id}"


class Transaction:
    providers = providers
    query = "query"

    def __init__(self, payment: pd.Series):
        self.time = payment.eventTimeRes
        self.amount = payment.amount
        self.cur = payment.cur
        self.payment = payment.payment
        self.token = payment.cardToken
        self.available_providers = self._get_available_providers()

    def _get_available_providers(self) -> List[Provider]:

        available_providers = providers[(self.time >= providers.TIME) &
           (self.time <= (providers.TIME + timedelta(hours=1))) &
            (self.amount >= providers.MIN_SUM) &
            (self.amount <= providers.MAX_SUM)]
    
        if len(available_providers) > 0:
            assert len(available_providers) == available_providers.ID.nunique()

        available_providers = [Provider(provider.ID, provider.COMMISSION, provider.CONVERSION, provider.TIME) 
                               for _, provider in available_providers.iterrows()]

        return available_providers

class Log:

    def __init__(self, transaction: Transaction, solution: List[Provider]):
        self.transaction = transaction
        self.solution = solution

    def compute_metrics(self) -> List[float]:
        """
        Подсчитывает метрики.
        """
        raise NotImplementedError

class Simulator:

    def __init__(self):
        self.providers: List[Provider] = None

    def simulate(self, transactions_stream: pd.DataFrame, providers_stream: pd.DataFrame):        
        self.initialize_providers()
        
        logs: List[Log] = []
        for transaction in transactions_stream.iterrows():
            #self.change_providers_states()
            transaction = Transaction(transaction[1])
            solution = self.optimize(transaction)

            self.logs.append(
                Log(
                    transaction=transaction, solution=solution
                )
            )

        return logs

    def optimize(self, transaction: Transaction) -> List[Provider]:
        """
        Возвращает упорядоченный набор провайдеров
        """
        raise NotImplementedError
    
    def initialize_providers():
        """
        Инициализирует список всех провайдеров в начальный момент времени.
        """
        raise NotImplementedError

    def change_providers_states():
        """
        Возвращает текущее состояние провайдеров.
        """
        raise NotImplementedError
    
    def get_available_providers(transaction: Transaction) -> List[Provider]:
        """
        Возвращает возможные провайдеры для транзакции.
        """
        raise NotImplementedError


In [52]:
payment[1]


700.0

In [None]:
for _, row in tr.available_providers.iterrows():
    break


In [50]:
row.COMMISSION


0.035

In [53]:
tr = Transaction(payment)


In [38]:
for p in tr.available_providers.iterrows():
    p[1]


In [39]:
p[1]


TIME             2024-11-24 21:00:00
ID                                21
CONVERSION                       0.5
AVG_TIME                        16.0
MIN_SUM                        100.0
MAX_SUM                       5000.0
LIMIT_MIN                    91000.0
LIMIT_MAX                 25100000.0
LIMIT_BY_CARD                      -
COMMISSION                      0.04
CURRENCY                         JPY
Name: 47, dtype: object

In [30]:
tr.available_providers.ID.nunique()


39

In [12]:
providers


Unnamed: 0,TIME,ID,CONVERSION,AVG_TIME,MIN_SUM,MAX_SUM,LIMIT_MIN,LIMIT_MAX,LIMIT_BY_CARD,COMMISSION,CURRENCY
0,2024-11-24 21:00:00,0,0.50,18.0,700.0,5500.0,61000.0,12900000.0,-,0.035,UZS
1,2024-11-24 21:00:00,26,0.65,26.0,700.0,6000.0,31000.0,18600000.0,-,0.045,MYR
2,2024-11-24 21:00:00,27,0.75,18.0,100.0,4500.0,21000.0,5000000.0,-,0.040,MYR
3,2024-11-24 21:00:00,28,0.65,22.0,200.0,9000.0,61000.0,20900000.0,-,0.045,MYR
4,2024-11-24 21:00:00,29,0.65,14.0,100.0,9000.0,81000.0,14700000.0,-,0.030,USD
...,...,...,...,...,...,...,...,...,...,...,...
446,2024-11-25 21:00:00,2,0.55,8.0,800.0,8000.0,81000.0,16500000.0,-,0.027,RUB
447,2024-11-25 21:00:00,0,0.60,14.0,400.0,6500.0,1000.0,5000000.0,-,0.035,UZS
448,2024-11-25 21:00:00,38,0.50,24.0,800.0,4500.0,21000.0,32700000.0,-,0.035,GHS
449,2024-11-25 21:00:00,14,0.50,22.0,500.0,5000.0,1000.0,18900000.0,-,0.045,BRL


In [44]:
lengths = list(map(len, all_available_providers))


In [102]:
chain = all_available_providers[123021]
chain


[P29, P30, P31, P32]

In [103]:
values = []

for permutation in itertools.permutations(chain, r=len(chain)):
    permutation = list(permutation)

    values.append(
        (compute_expected_processing_time(permutation),
         compute_expected_commission(permutation),
         compute_expected_conversion(permutation))
    )

values = np.array(values)


In [14]:
for provider in providers.iterrows():
    break


In [17]:
provider[1]


TIME             2024-11-24 21:00:00
ID                                 0
CONVERSION                       0.5
AVG_TIME                        18.0
MIN_SUM                        700.0
MAX_SUM                       5500.0
LIMIT_MIN                    61000.0
LIMIT_MAX                 12900000.0
LIMIT_BY_CARD                      -
COMMISSION                     0.035
CURRENCY                         UZS
Name: 0, dtype: object

In [105]:
values[:, 0].argmin(), values[:, 1].argmin()


(20, 4)

In [None]:
trans, [providers]
tran.time, min.max -> [final_providers]


In [113]:
optimal_chain = list(itertools.permutations(chain, r=len(chain)))[4]
optimal_chain


(P29, P32, P30, P31)

In [118]:
optimal_chain[0].processing_time


18.0

In [119]:
optimal_chain[1].processing_time


14.0

In [120]:
optimal_chain[2].processing_time


16.0

In [121]:
optimal_chain[3].processing_time


26.0