In [2]:
import itertools
import numpy as np
import pandas as pd

from typing import List
from datetime import datetime, timedelta
from tqdm.notebook import tqdm


In [3]:
class Provider:
    """A payment provider described by its commission, conversion and processing time."""

    def __init__(self, id: int, commission: float, conversion: float, processing_time: float):
        # Identity first, then the three characteristics used by the metrics.
        self.id = id
        self.commission, self.conversion, self.processing_time = (
            commission,
            conversion,
            processing_time,
        )

    def __repr__(self):
        """Short label of the form ``P<id>``."""
        return "P{}".format(self.id)


In [5]:
# Payments, ordered chronologically by their `eventTimeRes` timestamp.
payments = (
    pd.read_csv("data/payments_1.csv")
    .assign(eventTimeRes=lambda frame: pd.to_datetime(frame.eventTimeRes))
    .sort_values(by="eventTimeRes")
)

# Provider records ordered by `TIME`. When a (TIME, ID) pair occurs more than
# once only its last occurrence is kept, and the rows are renumbered 0..n-1.
providers = (
    pd.read_csv("data/providers_1.csv")
    .assign(TIME=lambda frame: pd.to_datetime(frame.TIME))
    .sort_values(by="TIME")
    .drop_duplicates(subset=["TIME", "ID"], keep="last")
    .reset_index(drop=True)
)

In [6]:
def get_available_providers(transaction: pd.Series) -> List[Provider]:
    """Build the providers that can process ``transaction``.

    A row of the module-level ``providers`` frame qualifies when its
    ``CURRENCY`` equals ``transaction["cur"]`` and ``transaction["amount"]``
    lies within ``[MIN_SUM, MAX_SUM]`` (both bounds inclusive). Only the first
    qualifying row per ``ID`` is kept.

    Args:
        transaction: Row providing at least the ``cur`` and ``amount`` fields.

    Returns:
        One ``Provider`` per distinct matching ``ID``, in frame order; an
        empty list when no row qualifies.
    """
    amount = transaction["amount"]
    mask = (
        (providers.CURRENCY == transaction["cur"])
        & (amount >= providers.MIN_SUM)
        & (amount <= providers.MAX_SUM)
    )

    # Boolean indexing already yields a new frame, so the full table does not
    # need to be copied on every call, and one de-duplication is sufficient.
    matching = providers[mask].drop_duplicates(subset=["ID"])

    # Identify each provider by its ID column: the frame's index is only a row
    # number (it is renumbered after sorting by TIME) and does not say which
    # provider the row belongs to.
    columns = ["ID", "COMMISSION", "CONVERSION", "AVG_TIME"]
    return [
        Provider(
            id=row.ID,
            commission=row.COMMISSION,
            conversion=row.CONVERSION,
            processing_time=row.AVG_TIME,
        )
        for row in matching[columns].itertuples(index=False)
    ]


In [7]:
def compute_expected_processing_time(chain: List[Provider]) -> float:
    """Probability-weighted processing time of an ordered provider chain.

    For every position the weight is the product of ``1 - conversion`` over
    all earlier providers multiplied by the ``conversion`` of the provider at
    that position; the value is the sum of ``processing_time`` up to and
    including that position. The result is the dot product of weights and
    values (``0.0`` for an empty chain).
    """
    # Weight of "the provider at this position is the first one to convert".
    first_hit = np.array([
        np.prod([1 - earlier.conversion for earlier in chain[:position]]) * candidate.conversion
        for position, candidate in enumerate(chain)
    ])

    # Time accumulated by the moment the provider at this position finishes.
    elapsed = np.array([
        np.sum([member.processing_time for member in chain[:stop]])
        for stop in range(1, len(chain) + 1)
    ])

    return first_hit.dot(elapsed)


def compute_expected_conversion(chain: List[Provider]) -> float:
    """Return one minus the product of ``1 - conversion`` over the whole chain."""
    miss_rates = [1 - candidate.conversion for candidate in chain]
    all_missed = np.prod(miss_rates)
    return 1 - all_missed


def compute_expected_commission(chain: List[Provider]) -> float:
    """Probability-weighted commission of an ordered provider chain.

    Each provider's ``commission`` is weighted by the product of
    ``1 - conversion`` over all earlier providers multiplied by its own
    ``conversion``; the weighted commissions are summed via a dot product
    (``0.0`` for an empty chain).
    """
    # Weight of "the provider at this position is the first one to convert".
    first_hit = np.array([
        np.prod([1 - earlier.conversion for earlier in chain[:position]]) * candidate.conversion
        for position, candidate in enumerate(chain)
    ])
    fees = np.array([candidate.commission for candidate in chain])

    return first_hit.dot(fees)


In [8]:
# Look up the candidate providers for every payment, in row order.
all_available_providers = [
    get_available_providers(payments.iloc[row_number])
    for row_number in tqdm(range(len(payments)))
]


  0%|          | 0/193978 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [15]:
from dataclasses import dataclass

class Provider:
    """Value holder for one provider: id, commission, conversion, processing time."""

    def __init__(self, id: int, commission: float, conversion: float, processing_time: float):
        self.id = id

        # Characteristics read by the chain metrics.
        self.processing_time = processing_time
        self.conversion = conversion
        self.commission = commission

    def __repr__(self):
        """Render the provider as ``P`` followed by its id."""
        return "P" + format(self.id)


class Transaction:
    """A single payment together with the providers able to process it.

    The constructor copies the relevant fields from a payment row and
    immediately resolves the matching providers.
    """

    # Reference to the module-level provider table, captured when the class is
    # defined. NOTE(review): the lookup below reads the module-level name
    # directly, so this attribute is not consulted by the visible code.
    providers = providers
    # Not used anywhere in the visible code.
    query = "query"

    def __init__(self, payment: pd.Series):
        """Populate the transaction from a payment row.

        Args:
            payment: Row exposing ``eventTimeRes``, ``amount``, ``cur``,
                ``payment`` and ``cardToken``.
        """
        self.time = payment.eventTimeRes
        self.amount = payment.amount
        self.cur = payment.cur
        self.payment = payment.payment
        self.token = payment.cardToken
        self.available_providers = self._get_available_providers()

    def _get_available_providers(self) -> List[Provider]:
        """Return the providers whose record covers this transaction.

        A provider row matches when the transaction time lies in
        ``[TIME, TIME + 1 hour]`` (both ends inclusive), the currency is equal
        and the amount is within ``[MIN_SUM, MAX_SUM]``.

        Returns:
            One ``Provider`` per matching row (empty list when none match).

        Raises:
            AssertionError: If the same provider ``ID`` matches more than once.
        """
        available_providers = providers[
            (self.time >= providers.TIME) &
            (self.time <= (providers.TIME + timedelta(hours=1))) &
            (self.amount >= providers.MIN_SUM) &
            (self.cur == providers.CURRENCY) &
            (self.amount <= providers.MAX_SUM)]

        # Every matching row must belong to a different provider.
        if len(available_providers) > 0:
            assert len(available_providers) == available_providers.ID.nunique()

        # processing_time comes from AVG_TIME (as in get_available_providers);
        # TIME is the timestamp of the record, not a duration.
        columns = ["ID", "COMMISSION", "CONVERSION", "AVG_TIME"]
        return [
            Provider(row.ID, row.COMMISSION, row.CONVERSION, row.AVG_TIME)
            for row in available_providers[columns].itertuples(index=False)
        ]

class Log:
    """Pairs a transaction with the provider chain chosen for it."""

    def __init__(self, transaction: Transaction, solution: List[Provider]):
        self.transaction, self.solution = transaction, solution

    def compute_metrics(self) -> List[float]:
        """
        Computes the metrics.
        """
        raise NotImplementedError

class Simulator:
    """Skeleton that replays payments and records the provider chain chosen for each.

    ``simulate`` drives the loop; ``optimize`` and ``initialize_providers``
    raise ``NotImplementedError`` here and must be supplied by a subclass
    before ``simulate`` can run. ``change_providers_states`` and
    ``get_available_providers`` are further hooks that are not yet called.
    """

    def __init__(self):
        # Nothing in this class assigns a real value; presumably
        # initialize_providers() is meant to fill it in.
        self.providers: List[Provider] = None

    def simulate(self, transactions_stream: pd.DataFrame, providers_stream: pd.DataFrame):
        """Run the simulation over every row of ``transactions_stream``.

        Args:
            transactions_stream: Payments, one per row; each row is wrapped in
                a ``Transaction``.
            providers_stream: Currently unused.

        Returns:
            A list with one ``Log`` per transaction, in iteration order.
        """
        self.initialize_providers()

        logs: List[Log] = []
        for _, payment in transactions_stream.iterrows():
            # TODO: call self.change_providers_states() here once it is
            # implemented (the call was commented out in the original).
            transaction = Transaction(payment)
            solution = self.optimize(transaction)

            # Collect into the local list that is returned; there is no
            # ``self.logs`` attribute.
            logs.append(
                Log(
                    transaction=transaction, solution=solution
                )
            )

        return logs

    def optimize(self, transaction: Transaction) -> List[Provider]:
        """
        Returns an ordered collection of providers.
        """
        raise NotImplementedError

    def initialize_providers(self):
        """
        Initializes the list of all providers at the initial moment in time.
        """
        raise NotImplementedError

    def change_providers_states(self):
        """
        Returns the current state of the providers.
        """
        raise NotImplementedError

    def get_available_providers(self, transaction: Transaction) -> List[Provider]:
        """
        Returns the possible providers for the transaction.
        """
        raise NotImplementedError


In [43]:
# Wrap the payment at position 7 of the time-sorted `payments` frame.
t = Transaction(payment=payments.iloc[7])

In [44]:
# Re-run the provider lookup; the constructor already stored the result of the
# same call in `t.available_providers`.
p = t._get_available_providers()

In [50]:
# 1 - prod(1 - conversion) over the providers found for this transaction.
compute_expected_conversion(p)

0.85