In [1]:
import os
import json
import os.path
# When the kernel was started inside a directory named "notebooks", move one
# level up so the relative paths below (e.g. ".env") resolve from its parent.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir('..')
from dotenv import load_dotenv

load_dotenv(".env")  # take environment variables from .env.

# NOTE(review): the project imports come after load_dotenv — presumably so that
# BotSettings() can read the freshly loaded environment variables; confirm.
from steam_trade_bot.containers import Container
from steam_trade_bot.settings import BotSettings
# Build the project's container and feed it the settings object.
container = Container()
container.config.from_pydantic(BotSettings())
# Wire the container into this module (the notebook's own namespace).
container.wire(modules=[__name__])

In [2]:
# Handle on the container's unit-of-work provider; later cells call `uow_()`
# and use the result as an async context manager.
uow_ = container.repositories.unit_of_work

In [3]:
CSGO_APP_ID = 730
async with uow_() as uow:
    market_names = await uow.market_item.get_all(app_id=CSGO_APP_ID)
    knifes_and_gloves = list(filter(lambda name: '★' in name.market_hash_name, market_names))

In [4]:
# Number of market items that passed the "★" filter.
len(knifes_and_gloves)

2297

In [5]:
DEFAULT_CURRENCY = 1

history = {}
orders = {}

async with uow_() as uow:
    for market_item in knifes_and_gloves:
        market_hash_name = market_item.market_hash_name
        history[market_hash_name] = await uow.sell_history.get(app_id=CSGO_APP_ID, market_hash_name=market_hash_name, currency=1)
        orders[market_hash_name] = await uow.market_item_orders.get(app_id=CSGO_APP_ID, market_hash_name=market_hash_name, currency=1)

In [6]:
# Entry counts of the two lookup dicts (one key per distinct market hash name).
len(history), len(orders)

(2297, 2297)

In [7]:
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext

# Create (or reuse) a Spark session with master 'local[3]' and the memory /
# core settings listed below.
spark = (
    SparkSession.builder
    .master('local[3]')
    .appName('myAppName')
    .config("spark.driver.memory", "10g")
    .config('spark.executor.memory', '10g')
    .config("spark.cores.max", "6")
    .getOrCreate()
)

# Context handles used by the following cells.
sc = spark.sparkContext
sqlContext = SQLContext(sc)



In [8]:
# Pair each item's sell history with its orders record, skipping items for
# which either lookup produced a falsy value.
data = [
    (name, (item_history, orders[name]))
    for name, item_history in history.items()
    if item_history and orders.get(name)
]
rdd = sc.parallelize(data)

In [9]:
# Number of items that have both a sell history and an orders record.
rdd.count()

1922

In [10]:
# Peek at one (market_hash_name, (sell_history, orders)) record.
rdd.first()

('★ M9 Bayonet | Marble Fade (Minimal Wear)',
 (MarketItemSellHistory(app_id=730, market_hash_name='★ M9 Bayonet | Marble Fade (Minimal Wear)', currency=1, timestamp=datetime.datetime(2022, 11, 12, 18, 28, 6, 848768, tzinfo=datetime.timezone.utc), history='[["Apr 26 2015 01: +0",400,"1"],["May 08 2015 01: +0",396.613,"1"],["May 28 2015 01: +0",356.661,"1"],["May 29 2015 01: +0",400,"1"],["May 30 2015 01: +0",394.53,"1"],["May 31 2015 01: +0",400.016,"1"],["Jun 03 2015 01: +0",400,"1"],["Jun 14 2015 01: +0",406.019,"1"],["Jun 27 2015 01: +0",400.004,"1"],["Jun 30 2015 01: +0",398.952,"1"],["Jul 02 2015 01: +0",394.714,"1"],["Jul 13 2015 01: +0",395.096,"1"],["Jul 14 2015 01: +0",397.474,"1"],["Jul 19 2015 01: +0",399.997,"1"],["Aug 02 2015 01: +0",366.442,"1"],["Sep 12 2015 01: +0",0.045,"1"],["Sep 14 2015 01: +0",406.609,"1"],["Sep 18 2015 01: +0",410.169,"1"],["Sep 19 2015 01: +0",406.182,"1"],["Sep 27 2015 01: +0",401.862,"1"],["Oct 17 2015 01: +0",386.125,"1"],["Oct 25 2015 01: +0",

In [11]:
from datetime import datetime

def steam_date_str_to_datetime(s: str) -> datetime:
    """
    Parse a Steam sell-history timestamp such as 'Mar 16 2017 01: +0'.

    Only the text before the first ':' (month name, day, year, hour) is
    parsed; everything from the colon onwards is discarded, so the returned
    datetime is naive.

    Raises ValueError when `s` contains no ':' or the leading part does not
    match the '%b %d %Y %H' layout.
    """
    colon_at = s.index(":")
    date_and_hour = s[:colon_at]
    return datetime.strptime(date_and_hour, "%b %d %Y %H")

In [12]:
import functools
import json
import operator
import statistics
from datetime import datetime, timedelta

from steam_trade_bot.domain.entities.market import SellHistoryAnalyzeResult, MarketItemSellHistory
from steam_trade_bot.domain.steam_fee import SteamFee


# Thresholds read by analyze() when deciding whether an item is "recommended".
# Exclusive upper bound on stdev([first window mean, last window mean]) / median.
_MAX_FALL_DEVIATION = 0.05
# Exclusive upper bound on percentage_diff(highest window mean, median).
_MEAN_MAX_THRESHOLD = 0.1
# Exclusive upper bound on percentage_diff(lowest window mean, median).
_MEAN_MIN_THRESHOLD = 0.1
# Number of consecutive history points in each sliding window.
_WINDOWS_SIZE = 15
# Exclusive upper bound on stdev(window means) / median.
_MAX_DEVIATION = 0.06
# NOTE(review): not referenced anywhere in the visible code — confirm whether
# a weekly minimum was meant to be part of the recommendation rule.
_MIN_SELLS_PER_WEEK = 10
# Minimum units sold within the last 30 days for an item to be recommended.
_MIN_SELLS_PER_MONTH = 50
# Minimum number of history points from the last 30 days before any price
# statistics are computed.
_QUANTILES_MIN_POINTS = 10
# Fixed "now" for the day / week / month windows; a naive datetime, like the
# values returned by steam_date_str_to_datetime().
CURRENT_DATE = datetime(2022, 11, 18)


def percentage_diff(price1: float, price2: float) -> float:
    """Return the gap between two prices as a fraction of the larger price.

    The result does not depend on argument order: (100, 90) and (90, 100)
    both give 0.1.

    Equal prices always give 0.0. This includes the case where both prices
    are 0, which would otherwise divide by zero.
    """
    if price1 == price2:
        # Nothing to compare; also avoids 0 / 0 when both prices are zero.
        return 0.0
    min_ = min(price1, price2)
    max_ = max(price1, price2)
    return (max_ - min_) / max_


def window_slicing(k, iter_):
    """Yield every contiguous slice of length `k` from `iter_`, left to right.

    `iter_` must support len() and slicing (list, tuple, str, ...); each
    yielded window is `iter_[start : start + k]`, advancing `start` by one.
    Nothing is yielded when `iter_` holds fewer than `k` items.
    """
    window_starts = range(len(iter_) - k + 1)
    yield from (iter_[start : start + k] for start in window_starts)

def _build_analyze_result(
    history,
    sells_last_day,
    sells_last_week,
    sells_last_month,
    recommended=False,
    deviation=None,
    sell_order=None,
):
    """Assemble a SellHistoryAnalyzeResult that copies the identity fields of `history`.

    `sell_order_no_fee` is derived from `sell_order` through
    SteamFee.subtract_fee, and stays None when no sell order was computed.
    """
    if sell_order is None:
        sell_order_no_fee = None
    else:
        sell_order_no_fee = SteamFee.subtract_fee(sell_order)
    return SellHistoryAnalyzeResult(
        app_id=history.app_id,
        market_hash_name=history.market_hash_name,
        currency=history.currency,
        timestamp=history.timestamp,
        sells_last_day=sells_last_day,
        sells_last_week=sells_last_week,
        sells_last_month=sells_last_month,
        recommended=recommended,
        deviation=deviation,
        sell_order=sell_order,
        sell_order_no_fee=sell_order_no_fee,
    )


def analyze(history: MarketItemSellHistory) -> SellHistoryAnalyzeResult:
    """Analyze the last 30 days of an item's sell history.

    `history.history` is a JSON array of `[timestamp, price, amount]` rows.
    Rows are measured against CURRENT_DATE. Only rows at most 30 days old are
    used.

    Steps:
      1. Sum the sold amounts for the last day, week (7 days) and month
         (30 days).
      2. With fewer than _QUANTILES_MIN_POINTS recent rows, return a
         non-recommended result that carries only the sell counters.
      3. Take the 80th percentile of the recent prices (rounded to 2 digits)
         as the suggested sell order.
      4. Compute the amount-weighted harmonic mean price of every sliding
         window of _WINDOWS_SIZE rows. With fewer than 5 windows, return a
         non-recommended result that carries the sell order.
      5. Recommend the item only when the window means stay close to their
         median (min, max, overall stdev, first-vs-last window) and at least
         _MIN_SELLS_PER_MONTH units were sold in the last 30 days.

    Returns:
        SellHistoryAnalyzeResult. `deviation` is stdev(window means) / median,
        or None when it could not be computed.
    """
    one_day = timedelta(days=1)
    one_week = timedelta(days=7)
    one_month = timedelta(days=30)

    sells_last_day = 0
    sells_last_week = 0
    sells_last_month = 0
    recent = []
    # Walk newest-first and stop at the first row older than 30 days. This
    # relies on the stored rows being in chronological order.
    for timestamp, price, amount in reversed(json.loads(history.history)):
        dt = steam_date_str_to_datetime(timestamp)
        age = CURRENT_DATE - dt  # computed once per row
        if age > one_month:
            break
        amount = int(amount)  # amounts are stored as strings, e.g. "10"
        sells_last_month += amount
        if age <= one_week:
            sells_last_week += amount
        if age <= one_day:
            sells_last_day += amount
        recent.append((dt, round(price, 2), amount))

    sell_counts = (sells_last_day, sells_last_week, sells_last_month)

    if len(recent) < _QUANTILES_MIN_POINTS:
        return _build_analyze_result(history, *sell_counts)

    recent.reverse()  # back to chronological order for the sliding windows
    prices = [price for _, price, _ in recent]
    # quantiles(n=10) returns 9 cut points; index 7 is the 80% one.
    percentile_80 = statistics.quantiles(prices, n=10)[7]
    sell_order = round(percentile_80, 2)

    window_means = tuple(
        round(
            statistics.harmonic_mean(
                [price for _, price, _ in window],
                weights=[amount for _, _, amount in window],
            ),
            2,
        )
        for window in window_slicing(_WINDOWS_SIZE, recent)
    )
    if len(window_means) < 5:
        return _build_analyze_result(history, *sell_counts, sell_order=sell_order)

    med = statistics.median(window_means)
    if med == 0:
        # Every ratio below divides by the median; a zero median (window
        # means that round to 0.00) cannot be normalized, so report the item
        # as not recommended instead of raising ZeroDivisionError.
        return _build_analyze_result(history, *sell_counts, sell_order=sell_order)

    perc_diff_min = percentage_diff(min(window_means), med)
    perc_diff_max = percentage_diff(max(window_means), med)
    deviation = statistics.stdev(window_means) / med
    # Spread between the oldest and the newest window mean, relative to the median.
    fall_deviation = statistics.stdev([window_means[0], window_means[-1]]) / med

    recommended = (
        fall_deviation < _MAX_FALL_DEVIATION
        and deviation < _MAX_DEVIATION
        and perc_diff_min < _MEAN_MIN_THRESHOLD
        and perc_diff_max < _MEAN_MAX_THRESHOLD
        and sells_last_month >= _MIN_SELLS_PER_MONTH
    )
    return _build_analyze_result(
        history,
        *sell_counts,
        recommended=recommended,
        deviation=deviation,
        sell_order=sell_order,
    )

In [13]:
def parse_history(row):
    """Extend a (sell_history, orders) pair with the analysis of its sell history.

    Returns the 3-tuple (sell_history, orders, analyze(sell_history)).
    """
    sell_history, item_orders = row
    return sell_history, item_orders, analyze(sell_history)

In [14]:
# Analyze every item and keep only those flagged as recommended. After
# mapValues each value is (sell_history, orders, analyze_result), so
# pair[1][2] is the analysis result.
recommended_rdd = rdd.mapValues(parse_history).filter(lambda pair: pair[1][2].recommended)

In [15]:
# Number of recommended items.
recommended_rdd.count()

222

In [16]:
# Keep recommended items whose suggested sell order price is below 200.
# NOTE(review): 200 is a bare literal — presumably a budget cap in the query currency; confirm.
low_price = recommended_rdd.filter(lambda pair: pair[1][2].sell_order < 200)

In [17]:
# Number of recommended items under the price cap.
low_price.count()

182

In [18]:
# Order items by units sold in the last month, highest first (the key is negated).
# NOTE(review): the variable name says "per_week" but the key is sells_last_month — confirm which was intended.
sort_by_sells_per_week = low_price.sortBy(lambda pair: -pair[1][2].sells_last_month)

In [19]:
# Sorting does not drop records; the count should match the previous step.
sort_by_sells_per_week.count()

182

In [20]:
# Keep items whose fee-free sell price is higher than the `buy_order` value of
# the orders record (pair[1][1]).
# NOTE(review): assumes buy_order is never None for these items — verify, since the comparison would fail otherwise.
final = sort_by_sells_per_week.filter(lambda pair: pair[1][2].sell_order_no_fee > pair[1][1].buy_order)

In [21]:
# Show the first 10 remaining records.
final.take(10)

[('★ Broken Fang Gloves | Needle Point (Field-Tested)',
  (MarketItemSellHistory(app_id=730, market_hash_name='★ Broken Fang Gloves | Needle Point (Field-Tested)', currency=1, timestamp=datetime.datetime(2022, 11, 12, 17, 41, 0, 392857, tzinfo=datetime.timezone.utc), history='[["Dec 04 2020 01: +0",159.802,"10"],["Dec 05 2020 01: +0",120.151,"16"],["Dec 06 2020 01: +0",140.415,"9"],["Dec 07 2020 01: +0",128.86,"7"],["Dec 08 2020 01: +0",120.838,"9"],["Dec 09 2020 01: +0",130.913,"9"],["Dec 10 2020 01: +0",127.581,"10"],["Dec 11 2020 01: +0",129.63,"7"],["Dec 12 2020 01: +0",132.547,"4"],["Dec 13 2020 01: +0",149.773,"7"],["Dec 14 2020 01: +0",143.146,"4"],["Dec 15 2020 01: +0",146.086,"3"],["Dec 16 2020 01: +0",146.701,"8"],["Dec 17 2020 01: +0",163.341,"3"],["Dec 18 2020 01: +0",163.388,"3"],["Dec 19 2020 01: +0",158.359,"12"],["Dec 20 2020 01: +0",176.9,"5"],["Dec 21 2020 01: +0",166.278,"10"],["Dec 22 2020 01: +0",143.478,"3"],["Dec 23 2020 01: +0",156.081,"11"],["Dec 24 2020 01: +0