In [1]:
# Notebook bootstrap: fix the working directory, load ".env", then build and
# wire the project's dependency-injection container.
import os
import json
import os.path
# When the kernel was started inside a directory called "notebooks", move one
# level up so the relative ".env" path below resolves against the parent directory.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir('..')
from dotenv import load_dotenv

load_dotenv(".env")  # load environment variables from the .env file in the current working directory

# NOTE(review): the project imports are placed after load_dotenv(); presumably
# the settings read environment variables — confirm before reordering.
from steam_trade_bot.containers import Container
from steam_trade_bot.settings import BotSettings
# Create the container, configure it from BotSettings and wire this module.
container = Container()
container.config.from_pydantic(BotSettings())
container.wire(modules=[__name__])

In [2]:
# Shortcut to the container's unit_of_work; later cells call `uow_()` and use
# the result as an async context manager.
uow_ = container.repositories.unit_of_work

In [3]:
CSGO_APP_ID = 730
async with uow_() as uow:
    market_names = await uow.market_item.get_all(app_id=CSGO_APP_ID)
    knifes_and_gloves = list(filter(lambda name: name.market_hash_name.startswith('★'), market_names))

In [4]:
# Number of market items whose hash name starts with '★'.
len(knifes_and_gloves)

2294

In [5]:
DEFAULT_CURRENCY = 1

MAX_PRICE = 200

history = {}
orders = {}

async with uow_() as uow:
    for market_item in knifes_and_gloves:
        market_hash_name = market_item.market_hash_name
        item_orders = await uow.market_item_orders.get(app_id=CSGO_APP_ID, market_hash_name=market_hash_name, currency=1)
        if item_orders and ((item_orders.sell_order and item_orders.sell_order < MAX_PRICE) or (item_orders.buy_order and item_orders.buy_order < MAX_PRICE)):
            # history[market_hash_name] = await uow.sell_history.get(app_id=CSGO_APP_ID, market_hash_name=market_hash_name, currency=1)
            orders[market_hash_name] = item_orders 

In [6]:
# Sizes of the two dicts filled by the previous cell.
len(history), len(orders)

(0, 1066)

In [7]:
from datetime import datetime, timedelta, timezone
curr_dt = datetime.now(timezone.utc)
MAX_AGE = timedelta(days=2)


market_item_importer = container.services.market_item_importer_from_orders()


for item_orders in orders.values():
    if curr_dt - item_orders.timestamp > MAX_AGE:
        await market_item_importer.import_item_orders(app_id=item_orders.app_id, market_hash_name=item_orders.market_hash_name, currency=DEFAULT_CURRENCY)

In [8]:
DEFAULT_CURRENCY = 1

curr_dt = datetime.now(timezone.utc)
MAX_AGE = timedelta(days=2)
MAX_PRICE = 100

history = {}
orders = {}
market_items = {}
market_item_importer = container.services.market_item_importer_from_page()

async with uow_() as uow:
    for market_item in knifes_and_gloves:
        market_hash_name = market_item.market_hash_name
        item_orders = await uow.market_item_orders.get(app_id=CSGO_APP_ID, market_hash_name=market_hash_name, currency=1)
        if item_orders and ((item_orders.sell_order and item_orders.sell_order < MAX_PRICE) or (item_orders.buy_order and item_orders.buy_order < MAX_PRICE)):
            item_history = await uow.sell_history.get(app_id=CSGO_APP_ID, market_hash_name=market_hash_name, currency=1)
            if curr_dt - item_history.timestamp > MAX_AGE:
                await market_item_importer.import_item(app_id=item_orders.app_id, market_hash_name=item_orders.market_hash_name ,currency=DEFAULT_CURRENCY)
            history[market_hash_name] = await uow.sell_history.get(app_id=CSGO_APP_ID, market_hash_name=market_hash_name, currency=1)
            orders[market_hash_name] = item_orders
            market_items[market_hash_name] = market_item

In [9]:
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext

# Get or create a Spark session on the 'local[*]' master with 3g configured
# for both driver and executor memory.
spark = (
    SparkSession.builder
    .master('local[*]')
    .appName('myAppName')
    .config("spark.driver.memory", "3g")
    .config('spark.executor.memory', '3g')
    .getOrCreate()
)

# Handles derived from the session.
sc = spark.sparkContext
sqlContext = SQLContext(sc)



In [10]:
from dataclasses import dataclass

@dataclass
class SparkInputDTO:
    market_hash_name: str
    market_fee: str | None
    market_marketable_restriction: int | None
    market_tradable_restriction: int | None
    commodity: bool
    history_dump: str
    history_timestamp: datetime
    orders_buy_count: int | None
    orders_buy_order: float | None
    orders_sell_count: int | None
    orders_sell_order: float | None
    orders_sell_order_no_fee: float | None
    orders_dump: str
    orders_timestamp: datetime

# Build one SparkInputDTO per item that has both a history and an orders
# entry, then distribute the list as an RDD.
data = []
for key, item_history in history.items():
    if key not in orders:
        continue
    market_item = market_items[key]
    item_orders = orders[key]
    data.append(
        SparkInputDTO(
            market_hash_name=key,
            market_fee=market_item.market_fee,
            market_marketable_restriction=market_item.market_marketable_restriction,
            market_tradable_restriction=market_item.market_tradable_restriction,
            commodity=market_item.commodity,
            history_dump=item_history.history,
            history_timestamp=item_history.timestamp,
            orders_buy_count=item_orders.buy_count,
            orders_buy_order=item_orders.buy_order,
            orders_sell_count=item_orders.sell_count,
            orders_sell_order=item_orders.sell_order,
            orders_sell_order_no_fee=item_orders.sell_order_no_fee,
            orders_dump=item_orders.dump,
            orders_timestamp=item_orders.timestamp,
        )
    )

rdd = sc.parallelize(data)

In [11]:
# Number of SparkInputDTO records in the RDD.
rdd.count()

388

In [12]:
# Inspect one record; the output is long because it contains the whole history_dump string.
rdd.first()

SparkInputDTO(market_hash_name='★ StatTrak™ Falchion Knife | Night (Well-Worn)', market_fee=None, market_marketable_restriction=None, market_tradable_restriction=7.0, commodity=False, history_dump='[["Jun 02 2015 01: +0",212.883,"1"],["Jun 17 2015 01: +0",115.585,"1"],["Jun 22 2015 01: +0",124.823,"1"],["Jun 28 2015 01: +0",97.75,"1"],["Jul 01 2015 01: +0",113.766,"1"],["Jul 03 2015 01: +0",111.51,"1"],["Jul 07 2015 01: +0",100.914,"1"],["Jul 09 2015 01: +0",104.068,"1"],["Jul 10 2015 01: +0",99.41,"1"],["Jul 13 2015 01: +0",103.501,"1"],["Jul 15 2015 01: +0",91.645,"2"],["Jul 18 2015 01: +0",89.48,"1"],["Jul 21 2015 01: +0",110,"1"],["Jul 23 2015 01: +0",121.381,"1"],["Jul 24 2015 01: +0",114.935,"1"],["Jul 25 2015 01: +0",114.187,"1"],["Aug 10 2015 01: +0",109.42,"1"],["Aug 11 2015 01: +0",103.5,"1"],["Aug 26 2015 01: +0",115.333,"1"],["Sep 13 2015 01: +0",100,"1"],["Sep 15 2015 01: +0",104.928,"1"],["Sep 18 2015 01: +0",104.981,"1"],["Sep 25 2015 01: +0",99.941,"1"],["Oct 07 2015 01

In [13]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, BooleanType, TimestampType, DoubleType
# Explicit schema for the SparkInputDTO records: (column name, Spark type, nullable),
# listed in the same order as the DTO fields.
schema = StructType(
    [
        StructField(column, spark_type, nullable)
        for column, spark_type, nullable in [
            ("market_hash_name", StringType(), False),
            ("market_fee", StringType(), True),
            ("market_marketable_restriction", IntegerType(), True),
            ("market_tradable_restriction", DoubleType(), True),
            ("commodity", BooleanType(), False),
            ("history_dump", StringType(), False),
            ("history_timestamp", TimestampType(), False),
            ("orders_buy_count", IntegerType(), True),
            ("orders_buy_order", DoubleType(), True),
            ("orders_sell_count", IntegerType(), True),
            ("orders_sell_order", DoubleType(), True),
            ("orders_sell_order_no_fee", DoubleType(), True),
            ("orders_dump", StringType(), False),
            ("orders_timestamp", TimestampType(), False),
        ]
    ]
)
# Turn the RDD into a DataFrame with that schema, spread over 50 partitions.
df = spark.createDataFrame(rdd, schema=schema).repartition(50)
df

DataFrame[market_hash_name: string, market_fee: string, market_marketable_restriction: int, market_tradable_restriction: double, commodity: boolean, history_dump: string, history_timestamp: timestamp, orders_buy_count: int, orders_buy_order: double, orders_sell_count: int, orders_sell_order: double, orders_sell_order_no_fee: double, orders_dump: string, orders_timestamp: timestamp]

In [14]:
# Preview the DataFrame rows (Spark truncates long cell values in this view).
df.show()

+--------------------+----------+-----------------------------+---------------------------+---------+--------------------+--------------------+----------------+----------------+-----------------+-----------------+------------------------+--------------------+--------------------+
|    market_hash_name|market_fee|market_marketable_restriction|market_tradable_restriction|commodity|        history_dump|   history_timestamp|orders_buy_count|orders_buy_order|orders_sell_count|orders_sell_order|orders_sell_order_no_fee|         orders_dump|    orders_timestamp|
+--------------------+----------+-----------------------------+---------------------------+---------+--------------------+--------------------+----------------+----------------+-----------------+-----------------+------------------------+--------------------+--------------------+
|★ Hand Wraps | De...|      null|                         null|                        7.0|    false|[["Dec 04 2020 01...|2023-02-04 03:48:...|              

In [15]:
# Check that column types and nullability match the explicit schema.
df.printSchema()

root
 |-- market_hash_name: string (nullable = false)
 |-- market_fee: string (nullable = true)
 |-- market_marketable_restriction: integer (nullable = true)
 |-- market_tradable_restriction: double (nullable = true)
 |-- commodity: boolean (nullable = false)
 |-- history_dump: string (nullable = false)
 |-- history_timestamp: timestamp (nullable = false)
 |-- orders_buy_count: integer (nullable = true)
 |-- orders_buy_order: double (nullable = true)
 |-- orders_sell_count: integer (nullable = true)
 |-- orders_sell_order: double (nullable = true)
 |-- orders_sell_order_no_fee: double (nullable = true)
 |-- orders_dump: string (nullable = false)
 |-- orders_timestamp: timestamp (nullable = false)



In [16]:
# Inspect one full Row; the output is long because history_dump is not truncated here.
df.first()

Row(market_hash_name='★ Hand Wraps | Desert Shamagh (Field-Tested)', market_fee=None, market_marketable_restriction=None, market_tradable_restriction=7.0, commodity=False, history_dump='[["Dec 04 2020 01: +0",152.242,"8"],["Dec 05 2020 01: +0",163.2,"7"],["Dec 06 2020 01: +0",147.821,"8"],["Dec 07 2020 01: +0",130.797,"13"],["Dec 08 2020 01: +0",125.057,"5"],["Dec 09 2020 01: +0",127.573,"5"],["Dec 10 2020 01: +0",123.466,"3"],["Dec 11 2020 01: +0",132.969,"5"],["Dec 12 2020 01: +0",129.488,"8"],["Dec 13 2020 01: +0",145.673,"5"],["Dec 14 2020 01: +0",131.332,"4"],["Dec 15 2020 01: +0",128.774,"5"],["Dec 16 2020 01: +0",142.242,"4"],["Dec 17 2020 01: +0",153.144,"4"],["Dec 18 2020 01: +0",183.188,"2"],["Dec 19 2020 01: +0",169.11,"3"],["Dec 20 2020 01: +0",169.475,"2"],["Dec 21 2020 01: +0",163.932,"5"],["Dec 22 2020 01: +0",155.455,"3"],["Dec 23 2020 01: +0",160.915,"5"],["Dec 24 2020 01: +0",165.297,"4"],["Dec 25 2020 01: +0",178.929,"6"],["Dec 26 2020 01: +0",159.362,"3"],["Dec 27 2

In [17]:
import pyspark.sql.functions as func


In [18]:
# Capture a single UTC "now" and broadcast it; the UDFs defined later read
# current_time_broadcast.value so they all compare against the same instant.
CURRENT_TIME = datetime.now(timezone.utc)
current_time_broadcast = sc.broadcast(CURRENT_TIME)

In [19]:
# Tuning constants. None of them is referenced in the part of the notebook
# visible here.
# NOTE(review): confirm their meaning and units at the place where they are used.
_MAX_FALL_DEVIATION = 0.05
_MEAN_MAX_THRESHOLD = 0.1
_MEAN_MIN_THRESHOLD = 0.1
_WINDOWS_SIZE = 15
_MAX_DEVIATION = 0.06
_MIN_SELLS_PER_WEEK = 10
_QUANTILES_MIN_POINTS = 10

def steam_date_str_to_datetime(s: str) -> datetime:
    """
    Convert a Steam history timestamp such as 'Mar 16 2017 01: +0' into a
    timezone-aware UTC datetime.

    Only the part before the first ':' (month, day, year, hour) is parsed.
    The parsed value is labelled as UTC directly, so the result does not
    depend on the local timezone of the machine running the code.

    :param s: timestamp string containing a ':' after the hour.
    :return: datetime with tzinfo set to UTC.
    :raises ValueError: if ``s`` has no ':' or does not match '%b %d %Y %H'.
    """
    s = s[: s.index(":")]
    # strptime() yields a naive datetime. Calling astimezone() on it would
    # treat the value as local time and shift it by the host's UTC offset,
    # so the UTC tzinfo is attached with replace() instead.
    return datetime.strptime(s, "%b %d %Y %H").replace(tzinfo=timezone.utc)


def percentage_diff(price1: float, price2: float) -> float:
    """
    Return the gap between two prices as a fraction of the larger one.

    The result is symmetric in its arguments. Equal prices, including two
    zeros, give 0.0 instead of dividing by zero.

    :param price1: first price.
    :param price2: second price.
    :return: ``(max - min) / max``, or 0.0 when both prices are equal.
    :raises ZeroDivisionError: if the larger price is 0 and the other is negative.
    """
    if price1 == price2:
        # Covers the 0/0 case that previously raised ZeroDivisionError.
        return 0.0
    min_ = min(price1, price2)
    max_ = max(price1, price2)
    return (max_ - min_) / max_


def window_slicing(k, iter_):
    """Yield each contiguous slice of length ``k`` from ``iter_``, left to right.

    ``iter_`` must support ``len()`` and slicing. Nothing is yielded when
    ``k`` is larger than ``len(iter_)``.
    """
    last_start = len(iter_) - k
    start = 0
    while start <= last_start:
        yield iter_[start : start + k]
        start += 1

@func.udf(StringType())
def cut_to_last_30_days(history_dump) -> str:
    """Return the trailing part of a JSON history that is at most 30 days old.

    ``history_dump`` is a JSON array of ``[timestamp, price, amount]`` entries.
    Entries are examined from the last one backwards, and scanning stops at
    the first entry older than 30 days relative to the broadcast current
    time. The kept entries are returned as JSON in their original order.
    """
    records = json.loads(history_dump)
    now = current_time_broadcast.value
    max_age = timedelta(days=30)

    # Count how many entries at the end of the list are recent enough.
    kept = 0
    for timestamp, _price, _amount in reversed(records):
        if now - steam_date_str_to_datetime(timestamp) > max_age:
            break
        kept += 1

    # Slice from an explicit start index: `records[-0:]` would be the whole list.
    return json.dumps(records[len(records) - kept:])

# Add the trimmed history as `last_month_history` and cache the resulting frame (rebinds `df`).
df = df.withColumn("last_month_history", cut_to_last_30_days(func.col("history_dump"))).cache()

In [20]:
# Preview the trimmed histories; the first visible date shows where each one now starts.
df.select("last_month_history").show()

+--------------------+
|  last_month_history|
+--------------------+
|[["Jan 05 2023 14...|
|[["Jan 05 2023 15...|
|[["Jan 05 2023 19...|
|[["Jan 05 2023 17...|
|[["Jan 14 2023 23...|
|[["Jan 05 2023 15...|
|[["Jan 05 2023 15...|
|[["Jan 06 2023 16...|
|[["Jan 06 2023 21...|
|[["Jan 08 2023 03...|
|[["Jan 05 2023 14...|
|[["Jan 08 2023 18...|
|[["Jan 08 2023 21...|
|[["Jan 05 2023 15...|
|[["Jan 20 2023 23...|
|[["Jan 05 2023 15...|
|[["Jan 05 2023 15...|
|[["Jan 05 2023 15...|
|[["Jan 06 2023 18...|
|[["Jan 06 2023 11...|
+--------------------+
only showing top 20 rows



In [21]:
@func.udf(IntegerType())
def calc_sold_quantity(history_dump, days: int) -> int:
    """Sum the sold amounts of the entries from the last ``days`` days.

    ``history_dump`` is a JSON array of ``[timestamp, price, amount]`` entries.
    Entries are walked from the last one backwards, and the walk stops at the
    first entry older than ``days`` days relative to the broadcast current
    time. Each amount is converted with ``int()`` before being added.
    """
    sales = json.loads(history_dump)
    now = current_time_broadcast.value
    sold = 0
    for timestamp, _price, amount in reversed(sales):
        age = now - steam_date_str_to_datetime(timestamp)
        if age > timedelta(days=days):
            break
        sold += int(amount)
    return sold

# Add the quantities sold over the last 30 and the last 7 days, both computed
# from the already trimmed `last_month_history` column, and cache the result.
df = (
    df
    .withColumn("sold_last_month", calc_sold_quantity(func.col("last_month_history"), func.lit(30)))
    .withColumn("sold_last_week", calc_sold_quantity(func.col("last_month_history"), func.lit(7)))
    .cache()
)


In [22]:
# Ratio of four times the weekly sales to the monthly sales.
df2 = df.withColumn(
    "sold_last_week_to_month_corr",
    func.col("sold_last_week") * 4 / func.col("sold_last_month"),
)
# Show the items with the highest monthly volume first.
(
    df2
    .select("last_month_history", "sold_last_month", "sold_last_week", "sold_last_week_to_month_corr")
    .sort(df.sold_last_month.desc())
    .show()
)

+--------------------+---------------+--------------+----------------------------+
|  last_month_history|sold_last_month|sold_last_week|sold_last_week_to_month_corr|
+--------------------+---------------+--------------+----------------------------+
|[["Jan 05 2023 15...|            888|           212|           0.954954954954955|
|[["Jan 05 2023 14...|            682|           154|          0.9032258064516129|
|[["Jan 05 2023 16...|            605|           153|          1.0115702479338844|
|[["Jan 05 2023 14...|            603|           149|           0.988391376451078|
|[["Jan 05 2023 14...|            597|           118|          0.7906197654941374|
|[["Jan 05 2023 16...|            498|           123|          0.9879518072289156|
|[["Jan 05 2023 19...|            423|           115|          1.0874704491725768|
|[["Jan 05 2023 14...|            421|           112|          1.0641330166270784|
|[["Jan 05 2023 14...|            419|            89|          0.8496420047732697|
|[["