In [1]:
import os
import json
import os.path
# When the kernel is started from the "notebooks" directory, move one level up
# so that relative paths (such as ".env" below) resolve against the parent dir.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir('..')
from dotenv import load_dotenv

load_dotenv(".env")  # take environment variables from .env.

# NOTE(review): these imports are placed after load_dotenv(); presumably
# BotSettings() reads its values from the environment - confirm.
from steam_trade_bot.containers import Container
from steam_trade_bot.settings import BotSettings
# Build the DI container, feed it the settings object and wire this module.
container = Container()
container.config.from_pydantic(BotSettings())
container.wire(modules=[__name__])

In [2]:
# Provider taken from the container; it is called as `uow_()` in later cells
# and the result is used as an async context manager.
uow_ = container.repositories.unit_of_work

In [3]:
CSGO_APP_ID = 730
async with uow_() as uow:
    market_names = await uow.market_item.get_all(app_id=CSGO_APP_ID)
    knifes_and_gloves = list(filter(lambda name: name.market_hash_name.startswith('★'), market_names))

In [4]:
# How many items passed the '★' prefix filter above.
len(knifes_and_gloves)

2294

In [5]:
DEFAULT_CURRENCY = 1

MAX_PRICE = 200

history = {}
orders = {}

async with uow_() as uow:
    for market_item in knifes_and_gloves:
        market_hash_name = market_item.market_hash_name
        item_orders = await uow.market_item_orders.get(app_id=CSGO_APP_ID, market_hash_name=market_hash_name, currency=1)
        if item_orders and ((item_orders.sell_order and item_orders.sell_order < MAX_PRICE) or (item_orders.buy_order and item_orders.buy_order < MAX_PRICE)):
            # history[market_hash_name] = await uow.sell_history.get(app_id=CSGO_APP_ID, market_hash_name=market_hash_name, currency=1)
            orders[market_hash_name] = item_orders 

In [6]:
# `history` is expected to be empty here because the cell above never fills it
# (the assignment is commented out); `orders` holds the price-filtered items.
len(history), len(orders)

(0, 1063)

In [7]:
from datetime import datetime, timedelta, timezone
curr_dt = datetime.now(timezone.utc)
MAX_AGE = timedelta(days=70)


market_item_importer = container.services.market_item_importer_from_orders()


for item_orders in orders.values():
    if curr_dt - item_orders.timestamp > MAX_AGE:
        await market_item_importer.import_item_orders(app_id=item_orders.app_id, market_hash_name=item_orders.market_hash_name, currency=DEFAULT_CURRENCY)

In [8]:
DEFAULT_CURRENCY = 1

curr_dt = datetime.now(timezone.utc)
MAX_AGE = timedelta(days=70)
MAX_PRICE = 100

history = {}
orders = {}
market_items = {}
market_item_importer = container.services.market_item_importer_from_page()

async with uow_() as uow:
    for market_item in knifes_and_gloves:
        market_hash_name = market_item.market_hash_name
        item_orders = await uow.market_item_orders.get(app_id=CSGO_APP_ID, market_hash_name=market_hash_name, currency=1)
        if item_orders and ((item_orders.sell_order and item_orders.sell_order < MAX_PRICE) or (item_orders.buy_order and item_orders.buy_order < MAX_PRICE)):
            item_history = await uow.sell_history.get(app_id=CSGO_APP_ID, market_hash_name=market_hash_name, currency=1)
            if curr_dt - item_history.timestamp > MAX_AGE:
                await market_item_importer.import_item(app_id=item_orders.app_id, market_hash_name=item_orders.market_hash_name ,currency=DEFAULT_CURRENCY)
            history[market_hash_name] = await uow.sell_history.get(app_id=CSGO_APP_ID, market_hash_name=market_hash_name, currency=1)
            orders[market_hash_name] = item_orders
            market_items[market_hash_name] = market_item

In [9]:
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext

# Create (or reuse) a Spark session running in local mode on all cores.
spark = (
    SparkSession.builder
    .master('local[*]')
    .appName('myAppName')
    .config("spark.driver.memory", "3g")
    .config('spark.executor.memory', '3g')
    .getOrCreate()
)

# Handles derived from the session; `sc` is used below to build RDDs.
sc = spark.sparkContext
sqlContext = SQLContext(sc)



In [27]:
from dataclasses import dataclass

@dataclass
class SparkInputDTO:
    """Flat per-item record handed to Spark; one instance per market item."""

    # Market hash name of the item.
    market_hash_name: str
    # Copied from the market item's `market_fee`; may be None.
    market_fee: str | None
    # Sell history as a string; it is later decoded with json.loads.
    history_dump: str
    # Timestamp taken from the stored sell-history record.
    history_timestamp: datetime


# Build one SparkInputDTO per item that has both a history record and a
# market item. The previous guard (`key not in history` while iterating
# `history`) could never trigger; the check below skips incomplete entries
# instead of failing on them.
data = []
for key, item_history in history.items():
    market_item = market_items.get(key)
    if item_history is None or market_item is None:
        continue
    data.append(SparkInputDTO(
        market_hash_name=key,
        market_fee=market_item.market_fee,
        history_dump=item_history.history,
        history_timestamp=item_history.timestamp,
    ))

rdd = sc.parallelize(data, numSlices=200)

In [31]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, BooleanType, TimestampType, DoubleType
# Column layout of the Spark input: (name, Spark type, nullable).
_SPARK_INPUT_COLUMNS = (
    ("market_hash_name", StringType(), False),
    ("market_fee", StringType(), True),
    ("history_dump", StringType(), False),
    ("history_timestamp", TimestampType(), False),
)
schema = StructType(
    [StructField(name, dtype, nullable) for name, dtype, nullable in _SPARK_INPUT_COLUMNS]
)
# Work on a reproducible 20% sample and keep it cached for the cells below.
df = spark.createDataFrame(rdd, schema=schema).sample(0.2, seed=42).persist()
df

DataFrame[market_hash_name: string, market_fee: string, history_dump: string, history_timestamp: timestamp]

In [32]:
# Preview the sampled input rows (Spark truncates long cell values by default).
df.show()

+--------------------+----------+--------------------+--------------------+
|    market_hash_name|market_fee|        history_dump|   history_timestamp|
+--------------------+----------+--------------------+--------------------+
|★ Navaja Knife | ...|      null|[["Mar 17 2019 01...|2023-02-04 03:44:...|
|★ Driver Gloves |...|      null|[["Feb 16 2018 01...|2023-02-04 03:44:...|
|★ StatTrak™ Falch...|      null|[["Jun 01 2015 01...|2023-02-04 03:45:...|
|★ Gut Knife | Saf...|      null|[["Aug 26 2013 01...|2023-02-04 03:46:...|
|★ Falchion Knife ...|      null|[["Mar 16 2017 01...|2023-02-04 03:47:...|
|★ Bowie Knife | F...|      null|[["Feb 18 2016 01...|2023-02-04 03:47:...|
|★ Navaja Knife | ...|      null|[["Mar 21 2019 01...|2023-02-04 03:48:...|
|★ Huntsman Knife ...|      null|[["May 03 2014 01...|2023-02-04 03:49:...|
|★ Gut Knife | Dam...|      null|[["Jan 09 2015 01...|2023-02-04 03:49:...|
|★ Shadow Daggers ...|      null|[["Sep 19 2015 01...|2023-02-04 03:50:...|
|★ Broken Fa

In [33]:
# Confirm that column types and nullability match the explicit schema above.
df.printSchema()

root
 |-- market_hash_name: string (nullable = false)
 |-- market_fee: string (nullable = true)
 |-- history_dump: string (nullable = false)
 |-- history_timestamp: timestamp (nullable = false)



In [34]:
# Inspect one full row to see the raw, untruncated shape of `history_dump`.
df.first()

Row(market_hash_name='★ Navaja Knife | Ultraviolet (Field-Tested)', market_fee=None, history_dump='[["Mar 17 2019 01: +0",85.325,"2"],["Mar 19 2019 01: +0",78.14,"1"],["Mar 20 2019 01: +0",73.243,"3"],["Mar 21 2019 01: +0",75.098,"5"],["Mar 22 2019 01: +0",76.522,"3"],["Mar 23 2019 01: +0",62.01,"1"],["Mar 24 2019 01: +0",71.575,"2"],["Mar 25 2019 01: +0",73.085,"3"],["Mar 26 2019 01: +0",70.564,"3"],["Mar 27 2019 01: +0",71.721,"1"],["Mar 29 2019 01: +0",72.885,"3"],["Mar 30 2019 01: +0",97.187,"1"],["Apr 01 2019 01: +0",61.734,"1"],["Apr 02 2019 01: +0",62.745,"1"],["Apr 03 2019 01: +0",69.216,"3"],["Apr 04 2019 01: +0",68.604,"6"],["Apr 05 2019 01: +0",84.53,"1"],["Apr 06 2019 01: +0",58.705,"1"],["Apr 07 2019 01: +0",66.122,"6"],["Apr 08 2019 01: +0",71.132,"4"],["Apr 09 2019 01: +0",79.999,"2"],["Apr 10 2019 01: +0",69.511,"3"],["Apr 11 2019 01: +0",65.149,"3"],["Apr 12 2019 01: +0",73.817,"3"],["Apr 13 2019 01: +0",77.849,"4"],["Apr 14 2019 01: +0",71.799,"4"],["Apr 15 2019 01: +

In [35]:
import pyspark.sql.functions as func
from pyspark.sql import Window as W

In [36]:
# Single timezone-aware "now" (UTC) captured once for this analysis.
CURRENT_TIME = datetime.now(timezone.utc)
# Broadcast wrapper around the same value; it is read via `.value` in the
# 30-day filter of the rolling-window cell.
current_time_broadcast = sc.broadcast(CURRENT_TIME)

In [37]:
from pyspark.sql.types import ArrayType, FloatType


def steam_date_str_to_datetime(s: str) -> datetime:
    """
    Convert a Steam history date such as 'Mar 16 2017 01: +0' to an aware UTC datetime.

    Only the part before the first ':' ('Mar 16 2017 01') is parsed; the
    trailing ' +0' is taken to mean the value is already in UTC.

    Raises ValueError when `s` has no ':' or the prefix does not match the
    expected '%b %d %Y %H' layout (note that '%b' is locale dependent).
    """
    s = s[: s.index(":")]
    # strptime returns a naive datetime. Calling astimezone() on it would
    # interpret the value as *local* time and shift it by the host's UTC
    # offset, so attach UTC directly instead.
    return datetime.strptime(s, "%b %d %Y %H").replace(tzinfo=timezone.utc)


def parse_history_dump(history_dump):
    """
    Decode a JSON history dump into a list of (datetime, price, amount) tuples.

    `history_dump` is a JSON array of 3-element entries
    [date string, price, amount]; the price is converted with float() and the
    amount with int().
    """
    return [
        (steam_date_str_to_datetime(raw_date), float(raw_price), int(raw_amount))
        for raw_date, raw_price, raw_amount in json.loads(history_dump)
    ]

# Pull (name, raw dump) pairs to the driver, flatten every dump into one row
# per history point, then hand the flat rows back to Spark.
pairs = df.select("market_hash_name", "history_dump").collect()
result = []
for market_hash_name, history_dump in pairs:
    result.extend(
        (market_hash_name, sold_at, price, amount)
        for sold_at, price, amount in parse_history_dump(history_dump)
    )
history_df = spark.createDataFrame(
    result,
    ["market_hash_name", "timestamp", "price", "amount"],
).persist()

# cols = list(map(
#     lambda f: func.col("tuple").getItem(f).alias(str(f)),
#     ["timestamp", "price", "amount"]))
#
# history_df = df\
#     .select("market_hash_name", "history_dump") \
#     .withColumn("history", parse_history_dump(func.col("history_dump")))\
#     .drop("history_dump")\
#     .select(
#         "market_hash_name",
#         func.explode("history").alias("tuple"),
#     )\
#     .select(
#         "market_hash_name",
#         *cols,
#     )\
#     .persist()
#
# del cols

In [38]:
# Show the first 20 flattened history rows without truncating column values.
history_df.show(20, False)

+-------------------------------------------+-------------------+------+------+
|market_hash_name                           |timestamp          |price |amount|
+-------------------------------------------+-------------------+------+------+
|★ Navaja Knife | Ultraviolet (Field-Tested)|2019-03-17 01:00:00|85.325|2     |
|★ Navaja Knife | Ultraviolet (Field-Tested)|2019-03-19 01:00:00|78.14 |1     |
|★ Navaja Knife | Ultraviolet (Field-Tested)|2019-03-20 01:00:00|73.243|3     |
|★ Navaja Knife | Ultraviolet (Field-Tested)|2019-03-21 01:00:00|75.098|5     |
|★ Navaja Knife | Ultraviolet (Field-Tested)|2019-03-22 01:00:00|76.522|3     |
|★ Navaja Knife | Ultraviolet (Field-Tested)|2019-03-23 01:00:00|62.01 |1     |
|★ Navaja Knife | Ultraviolet (Field-Tested)|2019-03-24 01:00:00|71.575|2     |
|★ Navaja Knife | Ultraviolet (Field-Tested)|2019-03-25 01:00:00|73.085|3     |
|★ Navaja Knife | Ultraviolet (Field-Tested)|2019-03-26 01:00:00|70.564|3     |
|★ Navaja Knife | Ultraviolet (Field-Tes

In [49]:
WINDOW_SIZE = 10
# Per-item window covering the current row and the next WINDOW_SIZE - 1 rows
# in timestamp order.
w1 = (
    W.partitionBy('market_hash_name')
    .orderBy('timestamp')
    .rowsBetween(W.currentRow, WINDOW_SIZE - 1)
)
(
    history_df
    # Only look at history points from the last 30 days.
    .filter(current_time_broadcast.value - func.col("timestamp") < timedelta(days=30))
    .select(
        'market_hash_name',
        # Unweighted mean price over the window, rounded to 2 decimals.
        func.round(func.sum('price').over(w1) / func.count('price').over(w1), 2).alias('avg'),
        func.count('price').over(w1).alias('points_in_window'),
        func.sum('amount').over(w1).alias('amount_in_window'),
        func.first('timestamp').over(w1).alias('first_timestamp'),
        func.last('timestamp').over(w1).alias('last_timestamp'),
    )
    # Cheapest windows first.
    .orderBy(func.col("avg").asc(), "market_hash_name", "last_timestamp")
    .show(20, False)
)

+---------------------------------------------+-----+----------------+----------------+-------------------+-------------------+
|market_hash_name                             |avg  |points_in_window|amount_in_window|first_timestamp    |last_timestamp     |
+---------------------------------------------+-----+----------------+----------------+-------------------+-------------------+
|★ Driver Gloves | Racing Green (Field-Tested)|46.14|10              |18              |2023-01-20 23:00:00|2023-01-21 13:00:00|
|★ Driver Gloves | Racing Green (Field-Tested)|46.55|10              |17              |2023-01-20 22:00:00|2023-01-21 12:00:00|
|★ Driver Gloves | Racing Green (Field-Tested)|46.62|10              |16              |2023-01-17 07:00:00|2023-01-17 19:00:00|
|★ Driver Gloves | Racing Green (Field-Tested)|46.72|10              |15              |2023-01-17 04:00:00|2023-01-17 18:00:00|
|★ Driver Gloves | Racing Green (Field-Tested)|46.72|10              |18              |2023-01-20 14:00: