In [1]:
import math
import os
import ast

import sqlparse
import numpy as np
import pandas as pd
from sqlalchemy import MetaData, Table, literal_column, Column
from sqlalchemy.sql import select, func, or_
from sqlalchemy.dialects.mysql import insert
from sqlalchemy.dialects.mysql.types import VARCHAR, TINYINT, TEXT

from f3_data_builder import mysql_connection, region_queries, pull_main_data, build_users, build_aos, build_beatdowns, build_attendance

In [2]:
# Single database engine for the whole notebook: it is reused for schema reflection,
# for the reads below, and is handed to every f3_data_builder helper.
# NOTE(review): connection settings live inside mysql_connection() — confirm there.
engine = mysql_connection()

In [3]:
# Start from an empty registry and load the table definitions of the ``weaselbot``
# schema from the live database.
metadata = MetaData()
metadata.reflect(engine, schema="weaselbot")

# Only ``weaselbot`` is reflected above, so ``paxminer.regions`` is described by hand.
# Constructing the Table is what registers it on ``metadata`` (later looked up as
# "paxminer.regions"); the trailing ``;`` just suppresses the cell's repr output.
Table(
    "regions",
    metadata,
    Column("region", VARCHAR(length=45), primary_key=True, nullable=False),
    Column("slack_token", VARCHAR(length=90), nullable=False),
    Column("schema_name", VARCHAR(length=45), nullable=True, default=None),
    Column("active", TINYINT(), default=1),
    Column("firstf_channel", VARCHAR(length=45), nullable=True, default=None),
    Column("contact", VARCHAR(length=45), nullable=True, default=None),
    # TINYINT switches that all default to 0.
    *(
        Column(flag, TINYINT(), default=0)
        for flag in (
            "send_pax_charts",
            "send_ao_leaderboard",
            "send_q_charts",
            "send_region_leaderboard",
            "send_region_uniquepax_chart",
        )
    ),
    # NOTE(review): unlike the switches above, these two are declared VARCHAR(45)
    # with an integer default — confirm against the real paxminer table.
    *(
        Column(flag, VARCHAR(length=45), default=0)
        for flag in ("send_region_stats", "send_mid_month_charts")
    ),
    Column("comments", TEXT()),
    schema="paxminer",
);

In [5]:
# Show the definition of every table registered on ``metadata`` (reflected and
# hand-declared alike) as a quick check that the registry is complete.
for table in metadata.sorted_tables:
    print(repr(table))

Table('regions', MetaData(), Column('region', VARCHAR(length=45), table=<regions>, primary_key=True, nullable=False), Column('slack_token', VARCHAR(length=90), table=<regions>, nullable=False), Column('schema_name', VARCHAR(length=45), table=<regions>), Column('active', TINYINT(), table=<regions>, default=ScalarElementColumnDefault(1)), Column('firstf_channel', VARCHAR(length=45), table=<regions>), Column('contact', VARCHAR(length=45), table=<regions>), Column('send_pax_charts', TINYINT(), table=<regions>, default=ScalarElementColumnDefault(0)), Column('send_ao_leaderboard', TINYINT(), table=<regions>, default=ScalarElementColumnDefault(0)), Column('send_q_charts', TINYINT(), table=<regions>, default=ScalarElementColumnDefault(0)), Column('send_region_leaderboard', TINYINT(), table=<regions>, default=ScalarElementColumnDefault(0)), Column('send_region_uniquepax_chart', TINYINT(), table=<regions>, default=ScalarElementColumnDefault(0)), Column('send_region_stats', VARCHAR(length=45), ta

In [5]:
# Short aliases for the reflected weaselbot tables, unpacked in the same order as
# the keys listed below.
cr, cu, cud, ca, cb, catt = (
    metadata.tables[key]
    for key in (
        "weaselbot.combined_regions",
        "weaselbot.combined_users",
        "weaselbot.combined_users_dup",
        "weaselbot.combined_aos",
        "weaselbot.combined_beatdowns",
        "weaselbot.combined_attendance",
    )
)

In [6]:
def insert_statement(table, insert_values, update_cols):
    """Build a MySQL ``INSERT ... ON DUPLICATE KEY UPDATE`` statement.

    Parameters
    ----------
    table
        Target table passed straight to the MySQL-dialect ``insert()``.
    insert_values
        Row values handed to ``.values()`` (the notebook passes a list of
        record dicts).
    update_cols
        Container of column names. Only inserted columns whose name is in this
        container are overwritten with the incoming value on a key collision.

    Returns
    -------
    The insert statement with its ON DUPLICATE KEY UPDATE clause attached.
    The statement is only constructed here, never executed.
    """
    stmt = insert(table).values(insert_values)

    # Map each selected column name to its "inserted" counterpart, i.e. the
    # value the row would have been inserted with.
    overwrite = {}
    for col in stmt.inserted:
        if col.name in update_cols:
            overwrite[col.name] = col

    return stmt.on_duplicate_key_update(overwrite)

In [7]:
def region_subquery(metadata):
    """Return a per-region beatdown summary as a subquery aliased ``b``.

    Joins ``weaselbot.combined_beatdowns`` to ``weaselbot.combined_aos`` on
    ``ao_id`` and groups by the AO's ``region_id``. Each row carries:

    * ``region_id``
    * ``max_timestamp`` – largest ``timestamp`` among the region's beatdowns
    * ``max_ts_edited`` – largest ``ts_edited`` among the region's beatdowns
    * ``beatdown_count`` – number of joined rows in the group

    ``metadata`` must already contain both weaselbot tables.
    """
    beatdowns = metadata.tables["weaselbot.combined_beatdowns"]
    aos = metadata.tables["weaselbot.combined_aos"]

    per_region = (
        select(
            aos.c.region_id,
            func.max(beatdowns.c.timestamp).label("max_timestamp"),
            func.max(beatdowns.c.ts_edited).label("max_ts_edited"),
            func.count().label("beatdown_count"),
        )
        .select_from(beatdowns.join(aos, beatdowns.c.ao_id == aos.c.ao_id))
        .group_by(aos.c.region_id)
    )
    return per_region.subquery("b")

In [8]:
def paxminer_region_query(metadata):
    """Build the SELECT listing every paxminer region with its weaselbot summary.

    ``paxminer.regions`` is the driving table. It is LEFT OUTER JOINed to
    ``weaselbot.combined_regions`` on ``schema_name`` and from there to the
    per-region summary from ``region_subquery`` on ``region_id``, so a
    paxminer region with no match on either side still appears, with NULLs in
    the unmatched columns.

    Returns the (unexecuted) select with columns ``schema_name``,
    ``region_name``, ``max_timestamp``, ``max_ts_edited``, ``beatdown_count``
    and ``region_id``.
    """
    regions = metadata.tables["paxminer.regions"]
    combined = metadata.tables["weaselbot.combined_regions"]
    stats = region_subquery(metadata)

    joined = regions.outerjoin(combined, regions.c.schema_name == combined.c.schema_name)
    joined = joined.outerjoin(stats, combined.c.region_id == stats.c.region_id)

    return select(
        regions.c.schema_name,
        regions.c.region.label("region_name"),
        stats.c.max_timestamp,
        stats.c.max_ts_edited,
        stats.c.beatdown_count,
        combined.c.region_id,
    ).select_from(joined)

In [9]:
def weaselbot_region_query(metadata):
    """Build the SELECT of all ``combined_regions`` columns plus ``beatdown_count``.

    ``weaselbot.combined_regions`` is LEFT OUTER JOINed to the per-region
    summary from ``region_subquery`` on ``region_id``, so regions without any
    beatdowns are kept and get a NULL ``beatdown_count``.

    Returns the (unexecuted) select.
    """
    combined = metadata.tables["weaselbot.combined_regions"]
    stats = region_subquery(metadata)

    joined = combined.outerjoin(stats, combined.c.region_id == stats.c.region_id)
    return select(combined, stats.c.beatdown_count).select_from(joined)

In [10]:
# Build both region queries once; nothing is executed against the database here.
# Only weaselbot_region_sql is run in the cells shown below; paxminer_region_sql
# is constructed but not used in the visible part of the notebook.
# NOTE(review): the "verified" markers are the original author's — what they
# were checked against is not recorded here.
paxminer_region_sql = paxminer_region_query(metadata) # verified
weaselbot_region_sql = weaselbot_region_query(metadata) # verified

In [11]:
### Sample data to move forward with ###

# Five hand-written example regions. A ``None`` marks a region that has no
# timestamp yet; pandas stores those entries as NaN in the resulting float columns.
# Note that ``df_regions`` is replaced further down by the frame read from the database.
region_data = {
    "schema_name": ["f3alamo", "f3chicago", "f3naperville", "f3omaha", "f3stcharles"],
    "region_name": ["Alamo", "Chicago", "Naperville", "Omaha", "St_Charles"],
    "max_timestamp": [
        1704824582.100129,
        1704808955.413849,
        None,
        1697642801.087519,
        1704825215.550419,
    ],
    "max_ts_edited": [1704824731, 1704724809, None, None, 1704825356],
}
df_regions = pd.DataFrame(region_data)

In [12]:
# pandas represents the missing timestamps as NaN, and NaN is not a value MySQL
# can store (PyMySQL, for one, refuses to bind it). Cast to object and swap every
# missing cell for None so the rows carry SQL NULL instead.
insert_values = df_regions.astype(object).where(df_regions.notna(), None).to_dict("records")

# Columns to overwrite when a row collides with an existing key; any other
# inserted column keeps its stored value.
update_cols = ("region_name", "max_timestamp", "max_ts_edited")
region_insert_sql = insert_statement(cr, insert_values, update_cols)

In [None]:
# print(sqlparse.format(region_insert_sql.compile(engine, compile_kwargs={"literal_binds": True}).__str__(), keyword_case="upper", reindent=True))

In [13]:
# Explicit nullable dtypes for the region frame so that NULLs coming back from
# the outer join do not change the column types.
# NOTE(review): Int16 tops out at 32767 — confirm no single region can exceed
# that many beatdowns.
dtypes = {
    "region_id": pd.StringDtype(),  # this is a string everywhere else
    "region_name": pd.StringDtype(),
    "schema_name": pd.StringDtype(),
    "slack_team_id": pd.StringDtype(),
    "max_timestamp": pd.Float64Dtype(),
    "max_ts_edited": pd.Float64Dtype(),
    "beatdown_count": pd.Int16Dtype(),
}

# Replaces the sample ``df_regions`` with the real contents of combined_regions
# plus each region's beatdown count.
with engine.begin() as cnxn:
    df_regions = pd.read_sql(sql=weaselbot_region_sql, con=cnxn, dtype=dtypes)

In [14]:
# Pull the raw data for every region in df_regions. Per the recorded output the
# helper reports "starting <schema>... Done" once per region schema.
# NOTE(review): the exact contents of the four returned frames are defined in
# f3_data_builder.pull_main_data — confirm there.
df_users_dup, df_aos, df_beatdowns, df_attendance = pull_main_data(df_regions, engine, metadata)

starting f3alamo... Done
starting f3albany... Done
starting f3albuquerque... Done
starting f3alliance... Done
starting f3anderson... Done
starting f3annapolis... Done
starting f3atlanta... Done
starting f3austin... Done
starting f3badlands... Done
starting f3beast... Done
starting f3bellingham... Done
starting f3blueridge... Done
starting f3borderlands... Done
starting f3brentwood... Done
starting f3bruco... Done
starting f3youngsville... Done
starting f3carpex... Done
starting f3centralarkansas... Done
starting f3central_il... Done
starting f3charleston... Done
starting f3charlottesville... Done
starting f3cherokee... Done
starting f3cheyenne... Done
starting f3chicago... Done
starting f3churham... Done
starting f3clearwater... Done
starting f3cleburne... Done
starting f3cleveland... Done
starting f3coloradosprings... Done
starting f3columbia... Done
starting f3columbus... Done
starting f3dallas... Done
starting f3davidson... Done
starting f3dayton... Done
starting f3delhi_ncr... Done

In [15]:
# Report how many beatdown rows were pulled before running the build steps.
print(f"beatdowns to process: {len(df_beatdowns)}")

beatdowns to process: 75967


In [16]:
# NOTE(review): the result is assigned back onto its own input, so this cell is
# not idempotent — re-running it alone passes the previous result back into
# build_users. Re-run from the pull_main_data cell instead.
df_users_dup = build_users(df_users_dup, df_attendance, engine, metadata)

building users...


In [17]:
# NOTE(review): overwrites its own input (df_aos), so re-running this cell alone
# passes the previous result back into build_aos. Re-run from the
# pull_main_data cell instead.
df_aos = build_aos(df_aos, engine, metadata)

building aos...


In [20]:
# Uses the users and AOs frames returned by the two build steps above.
# NOTE(review): overwrites its own input (df_beatdowns), so re-running this cell
# alone passes the previous result back in. Re-run from the pull_main_data cell.
df_beatdowns = build_beatdowns(df_beatdowns, df_users_dup, df_aos, engine, metadata)

building beatdowns...


In [26]:
# Final build step; it takes the frames produced by all previous steps. The
# return value is not captured.
# NOTE(review): presumably the helper persists attendance via `engine` — confirm
# in f3_data_builder.
build_attendance(df_attendance, df_users_dup, df_aos, df_beatdowns, engine, metadata)

building attendance...


In [27]:
# Release the engine's pooled database connections now that the run is finished.
engine.dispose()