In [1]:
# Reset the schema
import duckdb

# Connect to your DuckDB database (file or in-memory)
conn = duckdb.connect("mtga_local.duckdb")  # or ':memory:'

# Read the SQL file
with open("silver_duckdb_schema.sql", "r") as f:
    sql_script = f.read()

# Execute the entire SQL script
conn.execute(sql_script)

# Optional: check tables
tables = conn.execute("SHOW TABLES").fetchall()
conn.close()
print(tables)


[('decks',), ('dim_cards',), ('matches',), ('players',), ('turn1_hands',)]


In [10]:
# Load the dim_cards table
import pandas as pd
import duckdb
import json

# 1. Read CSV safely
df = pd.read_csv(
    '/home/r3gal/develop/mtga_pipeline/data/references/dim_cards.csv',
    dtype=str,      # read all columns as strings
    keep_default_na=False  # keep empty fields as empty strings instead of NaN if desired
)

# Optional: Convert JSON/list columns from strings to JSON strings
json_cols = ['colors', 'color_identity', 'legalities']
for col in json_cols:
    df[col] = df[col].apply(lambda x: x if x.startswith('[') or x.startswith('{') else '[]')

df = df.drop_duplicates(keep='first', subset='arena_id')
# 2. Connect to DuckDB
conn = duckdb.connect("mtga_local.duckdb")

# 3. Register the dataframe and insert into dim_cards
conn.register("df_temp", df)
conn.execute("INSERT INTO dim_cards SELECT * FROM df_temp")

# 4. Preview
print(conn.execute("SELECT * FROM dim_cards LIMIT 5").fetchall())
conn.close()


[('91829', 'b34bb2dc-c1af-4d77-b0b3-a0fb342a5fc6', 'Forest', datetime.date(2024, 8, 2), 'https://scryfall.com/card/blb/280/forest', '', 0, '[]', "['G']", 'Basic Land — Forest', 'blb', 'Bloomburrow', 'expansion', 'common', "{'standard': 'legal', 'future': 'legal', 'historic': 'legal', 'timeless': 'legal', 'gladiator': 'legal', 'pioneer': 'legal', 'modern': 'legal', 'legacy': 'legal', 'pauper': 'legal', 'vintage': 'legal', 'penny': 'legal', 'commander': 'legal', 'oathbreaker': 'legal', 'standardbrawl': 'legal', 'brawl': 'legal', 'alchemy': 'legal', 'paupercommander': 'legal', 'duel': 'legal', 'oldschool': 'not_legal', 'premodern': 'legal', 'predh': 'legal'}"), ('66119', '9f0d82ae-38bf-45d8-8cda-982b6ead1d72', 'Siren Lookout', datetime.date(2017, 9, 29), 'https://scryfall.com/card/xln/78/siren-lookout', '{2}{U}', 3, "['U']", "['U']", 'Creature — Siren Pirate', 'xln', 'Ixalan', 'expansion', 'common', "{'standard': 'not_legal', 'future': 'not_legal', 'historic': 'legal', 'timeless': 'legal'

In [None]:
import pandas as pd
import duckdb
import json

# Connect to persistent DuckDB database
conn = duckdb.connect(database='/home/r3gal/develop/mtga_pipeline/cloud/mtga_local.duckdb')

# read each csv 1 game at a time and partition into the correct tables


csv_path = "/home/r3gal/develop/mtga_pipeline/data/parsed_csv/Filtered_Player_20260212_233143_test.csv"


# For each game
# save the deck_list in decks
# save the mulligans
# save the match details in matches
#   card draws -> draw_order

# add a new player to players if no match was found for player_id

# The csv will contain at min 1 game, can contain more

dtype_map = {
    'game_num': 'Int64',
    'timestamp': 'str',
    'event': 'str',
    'payload': 'str'
}
df = pd.read_csv(csv_path, dtype=dtype_map)

# group by game_num
# for game_num, group_df in df.groupby("game_num"):
#     pass


# exploding the payload row-wise
df['payload'] = df['payload'].apply(json.loads)
df = df.explode('payload').reset_index(drop=True)



# used to grab end of hand selection phase
def is_beginning_phase(payload_line):
    if payload_line.get('type') != 'GREMessageType_GameStateMessage':
        return False
    
    gsm = payload_line.get('gameStateMessage')
    if not gsm:
        return False
    
    turn = gsm.get('turnInfo')
    if not turn:
        return False
    
    return turn.get('phase') == 'Phase_Beginning'

  
# verifies if the ZoneType_Hand is within the payload line, 
# need to drop first row as it is a setup row and doesnt have hand contents
def has_hand_zone(payload_line):
    if payload_line.get('type') != 'GREMessageType_GameStateMessage':
        return False

    gsm = payload_line.get('gameStateMessage')
    if not gsm:
        return False

    return any(
        z.get('type') == 'ZoneType_Hand'
        for z in gsm.get('zones', [])
    )

import ast
def insert_hands (conn, df, hand_id, match_id):

    # grab only the begining hands
    # cut off the gamestart from the payload, where the phase switches to Phase_Beginning
    df['payload'] = df['payload'].apply(ast.literal_eval)
    end_idx = df[df['payload'].apply(is_beginning_phase).values]

    final_hand = end_idx.index[0] + 1
    df_next = df.iloc[:final_hand]

    df_hands = df_next[df_next['payload'].apply(has_hand_zone).values][1:]

#   up to here works


    player_id = df_hands.iloc[0]['player_id']
    seatID = df_hands['payload'].iloc[0].get('systemSeatIds')[0]

    gameObjectMap = df_hands['payload'].apply(
        lambda x: x.get('gameStateMessage').get('gameObjects')
    )
        # for each item I only need instanceId and grpId
    
    # df_hands[
    #     df_hands['payload'].apply()
    # ]

    hands = []
    # push each new hand to hands as an array of dicts. x,y,z = grpids
    hands = [
        {'hand_id': '12345',
        'init_hand': '[x, y, z]',
        'mulliganCount': '0',
        'final_hand': '[x, y, z]'
        },
        {'hand_id': '12346',
        'init_hand': '[x, y, z]',
        'mulliganCount': '1',
        'final_hand': '[x, y]'
        }
    ]

    # go into zones get objectInstanceId
    # zones = df['payload'].iloc[0].get('gameStateMessage').get('zones')
    # map objectInstanceId to grpid


    # find initials hand and other mulligan hands
    # df['payload'].get('gameStateMessage').get('turnInfo').get('phase')

#   end of hand selection signal
    # for row in df['payload']:
    #     if df['payload'].get('gameStateMessage').get('turnInfo').get('phase') == "Phase_Begining":
    #         pass
            # hand selection has completed

    # df['payload'] = gameStateMessage.turnInfo.phase = Phase_Begining


    conn.execute(
        """
        INSERT INTO players (player_id, display_name, region)
        SELECT ?, ?, ?
        WHERE NOT EXISTS (
            SELECT 1 FROM players WHERE player_id = ?
        )
        """,
        (str(player_id), str(display_name), str(region), str(player_id))
    )


# pass

df.head()


Unnamed: 0,game_num,player_id,timestamp,event,payload
0,1,V7JT5YS7ANCWRHG35IZNV4PKOY,2/12/2026 11:26:17 PM,deck_list,"{'id': 'c559b3a9-2778-4b41-9c56-23fceb37520a',..."
1,1,V7JT5YS7ANCWRHG35IZNV4PKOY,2/12/2026 11:26:17 PM,GreToClientEvent,"[{'type': 'GREMessageType_DieRollResultsResp',..."
2,1,V7JT5YS7ANCWRHG35IZNV4PKOY,2/12/2026 11:26:19 PM,GreToClientEvent,"[{'type': 'GREMessageType_SetSettingsResp', 's..."
3,1,V7JT5YS7ANCWRHG35IZNV4PKOY,2/12/2026 11:26:20 PM,GreToClientEvent,"[{'type': 'GREMessageType_GameStateMessage', '..."
4,1,V7JT5YS7ANCWRHG35IZNV4PKOY,2/12/2026 11:26:29 PM,GreToClientEvent,"[{'type': 'GREMessageType_GameStateMessage', '..."


In [None]:

deck_obj = df.iloc[0]['payload'].get('request')
nested = json.loads(deck_obj)
# Gives Deck name
deck_name = nested.get('Summary').get('Name')

# Gets the array of the main deck: cardId and quantity are keys
# [{'cardId': 95192, 'quantity': 9},
#  {'cardId': 93715, 'quantity': 1},
#  {'cardId': 95200, 'quantity': 6}]
deck_list = nested.get('Deck').get('MainDeck')

# sideboard and commander
deck_sideboard = nested.get('Deck').get('Sideboard')
deck_commander = nested.get('Deck').get('CommandZone')

deck_row = pd.DataFrame({
    'deck_id': # auto increment
    'player_id': player_id # pull player_id
    'match_id':  # auto increment
    'deck_list': deck_list
    'deck_sideboard': deck_sideboard
    'deck_commander': deck_commander
})

conn.execute("INSERT INTO players SELECT * FROM df")

In [3]:

con.close()

In [9]:
import pandas as pd 
import json

csv_path = "/home/r3gal/develop/mtga_pipeline/data/parsed_csv/Filtered_Player_20260220_113325_mulligans.csv"


dtype_map = {
    'game_num': 'Int64',
    'timestamp': 'str',
    'event': 'str',
    'payload': 'str'
}
df = pd.read_csv(csv_path, dtype=dtype_map)

# exploding the payload row-wise
df['payload'] = df['payload'].apply(json.loads)
df = df.explode('payload').reset_index(drop=True)

df.to_csv('/home/r3gal/develop/mtga_pipeline/cloud/test_4mulligan.csv')
df.head()

Unnamed: 0,game_num,player_id,timestamp,event,payload
0,1,V7JT5YS7ANCWRHG35IZNV4PKOY,2/20/2026 11:32:17 AM,deck_list,"{'id': 'd9054471-c273-4cbb-b408-07d81b29b8dc',..."
1,1,V7JT5YS7ANCWRHG35IZNV4PKOY,2/20/2026 11:32:17 AM,GreToClientEvent,"{'type': 'GREMessageType_ConnectResp', 'system..."
2,1,V7JT5YS7ANCWRHG35IZNV4PKOY,2/20/2026 11:32:17 AM,GreToClientEvent,"{'type': 'GREMessageType_DieRollResultsResp', ..."
3,1,V7JT5YS7ANCWRHG35IZNV4PKOY,2/20/2026 11:32:17 AM,GreToClientEvent,"{'type': 'GREMessageType_GameStateMessage', 's..."
4,1,V7JT5YS7ANCWRHG35IZNV4PKOY,2/20/2026 11:32:17 AM,GreToClientEvent,{'type': 'GREMessageType_ChooseStartingPlayerR...


In [None]:
import pandas as pd
import json

def has_hand_zone(payload_line):
    if payload_line.get('type') != 'GREMessageType_GameStateMessage':
        return False

    gsm = payload_line.get('gameStateMessage')
    if not gsm:
        return False

    return any(
        z.get('type') == 'ZoneType_Hand'
        for z in gsm.get('zones', [])
    )

def is_beginning_phase(payload_line):
    if payload_line.get('type') != 'GREMessageType_GameStateMessage':
        return False
    
    gsm = payload_line.get('gameStateMessage')
    if not gsm:
        return False
    
    turn = gsm.get('turnInfo')
    if not turn:
        return False
    
    return turn.get('phase') == 'Phase_Beginning'

csv_path = "/home/r3gal/develop/mtga_pipeline/cloud/test_4mulligan.csv"
df = pd.read_csv(csv_path)
import ast
# df['payload'] = df['payload'].apply(json.loads)
df = df.drop(0)
df['payload'] = df['payload'].apply(ast.literal_eval)

# mask = df['payload'].apply(has_hand_zone)
# start_idx = mask[mask]
# # end_idx = df['payload'].apply(is_beginning_phase).first_valid_index()
# end_idx = df['payload'].apply(is_beginning_phase).idxmax()

# open_hands = df.loc[start_idx:end_idx]
# open_hands

# start_idx = df.index[df['payload'].apply(has_hand_zone)].first_valid_index()
# end_idx = df.index[df['payload'].apply(is_beginning_phase)].first_valid_index()

start_idx = df['payload'].apply(has_hand_zone).first_valid_index()
end_idx = df['payload'].apply(is_beginning_phase).first_valid_index()

open_hands = df.loc[start_idx:end_idx]
# print(df['payload'][1]['type'])    # GREMessageType_ConnectResp
# print(df['payload'][1].get('msgId'))  # 2
# mask
# start_idx
# df.head(10)
# df.
# open_hands
# start_idx
end_idx

1

In [None]:

import ast

df['payload'] = df['payload'].apply(ast.literal_eval)

    # grab only the begining hands
# start_idx = df.index[df['payload'].apply(has_hand_zone)].first_valid_index()
mask = df['payload'].apply(has_hand_zone)
start_idx = mask[mask].index[0]
end_idx = df.index[df['payload'].apply(is_beginning_phase)].first_valid_index()

open_hands = df.loc[start_idx:end_idx]
open_hands

In [39]:
import pandas as pd
import json
import ast

def has_hand_zone(payload_line):
    if payload_line.get('type') != 'GREMessageType_GameStateMessage':
        print("No GSM type")
        return False

    gsm = payload_line.get('gameStateMessage')
    if not gsm:
        print("No GSM msg")
        return False

    return any(
        z.get('type') == 'ZoneType_Hand'
        for z in gsm.get('zones', [])
    )

csv_path = "/home/r3gal/develop/mtga_pipeline/cloud/test_4mulligan.csv"
df = pd.read_csv(csv_path)
# df['payload'] = df['payload'].apply(json.loads)
# df = df.drop(0)
df['payload'] = df['payload'].apply(ast.literal_eval)  
# df_one_row = df.iloc[[6]] 
df_new = df[df['payload'].apply(has_hand_zone)]
df_new
pd.set_option('display.max_colwidth', default)
# display(df_one_row['payload'])

No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type
No GSM type


NameError: name 'default' is not defined