In [None]:
#| default_exp data

# data -- Getting and processing the data

> A simple package for dealing with pklmart data

In [1]:
#| hide
from nbdev.showdoc import *
from fastcore.test import *
import pandas as pd
import sys
sys.path.append('..')

In [13]:
#| export
from pklshop.connect import *
import pkgutil


In [3]:
#| export
# Names of the pklmart database tables that `get_tab_as_df` accepts.
table_names = [
    "tournament",
    "match",
    "game",
    "rally",
    "shot_type_ref",
    "shot",
    "player",
    "team",
]

In [4]:
table_names

['tournament',
 'match',
 'game',
 'rally',
 'shot_type_ref',
 'shot',
 'player',
 'team']

These are the names of the tables that exist in the pklmart database. We can load any one of them into a pandas dataframe using `get_tab_as_df`.

In [5]:
#| export
def get_tab_as_df(table_name:str):
    """Returns a pandas dataframe for a given table

    `table_name` has to be a string that is listed in `table_names`.
    Raises `TypeError` when it is not a string and `ValueError` when it is a
    string that is not one of the known table names.
    """
    is_string = isinstance(table_name, str)
    if not is_string:
        raise TypeError(f"table_name must be a string within {table_names}")
    if table_name in table_names:
        # Build a connection from the result of `config()` and pull the table.
        return DbConnection(config()).pull_data(table_name)
    raise ValueError(f"Table name {table_name} is not a name in table_names")

In [None]:
show_doc(get_tab_as_df)

---

[source](https://github.com/NolanSmyth/pklshop/blob/main/pklshop/data.py#L13){target="_blank" style="float:right; font-size:smaller"}

### get_tab_as_df

>      get_tab_as_df (table_name:str)

Returns a pandas dataframe for a given table

Here's an example of creating and displaying a df from the `match` table.

In [None]:
# Pull the `match` table and preview its first rows.
match_df = get_tab_as_df(table_name="match")
match_df.head()

Unnamed: 0,match_id,tourn_id,consol_ind,team_id_1,team_id_2,maint_dtm,maint_app,create_dtm,create_app
0,M1,T1,N,T1,T2,2022-04-09 03:19:33.840951+00:00,postgres,2022-04-09 03:19:33.840951+00:00,postgres
1,M2,T2,N,T2,T3,2022-05-26 00:45:11.301752+00:00,postgres,2022-05-26 00:45:11.301752+00:00,postgres
2,M5,T5,N,T6,T5,2022-06-28 00:40:22.948360+00:00,postgres,2022-06-28 00:40:22.948360+00:00,postgres
3,M6,T6,N,T5,T7,2022-07-07 23:01:45.921540+00:00,postgres,2022-07-07 23:01:45.921540+00:00,postgres
4,M7,T7,N,T8,T9,2022-07-11 02:40:50.597016+00:00,postgres,2022-07-11 02:40:50.597016+00:00,postgres


In [None]:
#| hide
# A non-string argument and a string missing from `table_names` must each be
# rejected with the matching error message.
for bad_name, expected_msg in [
    (1, "table_name must be a string within"),
    ("match1", "Table name match1 is not a name in table_names"),
]:
    test_fail(lambda: get_tab_as_df(bad_name), contains=expected_msg)

In [17]:
#| hide
# Write every table in `table_names` to its own CSV under the package's
# datasets folder, leaving out the dataframe index column.
for table_name in table_names:
    df = get_tab_as_df(table_name)
    csv_path = f"../pklshop/datasets/{table_name}.csv"
    df.to_csv(csv_path, index=False)

In [38]:
from io import BytesIO


In [39]:
# Read the CSV shipped inside the `pklshop` package as raw bytes, then parse
# those bytes into a dataframe through an in-memory buffer.
game_dat = pkgutil.get_data('pklshop', "datasets/game.csv")
with BytesIO(game_dat) as game_buffer:
    game = pd.read_csv(game_buffer, encoding="utf-8")

In [40]:
game

Unnamed: 0,game_id,match_id,game_nbr,score_w,score_l,w_team_id,l_team_id,vod_url,skill_lvl,det_user,maint_dtm,maint_app,create_dtm,create_app
0,G1,M1,1,12,10,T2,T1,https://youtu.be/r_35A7D1CQc?t=26520,Pro,aspancake,2022-08-21 20:57:06.721623+00:00,postgres,2022-08-21 20:57:06.721623+00:00,postgres
1,G2,M1,2,11,7,T1,T2,https://youtu.be/r_35A7D1CQc?t=28205,Pro,aspancake,2022-08-21 20:57:06.721623+00:00,postgres,2022-08-21 20:57:06.721623+00:00,postgres
2,G3,M1,3,11,8,T2,T1,https://youtu.be/r_35A7D1CQc?t=29693,Pro,aspancake,2022-08-21 20:57:06.721623+00:00,postgres,2022-08-21 20:57:06.721623+00:00,postgres
3,G4,M2,1,11,8,T3,T2,https://youtu.be/yLAuSzRr9ls,Pro,aspancake,2022-08-21 20:57:06.721623+00:00,postgres,2022-08-21 20:57:06.721623+00:00,postgres
4,G5,M2,2,11,3,T2,T3,https://youtu.be/yLAuSzRr9ls,Pro,aspancake,2022-08-21 20:57:06.721623+00:00,postgres,2022-08-21 20:57:06.721623+00:00,postgres
5,G6,M2,3,11,7,T2,T3,https://youtu.be/yLAuSzRr9ls,Pro,aspancake,2022-08-21 20:57:06.721623+00:00,postgres,2022-08-21 20:57:06.721623+00:00,postgres
6,G8,M4,2,11,9,T5,T4,https://youtu.be/Gqq8hBZtghs?t=975,Pro,aspancake,2022-08-21 20:57:06.721623+00:00,postgres,2022-08-21 20:57:06.721623+00:00,postgres
7,G9,M4,3,11,8,T5,T4,https://youtu.be/Gqq8hBZtghs?t=2192,Pro,aspancake,2022-08-21 20:57:06.721623+00:00,postgres,2022-08-21 20:57:06.721623+00:00,postgres
8,G10,M4,4,11,2,T5,T4,https://youtu.be/Gqq8hBZtghs?t=3520,Pro,aspancake,2022-08-21 20:57:06.721623+00:00,postgres,2022-08-21 20:57:06.721623+00:00,postgres
9,G7,M4,1,11,8,T4,T5,https://youtu.be/Gqq8hBZtghs,Pro,aspancake,2022-08-21 20:57:06.721623+00:00,postgres,2022-08-21 20:57:06.721623+00:00,postgres


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()