In [1]:
import pandas as pd
import psycopg2
import sqlalchemy as sa
import numpy as np
import os
# Make /usr/src/app the working directory for every following cell.
# NOTE(review): presumably required so relative paths used by the project resolve — confirm.
os.chdir("/usr/src/app")

# Connection engine

In [2]:
# Connection URL of the PostgreSQL database. The DSS_DB_URL environment
# variable takes precedence so credentials do not have to live in the
# notebook; when it is unset the original hard-coded URL is used.
db_url = os.environ.get("DSS_DB_URL", "postgresql://dss:dss@postgres:5432/dss_db")
# SQLAlchemy engine handed to the pandas to_sql / read_sql_query calls.
engine = sa.create_engine(db_url)


# <font size=20>DATA LOADING</font>

In [3]:
from deepsetstats.paths import (
    PATH_BIBLE_PLAYERS,
    PATH_TOURNAMENTS_NAMING,
    PATH_MASTER_TENNIS_TV,
    PATH_MASTER_GS,
    PATH_REFERENCE_VIDEOS,
    PATH_VIDEOS,
    PATH_ANNOTATIONS_REFERENCE_PARQUET
)

from deepsetstats.dataset.court_detection.utils import Utils

## PLAYERS

In [35]:
# Load the players data from the parquet file at PATH_BIBLE_PLAYERS (pyarrow reader).
df_players = pd.read_parquet(PATH_BIBLE_PLAYERS, engine="pyarrow")

In [36]:
# df_players.to_sql("players", engine, if_exists="replace", index=False)

## TOURNAMENTS

In [37]:
# Load the tournaments data from the parquet file at PATH_TOURNAMENTS_NAMING (pyarrow reader).
df_tour = pd.read_parquet(PATH_TOURNAMENTS_NAMING, engine="pyarrow")

In [40]:
# df_tour.to_sql("tournaments", engine, if_exists="replace", index=False)

## VIDEOS_TENNISTV

In [41]:
# Load the TennisTV videos data from the parquet file at PATH_MASTER_TENNIS_TV (pyarrow reader).
df_tennistv_videos = pd.read_parquet(PATH_MASTER_TENNIS_TV, engine="pyarrow")

In [43]:
# df_tennistv_videos.to_sql("videos_tennistv", engine, if_exists="replace", index=False)

## VIDEOS_GRANDSLAMS

In [44]:
# Load the Grand Slam videos data from the parquet file at PATH_MASTER_GS (pyarrow reader).
df_gs = pd.read_parquet(PATH_MASTER_GS, engine="pyarrow")

In [46]:
# df_gs.to_sql("videos_grandslams", engine, if_exists="replace", index=False)

## REFERENCE_VIDEOS

In [47]:
# Load the reference videos data from the parquet file at PATH_REFERENCE_VIDEOS (pyarrow reader).
df_ref = pd.read_parquet(PATH_REFERENCE_VIDEOS, engine="pyarrow")

In [49]:
#df_ref.to_sql("reference_videos", engine, if_exists="replace", index=False)

## ANNOTATIONS REFERENCE COURT

In [12]:
# Load the reference court annotations from the parquet file at
# PATH_ANNOTATIONS_REFERENCE_PARQUET (pyarrow reader).
df_annot = pd.read_parquet(PATH_ANNOTATIONS_REFERENCE_PARQUET, engine="pyarrow")

In [13]:
def array2list(arr):
    """Stack the rows of ``arr`` with ``np.vstack`` and return them as nested Python lists."""
    stacked = np.vstack(arr)
    return stacked.tolist()

# The PostgreSQL upload wants plain Python lists rather than numpy arrays,
# so both annotation columns are converted element by element.
for col in ("court", "net"):
    df_annot[col] = df_annot[col].map(array2list)

In [14]:
# Write the annotations to the "annotations_ref" table. if_exists="replace"
# drops an existing table of that name first; index=False leaves the
# DataFrame index out of the table.
df_annot.to_sql("annotations_ref", engine, if_exists="replace", index=False)

65

## MASTER

In [50]:
# Collect the videos found under PATH_VIDEOS.
# NOTE(review): presumably a collection of video ids, since it is later
# matched against the "video_id" column — confirm against Utils.list_videos.
s_downloaded_vids = Utils.list_videos(PATH_VIDEOS)

In [51]:
# Stack the TennisTV and Grand Slam video frames row-wise into one frame
# (each input keeps its own index values).
df_base = pd.concat([df_tennistv_videos, df_gs])

In [52]:
# ---------------------------------------------------- #
#    Create a Master table to rule them all
# ---------------------------------------------------- #
# One row per video: the player ids sharing the same video / tournament / title
# are gathered into a list, and the rows are ordered by tournament id.
# NOTE(review): groupby drops rows whose key columns contain NaN by default —
# confirm none of the key columns can be missing.
cols_gb = ["video_id", "tournament_id", "tournament_name", "title"]
df_base = (
    df_base.groupby(cols_gb)["player_id"]
    .agg(list)
    .reset_index()
    .sort_values("tournament_id", ascending=True)
)
# isin() already returns booleans, so the mask is stored directly as the flag.
df_base["is_downloaded"] = df_base["video_id"].isin(s_downloaded_vids)

In [55]:
# Upload to master table in the database. if_exists="replace" drops an
# existing "master" table first; index=False leaves the DataFrame index out.
df_base.to_sql("master", engine, if_exists="replace", index=False)

434


# <font size=20>RETRIEVAL</font>

In [4]:
# Connection URL for the retrieval cells. The DSS_DB_URL environment variable
# takes precedence so credentials do not have to live in the notebook; when it
# is unset the original hard-coded URL is used.
db_url = os.environ.get("DSS_DB_URL", "postgresql://dss:dss@postgres:5432/dss_db")
# SQLAlchemy engine handed to pd.read_sql_query.
engine = sa.create_engine(db_url)

In [5]:
# SQL query: select every row and column of the annotations_ref table
query = "SELECT * FROM annotations_ref;"

In [6]:
# Execute the query and fetch the result into a Pandas DataFrame
# (df_annot is rebound to the data read back from the database)
df_annot = pd.read_sql_query(query, engine)

In [None]:
# The list-of-lists columns come back in PostgreSQL's own encoding; run each
# value through Utils.psql_lol_to_numpy to undo it.
# NOTE(review): going by its name the helper yields numpy arrays — confirm.
for col in ("net", "court"):
    df_annot[col] = df_annot[col].map(Utils.psql_lol_to_numpy)

# <font size=20>APPENDIX</font>

# Create a dummy table

In [6]:
# Tiny two-row example frame, built row by row and written to "my_table"
# (an existing table of that name is replaced; the index is not stored).
table_name = "my_table"
records = [
    {"name": "John Doe", "age": 30},
    {"name": "Jane Doe", "age": 25},
]
df = pd.DataFrame(records)
df.to_sql(table_name, engine, if_exists="replace", index=False)

2