In [1]:
import pandas as pd
import psycopg2
import sqlalchemy as sa
import numpy as np
import os
# Switch the working directory to /usr/src/app for the rest of the notebook.
# NOTE(review): presumably the project root inside the container, so that
# relative paths resolve from there — confirm.
os.chdir("/usr/src/app")

# Connection engine

In [7]:
# Take the connection URL from the DSS_DB_URL environment variable so that
# credentials do not have to live in the notebook. The original literal is kept
# as the fallback, so existing setups keep working unchanged.
db_url = os.environ.get("DSS_DB_URL", "postgresql://dss:dss@postgres:5432/dss_db")

# create_engine only builds the Engine object; it is reused by every
# to_sql / read_sql_query call below.
engine = sa.create_engine(db_url)


# <font size=20>DATA LOADING</font>

In [3]:
from deepsetstats.paths import (
    PATH_BIBLE_PLAYERS,
    PATH_TOURNAMENTS_NAMING,
    PATH_MASTER_TENNIS_TV,
    PATH_MASTER_GS,
    PATH_REFERENCE_VIDEOS,
    PATH_VIDEOS,
    PATH_ANNOTATIONS_REFERENCE_PARQUET,
    PATH_MASTER_VIDEOS,
    PATH_VIDEOS_METADATA,
    PATH_TEMPLATE_MATCHING,
    PATH_INTERVALS
)

from deepsetstats.dataset.court_detection.utils import Utils

## PLAYERS

In [35]:
# Load the parquet file at PATH_BIBLE_PLAYERS into df_players.
df_players = pd.read_parquet(
    path=PATH_BIBLE_PLAYERS,
    engine="pyarrow",
)

In [36]:
# df_players.to_sql("players", engine, if_exists="replace", index=False)

## TOURNAMENTS

In [37]:
# Load the parquet file at PATH_TOURNAMENTS_NAMING into df_tour.
df_tour = pd.read_parquet(
    path=PATH_TOURNAMENTS_NAMING,
    engine="pyarrow",
)

In [40]:
# df_tour.to_sql("tournaments", engine, if_exists="replace", index=False)

## VIDEOS_TENNISTV

In [5]:
# Load the parquet file at PATH_MASTER_TENNIS_TV into df_tennistv_videos.
df_tennistv_videos = pd.read_parquet(
    path=PATH_MASTER_TENNIS_TV,
    engine="pyarrow",
)

In [43]:
# df_tennistv_videos.to_sql("videos_tennistv", engine, if_exists="replace", index=False)

## VIDEOS_GRANDSLAMS

In [6]:
# Load the parquet file at PATH_MASTER_GS into df_gs.
df_gs = pd.read_parquet(
    path=PATH_MASTER_GS,
    engine="pyarrow",
)

In [46]:
# df_gs.to_sql("videos_grandslams", engine, if_exists="replace", index=False)

## REFERENCE_VIDEOS

In [47]:
# Load the parquet file at PATH_REFERENCE_VIDEOS into df_ref.
df_ref = pd.read_parquet(
    path=PATH_REFERENCE_VIDEOS,
    engine="pyarrow",
)

In [49]:
#df_ref.to_sql("reference_videos", engine, if_exists="replace", index=False)

## ANNOTATIONS REFERENCE COURT

In [12]:
# Load the parquet file at PATH_ANNOTATIONS_REFERENCE_PARQUET into df_annot.
df_annot = pd.read_parquet(
    path=PATH_ANNOTATIONS_REFERENCE_PARQUET,
    engine="pyarrow",
)

In [13]:
def array2list(arr):
    """Stack ``arr`` with ``np.vstack`` and return the result as nested Python lists.

    Parameters
    ----------
    arr : sequence of array-likes
        Anything ``np.vstack`` accepts.

    Returns
    -------
    list
        The stacked array converted with ``ndarray.tolist()``.
    """
    return np.vstack(arr).tolist()


# PostgreSQL (through to_sql) accepts Python lists but not numpy arrays, so the
# "court" and "net" columns are converted element by element before uploading.
df_annot["court"] = df_annot["court"].map(array2list)
df_annot["net"] = df_annot["net"].map(array2list)

In [14]:
# Write df_annot to the "annotations_ref" table, replacing the table if it
# already exists; the DataFrame index is not stored.
df_annot.to_sql(
    name="annotations_ref",
    con=engine,
    if_exists="replace",
    index=False,
)

65

## MASTER_VIDEOS

In [11]:
# Load the parquet file at PATH_MASTER_VIDEOS into df_master_videos.
df_master_videos = pd.read_parquet(
    path=PATH_MASTER_VIDEOS,
    engine="pyarrow",
)

In [10]:
# Write df_master_videos to the "master_videos" table, replacing the table if
# it already exists; the DataFrame index is not stored.
df_master_videos.to_sql(
    name="master_videos",
    con=engine,
    if_exists="replace",
    index=False,
)

434

## METADATA_VIDEOS

In [4]:
# Load the parquet file at PATH_VIDEOS_METADATA into df_meta_videos.
df_meta_videos = pd.read_parquet(
    path=PATH_VIDEOS_METADATA,
    engine="pyarrow",
)

In [8]:
# Upload to master table in the database
# df_meta_videos.to_sql("meta_videos", engine, if_exists="replace", index=False)

286

## TEMPLATE_MATCHING

In [9]:
# Load the parquet file at PATH_TEMPLATE_MATCHING into df_temp_match.
df_temp_match = pd.read_parquet(
    path=PATH_TEMPLATE_MATCHING,
    engine="pyarrow",
)

In [10]:
# Display the (rows, columns) shape of the template-matching frame.
df_temp_match.shape

(1507454, 4)

In [11]:
# Number of distinct values in the video_id column.
len(set(df_temp_match["video_id"]))

2286

In [12]:
# Per-video mean of the numeric columns.
# numeric_only=True is passed explicitly: the frame has four columns but only
# frame_num and confidence appear in the result, i.e. a non-numeric column was
# being dropped implicitly. That implicit drop is deprecated in pandas 1.5 and
# raises TypeError in pandas 2.x.
df_temp_match.groupby("video_id").mean(numeric_only=True)

  df_temp_match.groupby("video_id").mean()


Unnamed: 0_level_0,frame_num,confidence
video_id,Unnamed: 1_level_1,Unnamed: 2_level_1
-3ZqY8NTd7w,8162.5,3.477064
-Jy7kaYd4bs,2337.5,1.452128
-lmE6hoxY3Y,2362.5,2.131579
-s4E0Le-_IM,2287.5,1.527174
00Yz4xddRiw,6275.0,2.176938
...,...,...
n9oA5WrSOEg,8400.0,2.000000
nxUcT25ZIGw,4740.0,1.534591
pRqXjywBrIE,26010.0,0.976959
qmKmUF0Zpjc,9237.5,1.924324


In [14]:
# Shape of the array of distinct video_id values among rows whose
# confidence is at least 4.
df_temp_match.loc[df_temp_match["confidence"] >= 4, "video_id"].unique().shape

(2148,)

In [15]:
# Write df_temp_match to the "template_matching" table, replacing the table if
# it already exists; the DataFrame index is not stored.
df_temp_match.to_sql(
    name="template_matching",
    con=engine,
    if_exists="replace",
    index=False,
)

454

## IMAGES INTERVALS

In [16]:
# Load the parquet file at PATH_INTERVALS into df_intervals.
df_intervals = pd.read_parquet(
    path=PATH_INTERVALS,
    engine="pyarrow",
)

In [18]:
# Shape of the array of distinct video_id values in df_intervals.
df_intervals["video_id"].unique().shape

(1796,)

In [19]:
# Upload to master table in the database
# df_intervals.to_sql("images_intervals", engine, if_exists="replace", index=False)

373


# <font size=20>RETRIEVAL</font>

In [4]:
# Take the connection URL from the DSS_DB_URL environment variable so that
# credentials do not have to live in the notebook. The original literal is kept
# as the fallback, so existing setups keep working unchanged.
db_url = os.environ.get("DSS_DB_URL", "postgresql://dss:dss@postgres:5432/dss_db")

# Engine used by the retrieval queries below.
engine = sa.create_engine(db_url)

In [5]:
# SQL query: select every column and row of the annotations_ref table.
query = "SELECT * FROM annotations_ref;"

In [6]:
# Run the query through the engine and collect the rows into a DataFrame.
df_annot = pd.read_sql_query(
    sql=query,
    con=engine,
)

In [None]:
# PostgreSQL returns the list-of-lists columns in its own encoding; convert
# each value with Utils.psql_lol_to_numpy, first "net" and then "court".
for column in ("net", "court"):
    df_annot[column] = df_annot[column].map(Utils.psql_lol_to_numpy)

# <font size=20>APPENDIX</font>

# Create a dummy table

In [6]:
# Build a two-row example frame and write it to "my_table", replacing the
# table if it already exists; the DataFrame index is not stored.
table_name = "my_table"
df = pd.DataFrame(
    {
        "name": ["John Doe", "Jane Doe"],
        "age": [30, 25],
    }
)
df.to_sql(
    name=table_name,
    con=engine,
    if_exists="replace",
    index=False,
)

2