In [11]:
import sys
import logging
# Send log records at INFO and above to stdout so they show up in the cell output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
# getLogger() without a name returns the root logger.
logger = logging.getLogger()
import tqdm
from pathlib import Path

In [3]:
import gspread
from googleapiclient.discovery import build
from google.oauth2.service_account import Credentials

In [4]:
import pandas as pd

In [5]:
# Create the gspread client through the OAuth flow. Requests are issued via
# BackOffHTTPClient (presumably to retry when the API rate-limits — confirm in
# the gspread docs).
gc = gspread.oauth(http_client=gspread.BackOffHTTPClient)

In [6]:
# Drive folder ID passed to the copy helpers as the destination folder.
TGT_FOLDER_ID = "16vFggD_RFkWa-sKZipSi1Ugh_br8zvNX"
# URL of the spreadsheet that is opened and copied for each output file.
BASE_SPREAD_SHEET_URL = "https://docs.google.com/spreadsheets/d/13rSZKWu9oc3nBqz79TgupvoaBGaRpwTT08flCAKII-Q"

In [7]:
def duplicate_spreadsheet(gc, spread_sheet, new_file_name, folder_id):
    """Return the spreadsheet named ``new_file_name`` in ``folder_id``, copying if needed.

    If the folder already lists a file with that name, the first match is
    opened and returned unchanged (nothing is copied). Otherwise
    ``spread_sheet`` is copied into the folder under the new name.

    Args:
        gc: gspread client used for the lookup, open and copy calls.
        spread_sheet: Source spreadsheet; only its ``id`` is read.
        new_file_name: Title to look for and to give to the copy.
        folder_id: Drive folder to search and to place the copy in.

    Returns:
        The already existing spreadsheet, or the newly created copy.
    """
    if matches := gc.list_spreadsheet_files(new_file_name, folder_id):
        logger.info(f"{new_file_name} exists")
        try:
            # The listing yields file IDs, so the file must be opened by key;
            # gc.open() searches by title and would never find an ID.
            return gc.open_by_key(matches[0]["id"])
        except Exception:
            # Best effort: when the existing file cannot be opened, fall back
            # to creating a fresh copy, but leave a trace of why.
            logger.warning(
                f"could not open existing {new_file_name}; creating a new copy",
                exc_info=True,
            )

    return gc.copy(
        file_id=spread_sheet.id,
        title=new_file_name,
        folder_id=folder_id,
    )

In [8]:
def get_base_wsheet(spread_sheet, wsheet_name):
    """Find a worksheet of ``spread_sheet`` by its exact title.

    Returns the first worksheet whose ``title`` equals ``wsheet_name``, or
    ``None`` when no worksheet has that title.
    """
    titled = (
        sheet for sheet in spread_sheet.worksheets() if sheet.title == wsheet_name
    )
    return next(titled, None)

In [9]:
def copy_base_spread_sheet(base_spread_sheet_url, new_file_name, directory_id):
    """Open the spreadsheet at ``base_spread_sheet_url`` and duplicate it.

    Args:
        base_spread_sheet_url: URL of the spreadsheet to use as the source.
        new_file_name: Title for the duplicate.
        directory_id: Drive folder that should hold the duplicate.

    Returns:
        Whatever ``duplicate_spreadsheet`` returns for the given name/folder.
    """
    # Honour the URL argument; the module-level constant was previously used
    # here no matter what the caller passed in.
    base_spreadsheet = gc.open_by_url(base_spread_sheet_url)
    return duplicate_spreadsheet(gc, base_spreadsheet, new_file_name, directory_id)

In [59]:
def load_df(path, random_state=None):
    """Read a CSV and return all of its rows in shuffled order.

    The first four CSV columns are used as the (multi-level) index.

    Args:
        path: CSV file to read.
        random_state: Optional seed forwarded to ``DataFrame.sample``. The
            default ``None`` keeps the unseeded shuffle; pass an int to get
            the same row order on every run.

    Returns:
        DataFrame containing every row of the file, shuffled.
    """
    df = pd.read_csv(path, index_col=[0, 1, 2, 3])
    # frac=1 draws every row without replacement, i.e. a full shuffle.
    return df.sample(frac=1, random_state=random_state)

In [110]:
def fill_spread_sheet(spread_sheet, df):
    """Create one worksheet per row of ``df`` from the "blank" worksheet.

    For the i-th row (counting from 1) the "blank" worksheet is duplicated
    into a sheet named ``str(i)`` at sheet position ``i`` and three cells are
    written: A1 = first index level, D1 = ``inputs`` column, G1 = ``value``
    column. Afterwards the worksheet at position 0 is deleted.

    Args:
        spread_sheet: gspread spreadsheet containing a worksheet titled "blank".
        df: Frame whose index entries are indexable (``idx[0]`` is read) and
            which has ``inputs`` and ``value`` columns.

    Raises:
        AssertionError: If no worksheet titled "blank" exists.
    """
    base_sheet = get_base_wsheet(spread_sheet, "blank")
    assert base_sheet is not None, 'spreadsheet has no worksheet titled "blank"'
    base_sheet_id = base_sheet.id

    # Wrap the row iterator itself (not enumerate) and pass the row count so
    # the progress bar can show a total instead of a bare iteration counter.
    rows = tqdm.tqdm(df.iterrows(), total=len(df))
    for position, (idx, row) in enumerate(rows, start=1):
        gen_id = idx[0]
        source = row["inputs"]
        pred = row["value"]

        ws = spread_sheet.duplicate_sheet(
            source_sheet_id=base_sheet_id,
            insert_sheet_index=position,
            new_sheet_name=f"{position}",
        )
        # One batched request instead of three single-cell updates, which
        # cuts the number of write calls per sheet. USER_ENTERED is the input
        # mode gspread applies for single-cell updates, so values are parsed
        # the same way as before.
        ws.batch_update(
            [
                {"range": "A1", "values": [[gen_id]]},
                {"range": "D1", "values": [[source]]},
                {"range": "G1", "values": [[pred]]},
            ],
            value_input_option="USER_ENTERED",
        )

    # Removes whatever sits at position 0 — presumably the "blank" template;
    # verify against the base spreadsheet's sheet order.
    spread_sheet.del_worksheet(spread_sheet.get_worksheet(0))

In [111]:
# Build one spreadsheet per annotation CSV. Files are visited in ascending
# order of the numeric suffix in their name, since glob order alone is not
# guaranteed.
for f in sorted(
    Path("./data/annot_dfs/").glob("samples_ann_*.csv"),
    key=lambda p: int(p.stem.split("_")[-1]),
):
    # Single quotes inside the f-string: reusing the outer quote character is
    # only valid syntax from Python 3.12 onwards.
    name = f"falc.mtl.{int(f.stem.split('_')[-1])}"
    df = load_df(f)
    spread_sheet = copy_base_spread_sheet(BASE_SPREAD_SHEET_URL, name, TGT_FOLDER_ID)
    fill_spread_sheet(spread_sheet, df)

INFO:root:falc.mtl.1 exists


24it [00:44,  1.86s/it]


INFO:root:falc.mtl.2 exists


24it [01:31,  3.81s/it]


INFO:root:falc.mtl.3 exists


24it [01:37,  4.08s/it]


INFO:root:falc.mtl.4 exists


24it [00:56,  2.37s/it]


INFO:root:falc.mtl.5 exists


24it [01:32,  3.85s/it]


INFO:root:falc.mtl.6 exists


24it [02:09,  5.38s/it]


INFO:root:falc.mtl.7 exists


24it [01:06,  2.76s/it]


INFO:root:falc.mtl.8 exists


24it [02:00,  5.01s/it]


INFO:root:falc.mtl.9 exists


24it [02:35,  6.48s/it]


INFO:root:falc.mtl.10 exists


24it [00:51,  2.14s/it]
