In [None]:
import pandas as pd
import altair as alt
import sqlite3
import os, json
import evaluate, meta, modeling, data_wrangling

In [None]:
from evaluate import TestSet
class EvalOral:
    """Bundle of oral-task test sets evaluated in both task directions.

    On construction, any condition-level results previously saved under
    ``<model_folder>/eval/<name>_mean_df.csv`` are loaded into the matching
    ``<name>_mean_df`` attribute.  ``eval`` only runs an evaluation when no
    such cached result is present.

    Attributes:
        cfg: Model configuration; must expose ``batch_name`` and a ``path``
            mapping with ``"model_folder"`` (and ``"batch_folder"`` when
            ``batch_name`` is set).
        model: Model handed to every ``TestSet``.
        data: Data object exposing a ``testsets`` mapping.
        con, cur: sqlite3 connection and cursor to the batch results
            database, or ``None`` when ``cfg.batch_name`` is ``None``.
        run_eval: Mapping from test set name to its evaluation routine.
    """

    TESTSETS_NAME = ("strain", "grain", "taraban")

    # (task name, key of the input representation, key of the target
    # representation), listed in the order they are run for each test set.
    _TASKS = (("pho_sem", "pho", "sem"), ("sem_pho", "sem", "pho"))

    # Columns that identify one cell of the condition-level aggregate.
    _GROUP_COLUMNS = ("code_name", "task", "testset", "epoch", "timetick", "y")

    def __init__(self, cfg, model, data):
        self.cfg = cfg
        self.model = model
        self.data = data

        self.train_mean_df = None
        self.strain_mean_df = None
        self.grain_mean_df = None
        self.taraban_mean_df = None
        self.cortese_mean_df = None

        # Setup database. The attributes always exist so that `eval` can
        # test for a connection instead of relying on an AttributeError.
        self.con = None
        self.cur = None
        if self.cfg.batch_name is not None:
            sqlite_file = os.path.join(
                self.cfg.path["batch_folder"], "batch_results.sqlite"
            )
            self.con = sqlite3.connect(sqlite_file)
            self.cur = self.con.cursor()

        # Load eval results from file
        for _testset_name in self.TESTSETS_NAME:
            _file = os.path.join(
                self.cfg.path["model_folder"],
                "eval",
                f"{_testset_name}_mean_df.csv",
            )
            try:
                setattr(self, f"{_testset_name}_mean_df", pd.read_csv(_file))
            except OSError:
                # No cached result for this test set; keep the None default.
                pass

        # Bundle testsets into dictionary
        self.run_eval = {
            "strain": self._eval_strain,
            "taraban": self._eval_taraban,
        }

    def eval(self, testset_name):
        """Run the evaluation for one test set unless cached results exist.

        When a batch database connection is open, the item-level results are
        appended to the table named ``testset_name``.  Writing to the
        database is best effort: a failure is reported but not raised,
        because the results have already been saved as CSV at that point.

        Args:
            testset_name: Name of the test set, e.g. ``"strain"``.

        Raises:
            AttributeError: If there is no ``<testset_name>_mean_df``
                attribute.
            KeyError: If no evaluation routine is registered for
                ``testset_name``.
        """
        if getattr(self, f"{testset_name}_mean_df") is not None:
            print("Evaluation results found, loaded from file.")
            return

        if testset_name not in self.run_eval:
            raise KeyError(
                f"No evaluation routine registered for test set {testset_name!r}"
            )

        results = self.run_eval[testset_name]()

        if self.con is None:
            return

        try:
            results.to_sql(testset_name, self.con, if_exists="append")
        except Exception as err:
            # Deliberately broad: persistence to the batch database must not
            # abort a finished evaluation, but the failure should be visible.
            print(
                f"Could not write {testset_name} results to the batch database: {err!r}"
            )

    def _eval_family(self, family, testsets):
        """Evaluate each test set in both directions and save the results.

        Writes ``<family>_item_df.csv`` and ``<family>_mean_df.csv`` to
        ``<model_folder>/eval`` and stores the aggregate on
        ``self.<family>_mean_df``.

        Args:
            family: Prefix used for the output files and the attribute name.
            testsets: Keys into ``self.data.testsets`` to evaluate.

        Returns:
            The concatenated item-level results of all test sets and tasks.
        """
        frames = []
        for testset_name in testsets:
            testset = self.data.testsets[testset_name]
            for task, x_key, y_key in self._TASKS:
                test = TestSet(
                    name=testset_name,
                    cfg=self.cfg,
                    model=self.model,
                    task=task,
                    testitems=testset["item"],
                    x_test=testset[x_key],
                    y_test=testset[y_key],
                )
                test.eval_all()
                frames.append(test.result)

        # Concatenate once; growing a frame inside the loop re-copies all
        # previously collected rows on every iteration.
        df = pd.concat(frames)

        eval_dir = os.path.join(self.cfg.path["model_folder"], "eval")
        os.makedirs(eval_dir, exist_ok=True)
        df.to_csv(os.path.join(eval_dir, f"{family}_item_df.csv"))

        # Condition level aggregate. numeric_only=True keeps the historical
        # behaviour of skipping non-numeric columns, which raises in
        # pandas >= 2.0 when left at the default.
        mean_df = (
            df.groupby(list(self._GROUP_COLUMNS))
            .mean(numeric_only=True)
            .reset_index()
        )
        mean_df.to_csv(os.path.join(eval_dir, f"{family}_mean_df.csv"))
        setattr(self, f"{family}_mean_df", mean_df)

        return df

    def _eval_strain(self):
        """Evaluate the eight ``strain_*`` test sets; return item-level results."""
        testsets = (
            "strain_hf_con_hi",
            "strain_hf_inc_hi",
            "strain_hf_con_li",
            "strain_hf_inc_li",
            "strain_lf_con_hi",
            "strain_lf_inc_hi",
            "strain_lf_con_li",
            "strain_lf_inc_li",
        )
        return self._eval_family("strain", testsets)

    def _eval_taraban(self):
        """Evaluate the eight ``taraban_*`` test sets; return item-level results."""
        testsets = (
            "taraban_hf-exc",
            "taraban_hf-reg-inc",
            "taraban_lf-exc",
            "taraban_lf-reg-inc",
            "taraban_ctrl-hf-exc",
            "taraban_ctrl-hf-reg-inc",
            "taraban_ctrl-lf-exc",
            "taraban_ctrl-lf-reg-inc",
        )
        return self._eval_family("taraban", testsets)

 

In [None]:
# Which trained model to evaluate, and where the model folders live.
code_name = "high_stress_long_pretraining"
tf_root = "/home/jupyter/tf"

# Restore the saved configuration, then build the model and data from it.
config_json = os.path.join(tf_root, "models", code_name, "model_config.json")
cfg = meta.ModelConfig.from_json(config_json)
model = modeling.HS04Model(cfg)
data = data_wrangling.MyData()

# Clearing batch_name makes EvalOral skip opening the batch results database.
cfg.batch_name = None
oral_test = EvalOral(cfg, model, data)

In [None]:
# Run the strain evaluation; it is skipped when a saved strain_mean_df.csv
# was already loaded by the constructor.
oral_test.eval('strain')

In [None]:
# Display the condition-level strain results as the cell output.
oral_test.strain_mean_df

In [None]:
def csv_to_bigquery(csv_file, dataset_name, table_name):
    """Load a local CSV file into a BigQuery table.

    The dataset is created if it does not exist yet.  The first row of the
    CSV is skipped as a header and the table schema is auto-detected.  The
    call blocks until the load job has finished, then prints the number of
    rows loaded.

    Args:
        csv_file: Path of the CSV file to upload.
        dataset_name: Name of the BigQuery dataset to load into.
        table_name: Name of the table inside that dataset.
    """
    # Local import: the BigQuery client is only needed by this helper.
    from google.cloud import bigquery

    # Create connection to BQ and push data
    client = bigquery.Client()
    dataset = client.create_dataset(dataset_name, exists_ok=True)
    table_ref = dataset.table(table_name)

    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,
        autodetect=True,
    )

    with open(csv_file, "rb") as f:
        job = client.load_table_from_file(f, table_ref, job_config=job_config)

    # Wait for the load job to complete before reporting.
    job.result()
    print(f"Loaded {job.output_rows} rows into {dataset_name}:{table_ref.path}")

In [None]:
# Upload the item-level strain results to the "strain" table of the
# "triangle_oral" BigQuery dataset.
# NOTE(review): EvalOral._eval_strain in this file writes "strain_item_df.csv";
# confirm that "oral_strain_item_df.csv" is produced elsewhere, otherwise this
# call fails with FileNotFoundError.
csv_to_bigquery(os.path.join(cfg.path['model_folder'], 'eval', 'oral_strain_item_df.csv'), dataset_name="triangle_oral", table_name="strain")