In [10]:
import pandas as pd
import altair as alt
import sqlite3
import os, json
import evaluate, meta, modeling, data_wrangling

In [40]:
from evaluate import TestSet
class EvalOral:
    """Bundle of oral-task testsets evaluated for one model.

    On construction the instance stores ``cfg``, ``model`` and ``data``,
    optionally opens ``batch_results.sqlite`` inside ``cfg.path["batch_folder"]``
    (only when ``cfg.batch_name`` is not None), and loads any
    ``<testset>_mean_df.csv`` files already present under
    ``cfg.path["model_folder"]/eval`` for the names in ``TESTSETS_NAME``.

    Attributes:
        con: sqlite3 connection, or None when no batch database is configured.
        cur: cursor on ``con``, or None when no batch database is configured.
        run_eval: maps a testset name to the method that evaluates it.
        <name>_mean_df: condition-level results, or None when not yet available.
    """

    TESTSETS_NAME = ("strain", "grain", "taraban")

    # Condition-specific strain testsets evaluated by `_eval_strain`.
    _STRAIN_TESTSETS = (
        "strain_hf_con_hi",
        "strain_hf_inc_hi",
        "strain_hf_con_li",
        "strain_hf_inc_li",
        "strain_lf_con_hi",
        "strain_lf_inc_hi",
        "strain_lf_con_li",
        "strain_lf_inc_li",
    )

    def __init__(self, cfg, model, data):
        self.cfg = cfg
        self.model = model
        self.data = data

        self.train_mean_df = None
        self.strain_mean_df = None
        self.grain_mean_df = None
        self.taraban_mean_df = None
        self.cortese_mean_df = None

        # Always define the database handles so that `eval` can test for
        # "no database configured" instead of relying on an AttributeError.
        self.con = None
        self.cur = None
        if self.cfg.batch_name is not None:
            sqlite_file = os.path.join(
                self.cfg.path["batch_folder"], "batch_results.sqlite"
            )
            self.con = sqlite3.connect(sqlite_file)
            self.cur = self.con.cursor()

        # Load eval results from file
        for _testset_name in self.TESTSETS_NAME:
            _file = os.path.join(self._eval_folder(), f"{_testset_name}_mean_df.csv")
            try:
                setattr(self, f"{_testset_name}_mean_df", pd.read_csv(_file))
            except (FileNotFoundError, IOError):
                # No saved results for this testset yet; the attribute stays None
                # and `eval` will compute them on demand.
                pass

        # Bundle testsets into dictionary
        self.run_eval = {
            "strain": self._eval_strain,
        }

    def close(self):
        """Close the batch database connection, if one was opened."""
        if self.con is not None:
            self.con.close()
            self.con = None
            self.cur = None

    def eval(self, testset_name):
        """Run the evaluation for ``testset_name`` and push it to the database.

        Nothing is recomputed when ``<testset_name>_mean_df`` is already set
        (for example because it was loaded from file in ``__init__``).
        Otherwise the registered routine in ``run_eval`` is executed and, when a
        database connection exists, its return value is appended to the table
        named ``testset_name``. The database push is best effort: a failure is
        reported with ``print`` and does not raise.

        Args:
            testset_name: name of the testset, e.g. ``"strain"``.

        Raises:
            AttributeError: if the instance has no ``<testset_name>_mean_df``.
            KeyError: if no evaluation routine is registered for the testset.
        """
        if getattr(self, f"{testset_name}_mean_df") is not None:
            print("Evaluation results found, loaded from file.")
            return

        try:
            runner = self.run_eval[testset_name]
        except KeyError:
            raise KeyError(
                f"No evaluation routine registered for testset '{testset_name}'"
            ) from None

        results = runner()

        if self.con is None:
            # No batch database was configured, so there is nothing to push to.
            return

        try:
            results.to_sql(testset_name, self.con, if_exists="append")
        except Exception as exc:
            # Keep the push best effort, but report the failure instead of
            # hiding it (the files written by the routine are unaffected).
            print(f"Could not push '{testset_name}' results to database: {exc}")

    def _eval_folder(self):
        """Return the folder that holds this model's evaluation CSV files."""
        return os.path.join(self.cfg.path["model_folder"], "eval")

    def _run_testset(self, testset_name, task, x_key, y_key):
        """Evaluate one testset on one task and return the TestSet's result."""
        testset_data = self.data.testsets[testset_name]
        testset = TestSet(
            name=testset_name,
            cfg=self.cfg,
            model=self.model,
            task=task,
            testitems=testset_data["item"],
            x_test=testset_data[x_key],
            y_test=testset_data[y_key],
        )
        testset.eval_all()
        return testset.result

    def _eval_strain(self):
        """Evaluate every strain testset on both ``pho_sem`` and ``sem_pho``.

        Writes the concatenated per-testset results to
        ``oral_strain_item_df.csv`` and their condition-level means to
        ``strain_mean_df.csv`` in the eval folder, and stores the means on
        ``self.strain_mean_df``.

        Returns:
            The concatenated (non-aggregated) results DataFrame.
        """
        # Collect the frames and concatenate once instead of growing a
        # DataFrame inside the loop.
        frames = []
        for testset_name in self._STRAIN_TESTSETS:
            frames.append(self._run_testset(testset_name, "pho_sem", "pho", "sem"))
            frames.append(self._run_testset(testset_name, "sem_pho", "sem", "pho"))
        df = pd.concat(frames)

        eval_folder = self._eval_folder()
        # The folder may not exist for a model that has never been evaluated.
        os.makedirs(eval_folder, exist_ok=True)
        df.to_csv(os.path.join(eval_folder, "oral_strain_item_df.csv"))

        # Condition level aggregate
        # numeric_only=True averages only the numeric columns; without it,
        # pandas >= 2 raises on non-numeric, non-key columns.
        mean_df = (
            df.groupby(
                [
                    "code_name",
                    "task",
                    "testset",
                    "epoch",
                    "timetick",
                    "y",
                ]
            )
            .mean(numeric_only=True)
            .reset_index()
        )
        mean_df.to_csv(os.path.join(eval_folder, "strain_mean_df.csv"))
        self.strain_mean_df = mean_df

        return df

 

In [41]:
# Name of the model run to evaluate and the root folder that holds the runs.
code_name = 'high_stress_long_pretraining'
tf_root = '/home/jupyter/tf'

# Rebuild the run's configuration from its saved model_config.json, then
# create the model and data objects from the project modules.
cfg = meta.ModelConfig.from_json(os.path.join(tf_root, "models", code_name, "model_config.json"))
model = modeling.HS04Model(cfg)
data = data_wrangling.MyData()
# With batch_name set to None, EvalOral does not open the batch sqlite database.
cfg.batch_name = None
oral_test = EvalOral(cfg, model, data)

Loading config from /home/jupyter/tf/models/high_stress_long_pretraining/model_config.json


In [42]:
# Run the strain evaluation, unless strain results were already loaded from file.
oral_test.eval('strain')

Evaluating strain_hf_con_hi: 100%|██████████| 59/59 [00:03<00:00, 16.31it/s]
Evaluating strain_hf_con_hi: 100%|██████████| 59/59 [00:06<00:00,  9.32it/s]
Evaluating strain_hf_inc_hi: 100%|██████████| 59/59 [00:03<00:00, 16.48it/s]
Evaluating strain_hf_inc_hi: 100%|██████████| 59/59 [00:06<00:00,  9.27it/s]
Evaluating strain_hf_con_li: 100%|██████████| 59/59 [00:03<00:00, 16.29it/s]
Evaluating strain_hf_con_li: 100%|██████████| 59/59 [00:06<00:00,  9.35it/s]
Evaluating strain_hf_inc_li: 100%|██████████| 59/59 [00:03<00:00, 16.49it/s]
Evaluating strain_hf_inc_li: 100%|██████████| 59/59 [00:06<00:00,  9.13it/s]
Evaluating strain_lf_con_hi: 100%|██████████| 59/59 [00:03<00:00, 16.72it/s]
Evaluating strain_lf_con_hi: 100%|██████████| 59/59 [00:06<00:00,  9.26it/s]
Evaluating strain_lf_inc_hi: 100%|██████████| 59/59 [00:03<00:00, 16.08it/s]
Evaluating strain_lf_inc_hi: 100%|██████████| 59/59 [00:06<00:00,  9.23it/s]
Evaluating strain_lf_con_li: 100%|██████████| 59/59 [00:03<00:00, 16.53it/s]

In [43]:
# Display the condition-level strain means stored on the EvalOral instance.
oral_test.strain_mean_df

Unnamed: 0,code_name,task,testset,epoch,timetick,y,acc,conditional_sse,sse
0,high_stress_long_pretraining,pho_sem,strain_hf_con_hi,1,2,sem,0.0,,9.137530
1,high_stress_long_pretraining,pho_sem,strain_hf_con_hi,1,3,sem,0.0,,9.073186
2,high_stress_long_pretraining,pho_sem,strain_hf_con_hi,1,4,sem,0.0,,9.021046
3,high_stress_long_pretraining,pho_sem,strain_hf_con_hi,1,5,sem,0.0,,9.024216
4,high_stress_long_pretraining,pho_sem,strain_hf_con_hi,1,6,sem,0.0,,9.107266
...,...,...,...,...,...,...,...,...,...
10379,high_stress_long_pretraining,sem_pho,strain_lf_inc_li,500,8,pho,1.0,0.001732,0.001732
10380,high_stress_long_pretraining,sem_pho,strain_lf_inc_li,500,9,pho,1.0,0.000911,0.000911
10381,high_stress_long_pretraining,sem_pho,strain_lf_inc_li,500,10,pho,1.0,0.000594,0.000594
10382,high_stress_long_pretraining,sem_pho,strain_lf_inc_li,500,11,pho,1.0,0.000520,0.000520


In [44]:
def csv_to_bigquery(csv_file, dataset_name, table_name, write_disposition=None):
    """Load a CSV file into a BigQuery table and wait for the job to finish.

    The dataset is created if it does not already exist. The first CSV row is
    skipped as a header and the schema is auto-detected.

    Args:
        csv_file: path of the CSV file to upload.
        dataset_name: name of the BigQuery dataset (created when missing).
        table_name: name of the destination table inside the dataset.
        write_disposition: optional BigQuery write disposition, e.g.
            ``"WRITE_TRUNCATE"`` to replace the table on a re-run. When None
            (the default) the job configuration is left as before and the
            BigQuery default applies.

    Raises:
        Whatever ``job.result()`` raises if the load job fails.
    """
    # Kept function-local, as in the original: the BigQuery client library is
    # only imported when an upload is actually requested.
    from google.cloud import bigquery

    # Create connection to BQ and push data
    client = bigquery.Client()
    dataset = client.create_dataset(dataset_name, exists_ok=True)
    table_ref = dataset.table(table_name)

    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV, skip_leading_rows=1, autodetect=True
    )
    if write_disposition is not None:
        job_config.write_disposition = write_disposition

    with open(csv_file, "rb") as f:
        job = client.load_table_from_file(f, table_ref, job_config=job_config)

    # Block until the load job completes (or fails).
    job.result()
    print(f"Loaded {job.output_rows} rows into {dataset_name}:{table_ref.path}")

In [47]:
# Upload the item-level strain results CSV to table "strain" in dataset "triangle_oral".
csv_to_bigquery(os.path.join(cfg.path['model_folder'], 'eval', 'oral_strain_item_df.csv'), dataset_name="triangle_oral", table_name="strain")

Loaded 207680 rows into triangle_oral:/projects/mimetic-core-276919/datasets/triangle_oral/tables/strain
