In [5]:
QPATH = "Quantlet/4-qode2desc"

import sys

IN_COLAB = "google.colab" in sys.modules

import os
import gc

if IN_COLAB:
    os.chdir(
        f"/content/drive/MyDrive/ColabNotebooks/IRTG/Encode_the_Qode/Encode-the-Qode/{QPATH}"
    )

#%%capture
#%pip install protobuf==3.20.1
if IN_COLAB:
    %pip install transformers[torch]
    %pip install -q sentencepiece
    %pip install datasets==2.13.1
    %pip install evaluate
    %pip install rouge_score

In [6]:
# Process-wide switches read by the Hugging Face stack; they are set before
# torch / transformers are imported in the following cell.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["WANDB_DISABLED"] = "true"

# Pin the run to GPU index 1, but only outside Colab. A Colab runtime normally
# exposes a single GPU at index 0, so applying the pin there would hide it and
# silently push the run onto the CPU.
if not IN_COLAB:
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [7]:
%%capture
import pandas as pd
from tqdm import tqdm

tqdm.pandas()

import torch
import torch, gc
import nltk

nltk.download("punkt")

import importlib
import analysis_modules

importlib.reload(analysis_modules)

In [8]:
# Run one analysis per (model, split) combination and store the parsed trainer
# logs as `logs.csv` inside that run's report folder.
for MODEL in ['CodeT5']:   #'CodeTrans',
    for SAMPLE_MODE in ['test']:   #'val',
        # SAMPLE_MODE selects the file-name prefixes of the training and
        # evaluation dataset files.
        if SAMPLE_MODE == 'test':
            train_name = 'full_train'
            test_name = 'test'
        elif SAMPLE_MODE == 'val':
            train_name = 'train'
            test_name = 'val'
        else:
            # Fail before any work starts: falling through would hit a
            # NameError or silently reuse the names of the previous iteration.
            raise ValueError(
                'Only test and val are available. Please change the SAMPLE_MODE '
                f'(got {SAMPLE_MODE!r})'
            )

        # Every key is forwarded verbatim as a keyword argument to
        # analysis_modules.scs_analyze.
        # NOTE(review): "logging_stes" and "label_smooting" look misspelled, but
        # they must match the callee's parameter names — confirm before renaming.
        analysis_config = {
            "DATE": "20231119_normal",
            "MODE": "no_context",
            "model_name": MODEL,
            "encoder_max_length": 512,
            "decoder_max_length": 75,
            "random_state": 42,
            "learning_rate": 5e-4,
            "epochs": 15,
            "train_batch": 4,
            "eval_batch": 4,
            "warmup_steps": 100,
            "weight_decay": 0.1,
            "logging_stes": 100,
            "save_total_lim": 1,
            "save_strategy": "epoch",
            "label_smooting": 0.1,
            "predict_generate": True,
            "load_best_model_at_end": False,
            "evaluation_strategy": "epoch",
            "freeze": True,
        }

        # Training and evaluation files live in the same preprocessed folder.
        data_dir = f"../../data/preprocessed/Quantlet/{analysis_config['DATE']}/{analysis_config['MODE']}/"

        analysis_config["train_data_path"] = data_dir
        analysis_config["train_data_name"] = f"{train_name}_dataset_{analysis_config['DATE']}_sample0.json"

        analysis_config["val_data_path"] = data_dir
        analysis_config["val_data_name"] = f"{test_name}_dataset_{analysis_config['DATE']}_sample0.json"

        analysis_config["analysis_name"] = analysis_modules.create_name(analysis_config)

        print(analysis_config["analysis_name"])

        # Drop the reference to the previous iteration's trainer first;
        # otherwise the collection below cannot reclaim it while the next run
        # is being set up. After the loop, `trainer` still holds the last run.
        trainer = None
        gc.collect()
        torch.cuda.empty_cache()

        trainer = analysis_modules.scs_analyze(**analysis_config)

        gc.collect()
        torch.cuda.empty_cache()

        logs = analysis_modules.parse_logs(trainer).drop_duplicates()

        ANALYSIS_FOLDER = f'reports/analysis_report_{analysis_config["analysis_name"]}'

        # Make sure the report folder exists so that a finished run is not
        # lost to a missing-directory error when the logs are written.
        os.makedirs(ANALYSIS_FOLDER, exist_ok=True)
        logs.to_csv(f'{ANALYSIS_FOLDER}/logs.csv', index=False)

        print('Analysis finished')

CodeT5_no_context_test_20231119_normal
CodeT5_no_context_test_20231119_normal
cuda
cuda


Found cached dataset json (/home/RDC/zinovyee.hub/.cache/huggingface/datasets/json/default-9befa6bc65e542c9/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


  0%|          | 0/1 [00:00<?, ?it/s]

Downloading and preparing dataset json/default to /home/RDC/zinovyee.hub/.cache/huggingface/datasets/json/default-4cf165996c876d3c/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /home/RDC/zinovyee.hub/.cache/huggingface/datasets/json/default-4cf165996c876d3c/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/3040 [00:00<?, ? examples/s]

Map:   0%|          | 0/327 [00:00<?, ? examples/s]

   eval_loss  eval_rouge1  eval_rouge2  eval_rougeL  eval_rougeLsum  \
0      6.288        0.145        0.039        0.125           0.131   

   eval_bleu  eval_gen_len  
0      0.012        14.177  




Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,4.4949,4.169928,0.3139,0.1378,0.2754,0.2853,18.0428,0.0476,0.3356,0.4781,4219,8825
2,3.4993,4.010826,0.3334,0.1689,0.2928,0.2993,17.8165,0.0636,0.3169,0.4653,4106,8825
3,2.9551,3.921113,0.3541,0.1965,0.3129,0.3201,18.3914,0.087,0.3613,0.4955,4373,8825
4,2.583,3.901985,0.3777,0.2255,0.3401,0.3476,18.2508,0.0962,0.3376,0.4794,4231,8825
5,2.3256,3.909333,0.3774,0.2222,0.3399,0.3468,18.1131,0.0967,0.3513,0.4887,4313,8825
6,2.1426,3.941986,0.3718,0.2273,0.3383,0.3439,18.3853,0.1049,0.3565,0.4922,4344,8825
7,2.0072,3.95822,0.3773,0.2279,0.3438,0.3502,18.055,0.0995,0.3248,0.4707,4154,8825
8,1.9019,3.956727,0.3838,0.2357,0.348,0.3551,18.2752,0.1058,0.3473,0.486,4289,8825
9,1.8259,3.938743,0.4,0.2477,0.3669,0.3741,18.1498,0.1116,0.3521,0.4893,4318,8825
10,1.7697,3.935363,0.3952,0.2453,0.3602,0.3676,18.2385,0.1111,0.354,0.4905,4329,8825


   eval_loss  eval_rouge1  eval_rouge2  eval_rougeL  eval_rougeLsum  \
0      3.897        0.402        0.254        0.366           0.372   

   eval_bleu  eval_gen_len  
0      0.116        18.385  
__________
Original: Generates plots of total over-/underestimation errors of naive, LASSO, and LSTM models for multiple energy consumer and prosumer data sets.


Summary before Tuning: Plots over - and underestimation for each non - terminal node in a single - line system.


Summary after Tuning: Generates plots over-andunderestimation for each dataset.


__________



__________
Original: 'hfhd_marketimpact_diff code plots the static and time-varying


Summary before Tuning: missing - block - length - non - zero - time - series - time - series - time - series - time - series - time - series - time - series - time - series - time - series - time - series - time - series - time - series - time - series - time - series - time - series - time - series - time - series -


Summary after Tunin

In [9]:
# Smoke-test cell: prints a fixed marker string.
print("check")

check
