In [1]:
QPATH = "Quantlet/4-qode2desc"

import sys

IN_COLAB = "google.colab" in sys.modules

import os
import gc

if IN_COLAB:
    os.chdir(
        f"/content/drive/MyDrive/ColabNotebooks/IRTG/Encode_the_Qode/Encode-the-Qode/{QPATH}"
    )

#%%capture
#%pip install protobuf==3.20.1
if IN_COLAB:
    %pip install transformers[torch]
    %pip install -q sentencepiece
    %pip install datasets==2.13.1
    %pip install evaluate
    %pip install rouge_score

In [2]:
# Process-wide switches; they are set before torch is imported in a later cell
# so that they are already in place when the libraries read them.

# Turn off parallelism inside the Hugging Face tokenizers library.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Turn off Weights & Biases reporting.
os.environ["WANDB_DISABLED"] = "true"

# Pin the run to the GPU with index 1 on the multi-GPU server. A Colab runtime
# exposes a single GPU at index 0, so pinning to index 1 there would hide it
# and silently fall back to CPU; leave device selection alone on Colab.
if not IN_COLAB:
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [3]:
%%capture
import pandas as pd
from tqdm import tqdm

tqdm.pandas()

import torch
import torch, gc
import nltk

nltk.download("punkt")

import importlib
import analysis_modules

importlib.reload(analysis_modules)

In [6]:
# Few-shot fine-tuning sweep.
#
# For every model and every sample mode ('val' / 'test') this builds the
# analysis configuration, runs analysis_modules.scs_analyze, and writes the
# parsed trainer logs to reports/analysis_report_<analysis_name>/logs.csv.
for MODEL in ['CodeTrans', 'CodeT5']:
    for SAMPLE_MODE in ['val', 'test']:
        # Pick the file-name prefixes of the training and evaluation splits.
        if SAMPLE_MODE == 'test':
            train_name = 'fs_full_train'
            test_name = 'test'
        elif SAMPLE_MODE == 'val':
            train_name = 'fs_train'
            test_name = 'val'
        else:
            # Fail loudly: continuing would reuse stale (or undefined)
            # split names from a previous iteration.
            raise ValueError(
                'Only test and val are available. Please change the SAMPLE_MODE'
            )

        # NOTE(review): every key is forwarded as a keyword argument to
        # analysis_modules.scs_analyze, so the spellings (including
        # "logging_stes" and "label_smooting") must match that function's
        # parameter names - do not rename them here without changing it too.
        analysis_config = {
            "DATE": "20231104",
            "MODE": "no_context",
            "model_name": MODEL,
            "encoder_max_length": 512,
            "decoder_max_length": 75,
            "random_state": 42,
            "learning_rate": 5e-4,
            "epochs": 15,
            "train_batch": 4,
            "eval_batch": 4,
            "warmup_steps": 100,
            "weight_decay": 0.1,
            "logging_stes": 100,
            "save_total_lim": 1,
            "save_strategy": "steps",
            "label_smooting": 0.1,
            "predict_generate": True,
            "load_best_model_at_end": False,
            "evaluation_strategy": "epoch",
            "freeze": True,
        }

        # Training and evaluation files live in the same directory.
        data_dir = f"../../data/preprocessed/Quantlet/{analysis_config['DATE']}/{analysis_config['MODE']}/"

        analysis_config["train_data_path"] = data_dir
        analysis_config["train_data_name"] = f"{train_name}_dataset_sample_0.json"

        analysis_config["val_data_path"] = data_dir
        analysis_config["val_data_name"] = f"{test_name}_dataset_{analysis_config['DATE']}_sample0.json"

        analysis_config["analysis_name"] = (
            'few_shot_' + analysis_modules.create_name(analysis_config)
        )

        print(analysis_config["analysis_name"])

        # Drop the reference to the previous run's trainer before collecting:
        # while the name is still bound, the objects it holds (presumably the
        # model and its GPU tensors) cannot be reclaimed and would stay
        # resident while the next model is loaded.
        trainer = None
        gc.collect()
        torch.cuda.empty_cache()

        trainer = analysis_modules.scs_analyze(**analysis_config)

        gc.collect()
        torch.cuda.empty_cache()

        logs = analysis_modules.parse_logs(trainer).drop_duplicates()

        ANALYSIS_FOLDER = f'reports/analysis_report_{analysis_config["analysis_name"]}'

        # to_csv does not create missing directories; make sure the report
        # folder exists so a finished run is not lost at the very last step.
        os.makedirs(ANALYSIS_FOLDER, exist_ok=True)
        logs.to_csv(f'{ANALYSIS_FOLDER}/logs.csv', index=False)

        print('Analysis finished')

few_shot_CodeTrans_no_context_val_20231104
few_shot_CodeTrans_no_context_val_20231104
cuda
cuda
Downloading and preparing dataset json/default to /home/RDC/zinovyee.hub/.cache/huggingface/datasets/json/default-f40c96fb4259060f/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /home/RDC/zinovyee.hub/.cache/huggingface/datasets/json/default-f40c96fb4259060f/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

Found cached dataset json (/home/RDC/zinovyee.hub/.cache/huggingface/datasets/json/default-3e428f2a49cb177b/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


  0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/195 [00:00<?, ? examples/s]

Loading cached processed dataset at /home/RDC/zinovyee.hub/.cache/huggingface/datasets/json/default-3e428f2a49cb177b/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-10d3461098ef8afa.arrow


   eval_loss  eval_rouge1  eval_rouge2  eval_rougeL  eval_rougeLsum  \
0      6.512        0.027        0.006        0.025           0.025   

   eval_bleu  eval_gen_len  
0      0.003        15.426  




Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.5861,5.003959,0.2318,0.0752,0.2046,0.21,14.8984,0.0196,0.2651,0.4296,3261,7591
2,4.535,4.915094,0.252,0.0795,0.2115,0.2166,16.118,0.0217,0.3192,0.4669,3544,7591
3,3.7074,4.992931,0.2593,0.0842,0.2195,0.2264,16.9738,0.0259,0.3897,0.5148,3908,7591
4,3.1193,5.014568,0.2589,0.0828,0.2231,0.2281,16.6164,0.029,0.3686,0.5005,3799,7591
5,2.7486,5.098373,0.2559,0.09,0.2211,0.2271,17.741,0.0321,0.4252,0.5391,4092,7591
6,2.4818,5.121419,0.2485,0.0778,0.2133,0.2169,17.0262,0.0278,0.3945,0.5181,3933,7591
7,2.2903,5.147704,0.2663,0.0902,0.2287,0.2324,17.3148,0.0365,0.4131,0.5308,4029,7591
8,2.15,5.18322,0.2633,0.0927,0.2277,0.2311,16.9508,0.0374,0.38,0.5082,3858,7591
9,2.0539,5.160844,0.2597,0.0923,0.227,0.2328,17.2262,0.0385,0.3885,0.514,3902,7591
10,1.9878,5.228225,0.2671,0.0957,0.2329,0.2382,16.8,0.0407,0.3767,0.506,3841,7591


   eval_loss  eval_rouge1  eval_rouge2  eval_rougeL  eval_rougeLsum  \
0      5.187         0.27        0.093        0.235            0.24   

   eval_bleu  eval_gen_len  
0      0.042        17.325  
__________
Original: Generates plots of energy consumption/production/both time series of energy consumers/prosumers generated from energy readings in 3-minute intervals.


Summary before Tuning: "" "" "" "" "" "" "" ) ( "" ) ( "" "" "" "" "" ""p046 cons", "" "" "" "" "" "" png ) ""p049 cons", ""p048 cons",


Summary after Tuning: This plots the consumption patterns of all the data and plots the production patterns. The following are presented : a) Plots the consumption patterns of all the data and a ) Plots the exemplary consumption patterns of all the data and a ) Plots the exemplary consumption patterns of all the data with respect to the normals of all the data and a


__________



__________
Original: Scrape the Etherscan API to get source code of smart contracts given the list of t

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /home/RDC/zinovyee.hub/.cache/huggingface/datasets/json/default-e1587c53fc4b8ea3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]



  0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]



   eval_loss  eval_rouge1  eval_rouge2  eval_rougeL  eval_rougeLsum  \
0      6.531        0.035        0.004        0.032           0.032   

   eval_bleu  eval_gen_len  
0        0.0        15.615  




Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.1829,4.901777,0.2844,0.0971,0.2444,0.2549,17.3425,0.0229,0.3388,0.4802,4251,8852
2,4.2538,4.83309,0.2948,0.1116,0.2506,0.2584,16.9694,0.0296,0.3259,0.4714,4173,8852
3,3.6353,4.795277,0.2977,0.1122,0.2504,0.2604,17.6422,0.0406,0.3742,0.5043,4464,8852
4,3.225,4.877594,0.2983,0.1156,0.2476,0.2585,17.2752,0.0427,0.3579,0.4932,4366,8852
5,2.9056,4.91372,0.2987,0.1171,0.252,0.2629,17.4893,0.0448,0.3772,0.5063,4482,8852
6,2.6717,4.965797,0.297,0.1199,0.2508,0.2611,17.6177,0.0478,0.3732,0.5036,4458,8852
7,2.4747,4.960315,0.2976,0.121,0.2516,0.2623,17.63,0.0491,0.386,0.5123,4535,8852
8,2.3501,5.005836,0.2882,0.1086,0.2438,0.2532,16.9205,0.0417,0.3371,0.4791,4241,8852
9,2.2153,4.998844,0.2915,0.1118,0.2478,0.2568,17.4587,0.0457,0.3687,0.5006,4431,8852
10,2.1328,5.04837,0.2962,0.1183,0.2524,0.2622,17.844,0.0476,0.3795,0.5079,4496,8852


   eval_loss  eval_rouge1  eval_rouge2  eval_rougeL  eval_rougeLsum  \
0      5.035        0.294        0.119        0.251            0.26   

   eval_bleu  eval_gen_len  
0      0.049        17.572  
__________
Original: Generates plots of total over-/underestimation errors of naive, LASSO, and LSTM models for multiple energy consumer and prosumer data sets.


Summary before Tuning: ) # #) # # FUN get Predictions. R # #) # predictions c LASSO = naive"


Summary after Tuning: Analyses time series of each time series of a consumer to determine the effects of a given time series.


__________



__________
Original: 'hfhd_marketimpact_diff code plots the static and time-varying


Summary before Tuning: , # =, #,(,(y, ny) } }, #,,,, #, #, #, #, #, #, #, ""netsize1", ""netsize2", ""netsize3", ""net


Summary after Tuning: Plots the impact of the Netsize of the US, for the year and the month of the year and the year of the year and the year of the year and the year of the UAE, for the year 



  0%|          | 0/1 [00:00<?, ?it/s]



  0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/195 [00:00<?, ? examples/s]



   eval_loss  eval_rouge1  eval_rouge2  eval_rougeL  eval_rougeLsum  \
0      6.154        0.148        0.043        0.128           0.132   

   eval_bleu  eval_gen_len  
0      0.017         13.58  




Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.4957,4.995734,0.2672,0.0948,0.2205,0.227,17.8984,0.0368,0.4459,0.5532,4132,7469
2,4.1169,4.98486,0.2513,0.0858,0.2212,0.2239,17.5967,0.0368,0.4079,0.5272,3938,7469
3,3.006,5.1308,0.2466,0.084,0.2164,0.2205,17.6164,0.0358,0.3867,0.5128,3830,7469
4,2.4003,5.209966,0.2537,0.09,0.2156,0.2231,17.9279,0.0413,0.4056,0.5256,3926,7469
5,2.0523,5.383873,0.2432,0.0914,0.2147,0.2195,17.377,0.0464,0.3995,0.5215,3895,7469
6,1.9045,5.255472,0.2618,0.0983,0.2247,0.2288,17.7738,0.0472,0.4248,0.5388,4024,7469
7,1.7811,5.283758,0.2531,0.0921,0.2138,0.2175,17.6098,0.0474,0.4262,0.5397,4031,7469
8,1.7154,5.308001,0.2567,0.0981,0.2236,0.2278,17.7541,0.0521,0.4109,0.5293,3953,7469
9,1.6693,5.26132,0.2561,0.0992,0.2233,0.2271,17.9508,0.0493,0.4283,0.5412,4042,7469
10,1.63,5.341737,0.2588,0.1007,0.2254,0.2295,17.2984,0.0488,0.3887,0.5141,3840,7469


   eval_loss  eval_rouge1  eval_rouge2  eval_rougeL  eval_rougeLsum  \
0      5.317        0.259        0.099        0.224           0.229   

   eval_bleu  eval_gen_len  
0       0.05        17.597  
__________
Original: Generates plots of energy consumption/production/both time series of energy consumers/prosumers generated from energy readings in 3-minute intervals.


Summary before Tuning: Plots time series recorded by energy smart meters and tibbletime.


Summary after Tuning: Plot time series for specified consumers with/without continuous dividends.


__________



__________
Original: Scrape the Etherscan API to get source code of smart contracts given the list of their hashes


Summary before Tuning: Load all of the data from the API token. txt and the root folder of the quantlet.


Summary after Tuning: Scraping and preprocessing of the Ethereum ethereum Ethereum data from a github repository into the root folder of the Quantlet.


__________



__________
Original: 'hfhd_dat



  0%|          | 0/1 [00:00<?, ?it/s]



  0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]



   eval_loss  eval_rouge1  eval_rouge2  eval_rougeL  eval_rougeLsum  \
0      6.236        0.141        0.042        0.123           0.126   

   eval_bleu  eval_gen_len  
0      0.011        13.881  




Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len,Bleu,Brevity Penalty,Length Ratio,Translation Length,Reference Length
1,5.0368,4.847597,0.2805,0.1052,0.24,0.2499,18.104,0.0316,0.3323,0.4758,4199,8825
2,3.7804,4.817539,0.2756,0.1094,0.2365,0.2448,18.3272,0.0363,0.3159,0.4646,4100,8825
3,2.9381,4.863218,0.287,0.1152,0.2454,0.2555,18.5076,0.0439,0.3613,0.4955,4373,8825
4,2.4368,5.033069,0.2856,0.1181,0.245,0.2543,18.0092,0.0432,0.3102,0.4607,4066,8825
5,2.118,5.065937,0.283,0.1156,0.2409,0.2519,18.0826,0.0448,0.353,0.4899,4323,8825
6,1.9458,5.1424,0.2771,0.1142,0.2375,0.2455,17.7554,0.0445,0.3403,0.4812,4247,8825
7,1.837,5.064888,0.2951,0.1333,0.2535,0.2614,18.2141,0.0519,0.3285,0.4732,4176,8825
8,1.7666,5.075459,0.3025,0.1349,0.2592,0.2672,18.1407,0.0508,0.34,0.481,4245,8825
9,1.7091,5.05693,0.2882,0.1316,0.2488,0.256,18.0214,0.0507,0.3084,0.4595,4055,8825
10,1.6659,5.059857,0.3011,0.1342,0.2601,0.2688,18.1101,0.0539,0.3335,0.4766,4206,8825


   eval_loss  eval_rouge1  eval_rouge2  eval_rougeL  eval_rougeLsum  \
0      5.055        0.299        0.136        0.257           0.268   

   eval_bleu  eval_gen_len  
0      0.056        18.116  
__________
Original: Generates plots of total over-/underestimation errors of naive, LASSO, and LSTM models for multiple energy consumer and prosumer data sets.


Summary before Tuning: Plots over - and underestimation for each individual dataset.


Summary after Tuning: Generates plots of the over-and underestimation and error analysis for the samples from the Frankfurt Stock Exchange, Frankfurt Stock Exchange  and plots the time series of the 20 largest companies at the 80% significance level from the Frankfurt Stock Exchange  and plots the time series of the D


__________



__________
Original: 'hfhd_marketimpact_diff code plots the static and time-varying


Summary before Tuning: missing - block - length - non - zero - time - series - time - series - time - series - time - series - 