In [1]:
# Mount Google Drive so the project folder is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# Change directory to your code
%cd /content/drive/MyDrive/addition
%pwd   # verify you're in the right place
!ls    # should show train.py, data_generate.py, configuration_files, etc.

Mounted at /content/drive
/content/drive/MyDrive/addition
configuration_files	       gsm_test		  result_analysis.ipynb
configurator.py		       legacy_code	  result_analysis.py
data			       main_utilities.py  result_analysis_script
data_generate.py	       model.py		  results
data_generation_script	       model_rope.py	  startHere.ipynb
error_examples		       model_t5bias.py	  statistical_measurements.py
evaluation.py		       __pycache__	  train.py
extra_result_analysis_scripts  README.md


# I. Generate Data (choose one synthetic task)

## Addition

In [None]:
!python data_generate.py --task addition --num_operands 4 --experiment_name 4_operands_0_to_999_uniform --train_size 1000000 --test_size 10000 --val_size 10000 --train_eval True --sample-size 10000 --generate_reverse True

#### Ablation in Addition (e.g. randomize thousands-place of the output)

In [None]:
!python data_generate.py --task addition --randomize thousands --num_operands 4 --experiment_name 4_operands_0_to_999_output_randomize_thousands --train_size 1000000 --test_size 10000 --val_size 10000 --train_eval True --sample-size 10000 --generate_reverse True


## Multiplication

In [None]:
!python data_generate.py --task multiplication --experiment_name 40_digit_times_1_digit --train_size 1000000 --test_size 10000 --val_size 10000 \
--a_max_digits 40 --b_max_digits 1 --train_eval True --sample-size 10000 --generate_reverse True

## Comparison (Balanced data)

In [None]:
!python data_generate.py --task comparison --experiment_name comparison_bal --train_eval True --sample-size 5000

## Sorting (Doubly balanced data)


In [None]:
!python data_generate.py --task sorting --experiment_name 4_operands_sorting_doubly_balanced --train_eval True --sample-size 5000

# II. Let's Start Training!

#### The `.txt` file passed to `train.py` is the configuration file

## 4 Operands Addition

In [None]:
!python train.py 4_operands_addition_reversed.txt

In [None]:
!python train.py 4_operands_addition_plain.txt

## Simple Multiplication

In [None]:
!python train.py 40_1_digits_mul_reversed.txt

## Comparison

In [None]:
!python train.py comparison_bal.txt

## Sorting

In [None]:
!python train.py 4_operands_sorting_doubly_bal.txt

## Slicing -- Addition

In [None]:
!python train.py slicing_addition_4_operand_plain.txt --batch slicing

In [None]:
!python train.py slicing_addition_4_operand_reverse.txt --batch slicing

## Positional Encoding

In [None]:
!python train.py 4_operands_addition_reversed.txt --PE RoPE

In [None]:
!python train.py 4_operands_addition_reversed.txt --PE t5

### Greedy Decoding

In [None]:
!python train.py 4_operands_addition_reversed.txt --greedy

In [None]:
!python train.py 4_operands_addition_plain.txt --greedy

# III. Result Analysis

## Addition Task

#### Digitwise Error Rates (4 operands addition)

In [None]:
!python result_analysis_script/digitwise_error.py results/4_operands_0_to_999_uniform/reverse_out/4_operands_0_to_999_uniform_reverse/test_reverse_results.csv

#### Fit Normal

In [None]:
!python result_analysis_script/fit_normal.py \
  --input results/4_operands_0_to_999_uniform/reverse_out_early_dense_eval/early_dense_eval_for_normal_distr_4_operands_0_to_999_uniform_reverse/test_reverse_results.csv \
  --iter-start 1000 --iter-end 1800 --iter-step 200 \
  --diff-min -800 --diff-max 800


In [None]:
!python result_analysis_script/fit_normal.py \
  --input results/4_operands_0_to_999_uniform/reverse_out/4_operands_0_to_999_uniform_reverse/test_reverse_results.csv \
  --iter-start 8000 --iter-end 12000 --iter-step 2000 \
  --diff-min -100 --diff-max 100


In [None]:
!python result_analysis_script/fit_normal.py \
  --input results/4_operands_0_to_999_uniform/reverse_out/4_operands_0_to_999_uniform_reverse/test_reverse_results.csv \
  --iter-start 60000 --iter-end 64000 --iter-step 2000 \
  --diff-min -20 --diff-max 20


#### Mutual Information Plot

In [None]:
!python result_analysis_script/plot_mi_metrics.py \
  results/4_operands_0_to_999_uniform/reverse_out_complete_MI_1M_lines/4_operands_0_to_999_uniform_reverse/mi_metrics.csv

## Simple Multiplication Task

#### Digitwise Error (Simple multiplication, Colormap)

In [None]:
!python result_analysis_script/mul_digitwise_error_colormap.py results/40_digit_times_1_digit/reverse_out/40_digit_times_1_digit/test_reverse_results.csv --max_steps 3000

## Comparison Task




#### Comparison Error Rate (Contrast Pairs)

In [9]:
!python result_analysis_script/comparison_error_rate.py \
  results/comparison_bal/comparison_bal_1/thousands_diff_only_results.csv \
  results/comparison_bal/comparison_bal_1/hundreds_diff_only_results.csv \
  results/comparison_bal/comparison_bal_1/tens_diff_only_results.csv \
  results/comparison_bal/comparison_bal_1/units_diff_only_results.csv \
  --output_file_name contrast_pair_error_rate

Saved plot to results/comparison_bal/comparison_bal_1/contrast_pair_error_rate


## Sorting Task

#### Sorting Subskill from 10% to 90% Range

In [None]:
!python result_analysis_script/sorting_acc_10_90_range.py \
  --csv \
    results/4_operands_sorting_doubly_balanced/conflicting_same_control_exp_correction/4_operands_sorting_doubly_balanced_conflicting_same_correction/test_results.csv \
    results/4_operands_sorting_doubly_balanced/conflicting_same_control_exp_correction/4_operands_sorting_doubly_balanced_conflicting_same_correction/digitwise_random_results.csv \
    results/4_operands_sorting_doubly_balanced/conflicting_same_control_exp_correction/4_operands_sorting_doubly_balanced_conflicting_same_correction/digitwise_thousand_results.csv \
    results/4_operands_sorting_doubly_balanced/conflicting_same_control_exp_correction/4_operands_sorting_doubly_balanced_conflicting_same_correction/digitwise_hundred_results.csv \
    results/4_operands_sorting_doubly_balanced/conflicting_same_control_exp_correction/4_operands_sorting_doubly_balanced_conflicting_same_correction/digitwise_ten_results.csv \
  --positions 1,2,3,4 \
  --mode length first second third fourth


#### Sorting Mixing Error

In [None]:
!python result_analysis_script/mixing_error.py results/4_operands_sorting_doubly_balanced/conflicting_same_control_exp_correction_v2/4_operands_sorting_doubly_balanced_conflicting_same_correction_v2/1_3_same_2_4_agreeing_v2_results.csv


# IV. NanoGPT Scaling

#### 20M Parameters

In [None]:
!python train.py 20M_4_operands_addition_reversed.txt

#### 100M Parameters

In [None]:
!python train.py 100M_4_operands_addition_reversed.txt