# Summary of results

#### Importing libraries


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path
import json

from constants import *

#### Importing data

In [4]:
# Read the summary JSON from TEMP_DIR into `data`. The cells below index it as
# data[<equation key>][<algorithm name>].
summary_path = TEMP_DIR / "eq_algo_summary.json"
with open(summary_path) as json_file:
    data = json.load(json_file)

In [13]:
# Efficiency of every algorithm relative to the "random" baseline of the same equation:
#     efficiency = average_last_n(random) / average_last_n(algorithm)
# A value above 1 therefore means the algorithm's average is lower than the baseline's.
# Each entry appended to `efficiencies` is (equation name, algorithm, finished, efficiency).
efficiencies = []
for eq in data:
    # The equation name is the second space-separated token of the key.
    eq_name = eq.split(" ")[1]
    algo_results = data[eq]

    # The baseline does not depend on the algorithm, so look it up once per equation.
    # Only a missing key is treated as "no baseline"; any other error is a real
    # problem with the data and is allowed to surface.
    try:
        random_steps = algo_results["random"]["average_last_n"]
    except KeyError:
        # No baseline: nothing to report for this equation, but keep the blank
        # separator line so the printed layout stays the same.
        print()
        continue

    for algo, result in algo_results.items():
        num = result["finished"]
        average_steps = result["average_last_n"]
        efficiency = random_steps / average_steps
        efficiencies.append((eq_name, algo, num, efficiency))
        print(f'{eq_name}, {algo}, {average_steps}, {num}, {efficiency}')

    print()

print(efficiencies)

II.11.17, loss-std, 14.541666666666666, 96, 1.228606031961717
II.11.17, true-confusion, 11.081632653061224, 98, 1.6122154506274802
II.11.17, combinatory, 22.367346938775512, 98, 0.7987527278200015
II.11.17, std, 15.375, 96, 1.1620149191182634
II.11.17, random, 17.8659793814433, 97, 1.0
II.11.17, complexity-std, 15.282828282828282, 99, 1.169023105593448

I.24.6, loss-std, 39.5, 4, 2.0359760159893403
I.24.6, true-confusion, 129.85555555555555, 90, 0.6193116057878074
I.24.6, combinatory, 116.24489795918367, 98, 0.6918243642814902
I.24.6, std, 122.25510204081633, 98, 0.657813467815269
I.24.6, random, 80.42105263157895, 19, 1.0
I.24.6, complexity-std, 18.125, 8, 4.437023593466424

II.11.20, random, 17.912280701754387, 57, 1.0
II.11.20, std, 14.176470588235293, 68, 1.263521875227488
II.11.20, complexity-std, 18.871794871794872, 39, 0.9491561784897026
II.11.20, loss-std, 14.346938775510203, 98, 1.2485088967083429
II.11.20, combinatory, 14.614583333333334, 96, 1.2256442960573208
II.11.20, true

In [22]:
# Count, per equation, how many algorithms have an efficiency greater than 1, i.e. a
# lower average than the "random" baseline. `finished` is the number of samples behind
# an average; fewer samples means less confidence in the result, so a win backed by
# MIN_CONFIDENT_SAMPLES samples or fewer only counts as LOW_CONFIDENCE_WEIGHT.
# Take that into account when interpreting the results.
MIN_CONFIDENT_SAMPLES = 40
LOW_CONFIDENCE_WEIGHT = 0.5

for eq in data:
    eq_name = eq.split(" ")[1]
    algo_results = data[eq]

    # Everything except the baseline itself. Counting these directly keeps the
    # denominator right even when an equation has no "random" entry.
    contenders = {algo: result for algo, result in algo_results.items() if algo != "random"}

    # Starts as an int so that an all-full-weight tally prints as "4", not "4.0".
    count = 0
    try:
        # Loop-invariant: one baseline per equation.
        random_steps = algo_results["random"]["average_last_n"]
    except KeyError:
        # No baseline to compare against: nothing can be scored, count stays 0.
        pass
    else:
        for result in contenders.values():
            num = result["finished"]
            efficiency = random_steps / result["average_last_n"]
            if efficiency > 1:
                if num > MIN_CONFIDENT_SAMPLES:
                    count += 1
                else:
                    count += LOW_CONFIDENCE_WEIGHT
    print(f'{eq_name}: {count}/{len(contenders)}')

II.11.17: 4/5
I.24.6: 1.0/5
II.11.20: 4/5
III.13.18: 5/5
I.13.4: 4.5/5
I.6.2b: 0/3


#### Table of results

In [36]:
# Build a long-format table with one row per (equation, algorithm) pair:
#   samples        - the "finished" count
#   average steps  - "average_last_n"
#   std            - sample standard deviation (ddof=1) of "last_n"
#   efficiency     - average steps of "random" divided by average steps of the algorithm
#   efficiency_std - uncertainty of that ratio (see the formula below)
# The table is indexed by equation, printed, and saved as CSV in TEMP_DIR.
TABLE_COLUMNS = ["equation", "algorithm", "samples", "average steps", "std", "efficiency", "efficiency_std"]

# Collect plain records and build the DataFrame once; concatenating onto a
# DataFrame inside the loop copies the whole table for every added row.
records = []
for eq in data:
    eq_name = eq.split(" ")[1]
    algo_results = data[eq]

    # The baseline is the same for every algorithm of an equation: compute it once.
    # Equations without baseline data are skipped; only a missing key counts as
    # "no baseline", other errors are allowed to surface.
    try:
        random_steps = algo_results["random"]["average_last_n"]
        random_std = np.std(algo_results["random"]["last_n"], ddof=1)
    except KeyError:
        continue

    for algo, result in algo_results.items():
        num = result["finished"]
        average_steps = result["average_last_n"]
        efficiency = random_steps / average_steps
        std = np.std(result["last_n"], ddof=1)
        # First-order error propagation for the ratio R/A with the two spreads
        # combined in quadrature (no covariance term):
        #     sigma^2 = (sigma_R / A)^2 + (R / A^2 * sigma_A)^2
        efficiency_std = np.sqrt(
            (random_std / average_steps) ** 2
            + (random_steps / average_steps ** 2 * std) ** 2
        )
        records.append({
            "equation": eq_name,
            "algorithm": algo,
            "samples": num,
            "average steps": average_steps,
            "std": std,
            "efficiency": efficiency,
            "efficiency_std": efficiency_std,
        })

# Passing the column list keeps the layout well-defined even when no rows were collected.
df = pd.DataFrame(records, columns=TABLE_COLUMNS).set_index("equation")
print(df)

# save the dataframe to a csv file
df.to_csv(TEMP_DIR / "eq_algo_summary.csv", index=True)

                algorithm samples  average steps         std  efficiency  \
equation                                                                   
II.11.17         loss-std      96      14.541667    4.225030    1.228606   
II.11.17   true-confusion      98      11.081633    5.954594    1.612215   
II.11.17      combinatory      98      22.367347   11.457126    0.798753   
II.11.17              std      96      15.375000    3.753595    1.162015   
II.11.17           random      97      17.865979    7.197553    1.000000   
II.11.17   complexity-std      99      15.282828    4.373017    1.169023   
I.24.6           loss-std       4      39.500000   29.916551    2.035976   
I.24.6     true-confusion      90     129.855556  114.578815    0.619312   
I.24.6        combinatory      98     116.244898  109.294274    0.691824   
I.24.6                std      98     122.255102  111.272611    0.657813   
I.24.6             random      19      80.421053   46.798998    1.000000   
I.24.6     c

## Feynman info

In [2]:
# Load feynman_info.json from the current working directory.
# NOTE: this rebinds the notebook-level name `data`.
import json
with open("feynman_info.json") as json_file:
    data = json.load(json_file)

# Tally the outcome recorded for each equation:
#   found     - found, but not instantly
#   instant   - found instantly
#   not_found - not found
#   failed    - "failed" flag set; counted independently, so such an entry is
#               also included in one of the three tallies above
found = not_found = instant = failed = 0
for info in data.values():
    if info["failed"]:
        failed += 1

    if not info["found"]:
        not_found += 1
    elif info["found_instantly"]:
        instant += 1
    else:
        found += 1

print(f"found: {found}, not found: {not_found}, instant: {instant}, failed: {failed}")

found: 42, not found: 25, instant: 33, failed: 5
