In [26]:
import numpy as np
import pandas as pd
import random as rd

# Problem size: N rows, each with FEATURES independent input columns.
N = 100000
FEATURES = 10
# Fixed seed so that Restart & Run All regenerates the same data and programs.
SEED = 0

# Seed both generators used in this cell: NumPy draws the data, `random`
# drives random_program().
np.random.seed(SEED)
rd.seed(SEED)

# Pool of single-letter column names; only the first FEATURES are used.
# NOTE(review): the letter "l" is absent from this pool — confirm it is intentional.
cols = "abcdefghijkmnopqrstuv"
columns = list(cols)[:FEATURES]

# Feature matrix of uniform samples in [0, 1).
x = np.random.rand(N, FEATURES)

df = pd.DataFrame(x, columns = columns)
# Target: sin(a) + cos(b) plus a small uniform noise term scaled by 0.001.
df["y"] = np.sin(df["a"].values) + np.cos(df["b"].values) + np.random.rand(N) * 0.001

# index=False keeps the row index out of the file; otherwise pd.read_csv would
# load it back as an extra "Unnamed: 0" column next to the features.
df.to_csv("data.csv", index=False)


# Vocabulary of the generated expressions: C-style unary function names and
# the infix operators that may join two sub-programs.
unary_funs = ["sinf", "cosf", "sqrtf"]
operators = ["+", "-"]


def random_program(depth=4):
    """Build one random expression string over the feature columns.

    A single roll in 0..100 (inclusive) picks the node kind:

    * roll < 30, or ``depth == 0``  -> leaf ``_<column>_``
    * 30 <= roll < 80               -> unary call ``fun(<sub-program>)``
    * roll >= 80                    -> binary ``(<left>) op (<right>)``

    Each recursive call receives ``depth - 1``, so ``depth`` bounds the
    nesting level of the returned expression.
    """
    roll = rd.randint(0, 100)

    # Leaf: forced once the depth budget is spent.
    if depth == 0 or roll < 30:
        return "_{}_".format(rd.choice(columns))

    # Binary node: pick the operator first, then the left and right operands.
    if roll >= 80:
        symbol = rd.choice(operators)
        left = random_program(depth - 1)
        right = random_program(depth - 1)
        return "({}) {} ({})".format(left, symbol, right)

    # Unary node: pick the function first, then its argument.
    fun = rd.choice(unary_funs)
    inner = random_program(depth - 1)
    return "{}({})".format(fun, inner)


# Emit 1000 random programs into functions.txt, one expression per line.
with open("functions.txt", "w") as f:
    f.writelines(random_program() + "\n" for _ in range(1000))

# Sequential Version

In [27]:
import numpy as np
import pandas as pd
import time

from sympy import sec


# Dataset whose "y" column is the regression target used by score().
df = pd.read_csv("data.csv")

# Candidate programs, one expression per line. The context manager closes the
# file deterministically, and blank lines are skipped because an empty string
# cannot be evaluated as an expression.
with open("functions.txt") as fh:
    funs = [ line.strip() for line in fh if line.strip() ]

def score(line):
    """Return the mean squared error of one candidate program against ``df["y"]``.

    ``line`` is an expression using C-style unary names (``sinf``, ``cosf``,
    ``tanf``, ``sqrtf``, ``expf``) and ``_<column>_`` placeholders. It is
    rewritten into a NumPy expression over the columns of the module-level
    ``df`` and evaluated.

    Returns a Python float. If the error is not finite — for example the
    program takes ``sqrtf`` of a negative number on some row — the result is
    ``inf``, so an undefined program can never be selected by ``min()``.
    """
    for u in ["sinf", "cosf", "tanf", "sqrtf", "expf"]:
        # Drop the trailing "f": sinf -> np.sin, sqrtf -> np.sqrt, ...
        line = line.replace(u, f"np.{u[:-1]}")
    for c in df.columns:
        line = line.replace(f"_{c}_", f"(df[\"{c}\"].values)")

    # Invalid/overflowing rows are handled explicitly below, so the matching
    # NumPy warnings would only be noise in the notebook output.
    with np.errstate(invalid="ignore", over="ignore"):
        # NOTE(security): eval executes arbitrary Python. Only feed this
        # function programs from a trusted functions.txt.
        a = eval(line)
        # Compare plain arrays: subtracting a pandas Series would make the
        # final mean() skip NaN rows and under-report the error.
        b = df["y"].values
        e = np.mean(np.square(np.subtract(a, b)))
    return float(e) if np.isfinite(e) else float("inf")

# Timing 1: score a single program (the first one in the file).
# perf_counter is a monotonic, high-resolution clock meant for measuring
# intervals; the wall clock can jump while the cell is running.
l = funs[0]
first_start = time.perf_counter()
print(score(l), l)
first_end = time.perf_counter()
print(f"1 - Time taken: {first_end - first_start} seconds")

# Timing 2: score every program and keep the one with the lowest error.
secound_start = time.perf_counter()
# Pairs are ordered by score, then by program text. The leading flag is True
# only for NaN scores (NaN != NaN), which pushes them behind every real score;
# without it a NaN in the first position would be returned as the minimum.
r = min(((score(line), line) for line in funs),
        key=lambda pair: (pair[0] != pair[0], pair))
secound_end = time.perf_counter()
print(f"2 - Time taken: {secound_end - secound_start} seconds")
print(f"{r[0]} {r[1]}")

0.19149575194659935 sqrtf(sqrtf(cosf(_b_)))
1 - Time taken: 0.0038759708404541016 seconds




2 - Time taken: 2.8357927799224854 seconds
0.034627406950869515 (sinf(cosf(_d_))) + (_a_)


# Parallel Version

In [28]:
import pandas as pd
import torch
import time


# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

df = pd.read_csv("data.csv")
# Candidate programs, one expression per line. The context manager closes the
# file deterministically, and blank lines are skipped because an empty string
# cannot be evaluated as an expression.
with open("functions.txt") as fh:
    funs = [ line.strip() for line in fh if line.strip() ]

# Convert every column to a float32 tensor on `device` once, up front, so that
# score() only indexes this mapping instead of converting per program.
df_torch = {col: torch.tensor(df[col].values, dtype=torch.float32, device=device)
            for col in df.columns}

def score(line):
    """Return the mean squared error of one candidate program against ``df_torch["y"]``.

    ``line`` is an expression using C-style unary names (``sinf``, ``cosf``,
    ``tanf``, ``sqrtf``, ``expf``) and ``_<column>_`` placeholders. It is
    rewritten into a PyTorch expression over the tensors stored in the
    module-level ``df_torch`` mapping and evaluated.

    Returns a Python float. If the error is not finite — for example the
    program takes ``sqrtf`` of a negative number on some row — the result is
    ``inf``, so an undefined program can never be selected by ``min()``.
    """
    for u in ["sinf", "cosf", "tanf", "sqrtf", "expf"]:
        # Drop the trailing "f": sinf -> torch.sin, sqrtf -> torch.sqrt, ...
        line = line.replace(u, f"torch.{u[:-1]}")
    # Resolve placeholders against the very mapping the expression indexes.
    for c in df_torch:
        line = line.replace(f"_{c}_", f"(df_torch[\"{c}\"])")

    # NOTE(security): eval executes arbitrary Python. Only feed this function
    # programs from a trusted functions.txt.
    a = eval(line)
    b = df_torch["y"]
    e = torch.square(torch.subtract(a, b)).mean()
    # .item() copies the scalar back to the host as a Python number.
    return e.item() if torch.isfinite(e).item() else float("inf")

# Timing 1: score a single program (the first one in the file).
# perf_counter is a monotonic, high-resolution clock meant for measuring
# intervals; the wall clock can jump while the cell is running.
l = funs[0]
first_start = time.perf_counter()
print(score(l), l)
first_end = time.perf_counter()
print(f"1 - Time taken: {first_end - first_start} seconds")

# Timing 2: score every program and keep the one with the lowest error.
secound_start = time.perf_counter()
# Pairs are ordered by score, then by program text. The leading flag is True
# only for NaN scores (NaN != NaN), which pushes them behind every real score;
# without it a NaN in the first position would be returned as the minimum.
r = min(((score(line), line) for line in funs),
        key=lambda pair: (pair[0] != pair[0], pair))
secound_end = time.perf_counter()
print(f"2 - Time taken: {secound_end - secound_start} seconds")
print(f"{r[0]} {r[1]}")

0.19149574637413025 sqrtf(sqrtf(cosf(_b_)))
1 - Time taken: 0.0011420249938964844 seconds
2 - Time taken: 0.5961143970489502 seconds
0.03462740778923035 (sinf(cosf(_d_))) + (_a_)
