### Data processing

In [1]:
import pandas as pd
import numpy as np

# Sequences that the later cells perturb; the column literally named " " is discarded.
df_high = pd.read_csv("high_values/top_sequences.txt", sep="\t").drop(columns=" ")
# Reference table, indexed by its "Samples" column.
df_raw = pd.read_excel("produce/ralstonia_high.xlsx", index_col="Samples")

# Split the reference table into the "ralstonia" target and the remaining
# feature columns, both stored as float32 arrays.
y_right = np.array(df_raw["ralstonia"].to_numpy(), dtype=np.float32)
X_right = np.array(df_raw.drop(columns="ralstonia").to_numpy(), dtype=np.float32)

In [2]:
import torch
import torch.nn.functional as F

class Siamese(torch.nn.Module):
    """Two-input network: both inputs go through the same layers and the
    difference of the two resulting outputs is returned."""

    def __init__(self):
        super().__init__()
        # Attribute names are kept unchanged because they determine the
        # parameter names in the module's state_dict.
        self.nn1 = torch.nn.Linear(42, 30)
        self.nn2 = torch.nn.Linear(30, 1)

    def _score(self, x):
        """Apply the shared Linear -> ReLU -> Linear stack to one input."""
        return self.nn2(F.relu(self.nn1(x)))

    def forward(self, x1, x2):
        """Return ``score(x1) - score(x2)`` using the shared weights.

        Both inputs need a last dimension of 42 (the input size of ``nn1``).
        """
        left_score = self._score(x1)
        right_score = self._score(x2)
        return left_score - right_score
    
class Twin(torch.nn.Module):
    """Single-input three-layer perceptron (84 -> 60 -> 30 -> 1) with ReLU
    after the first two layers."""

    def __init__(self):
        super().__init__()
        # Attribute names are kept unchanged because they determine the
        # parameter names in the module's state_dict.
        self.nn1 = torch.nn.Linear(84, 60)
        self.nn2 = torch.nn.Linear(60, 30)
        self.nn3 = torch.nn.Linear(30, 1)

    def forward(self, x1):
        """Map an input whose last dimension is 84 to a single output value."""
        hidden = F.relu(self.nn1(x1))
        hidden = F.relu(self.nn2(hidden))
        return self.nn3(hidden)

In [89]:
import copy

def get_ralstonia_value(row, X_right, model_arch):
    """Score one row against every reference sample and average the result.

    ``row`` is tiled so that it is paired with each row of ``X_right``. The
    model produces one value per pair, the matching entry of ``y_right`` is
    added to it, and the mean over all pairs is returned.

    Reads the notebook globals ``model``, ``device`` and ``y_right``.

    Args:
        row: 1-D sequence of feature values; must be broadcastable to the
            shape of ``X_right``.
        X_right: 2-D NumPy array of reference samples, one per row.
        model_arch: ``"siamese"`` (model called with two inputs) or
            ``"twin"`` (model called with both inputs concatenated along
            axis 1).

    Returns:
        The mean of ``prediction + y_right`` over the reference samples.

    Raises:
        ValueError: if ``model_arch`` is neither ``"siamese"`` nor ``"twin"``.
    """
    # Validate first: previously an unknown architecture skipped both
    # branches and failed later with an UnboundLocalError on ``y_pred``.
    if model_arch not in ("siamese", "twin"):
        raise ValueError(
            "Unknown model_arch {!r}; expected 'siamese' or 'twin'".format(model_arch)
        )

    X_left = np.broadcast_to(np.array(row), X_right.shape)

    # Inference only, so skip building the autograd graph.
    with torch.no_grad():
        if model_arch == "siamese":
            left = torch.tensor(X_left, dtype=torch.float32).to(device)
            right = torch.tensor(X_right, dtype=torch.float32).to(device)
            y_pred = model(left, right)
        else:
            X_pred = torch.tensor(
                np.concatenate((X_left, X_right), axis=1), dtype=torch.float32
            ).to(device)
            y_pred = model(X_pred)

    # squeeze() drops the trailing size-1 dimension so the addition with the
    # 1-D ``y_right`` is element-wise instead of broadcasting to a matrix.
    y_pred = y_pred.cpu().detach().numpy().squeeze()
    return np.mean(y_pred + y_right)

def perturb(row, X_right, model_arch, last_high, fork, high_val):
    """Greedy hill climb that flips one position of ``row`` per step.

    Each step scores every single-position flip (``1 - value``) of the
    current row with ``get_ralstonia_value``, applies the best-scoring flip,
    and appends the resulting row followed by its score as one
    space-separated line to ``"<fork>.txt"``. The search continues from the
    new row while its score is strictly greater than the best score so far;
    otherwise ``"Converged"`` is printed and the function returns. The final,
    non-improving row is therefore also written to the file.

    Implemented as a loop rather than self-recursion so that long climbs
    cannot exceed Python's recursion limit.

    Args:
        row: Sequence of 0/1 feature values to start from (not modified).
        X_right: Reference samples forwarded to ``get_ralstonia_value``.
        model_arch: Architecture name forwarded to ``get_ralstonia_value``.
        last_high: Unused; kept so existing call sites keep working.
        fork: Output file stem; results are appended to ``"<fork>.txt"``.
        high_val: Score a flipped row must exceed for the search to go on.

    Returns:
        None.

    Raises:
        ValueError: if ``row`` is empty (there is nothing to flip).
    """
    if len(row) == 0:
        raise ValueError("row must contain at least one feature to perturb")

    current_row = copy.deepcopy(row)
    while True:
        # Score every one-position flip, restoring the position afterwards so
        # a single scratch copy is enough for the whole sweep.
        candidate = copy.deepcopy(current_row)
        ralst_vals = []
        for index in range(len(candidate)):
            original_value = candidate[index]
            candidate[index] = 1 - int(original_value)
            ralst_vals.append(get_ralstonia_value(candidate, X_right, model_arch))
            candidate[index] = original_value

        best_index = int(np.argmax(ralst_vals))
        best_row = copy.deepcopy(current_row)
        best_row[best_index] = 1 - best_row[best_index]
        ralst_best_row = get_ralstonia_value(best_row, X_right, model_arch)

        # Re-open in append mode on every step so progress is on disk even if
        # a later step fails.
        with open("{}.txt".format(fork), "a") as f:
            for item in best_row:
                f.write("{} ".format(item))
            f.write("{}\n".format(ralst_best_row))

        if ralst_best_row > high_val:
            current_row = best_row
            high_val = ralst_best_row
        else:
            print("Converged")
            return
        
        

In [94]:
# Greedy perturbation search with the siamese model saved for fork1.
model_arch = 'siamese'
# Use the GPU when present, otherwise fall back to CPU so the cell still runs.
device = "cuda" if torch.cuda.is_available() else "cpu"
fork = "fork1"
# NOTE: torch.load unpickles a complete model object; only load checkpoints
# from a trusted source. map_location lets a checkpoint saved on a GPU load
# on the CPU fallback as well.
model = torch.load("produce/saved_models/high_fork1.pt", map_location=device).to(device)
# NOTE(review): the search threshold starts at 0, not at the starting row's
# own score - confirm this is intended.
high_val = 0
# perturb() ignores this argument; it is defined here because no cell in the
# notebook sets it, which made this cell fail on a fresh kernel.
last_high = None
for i in range(df_high.shape[0]):
    row = df_high.iloc[i].values.tolist()
    perturb(row, X_right, model_arch, last_high, fork, high_val)
    
    

Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged


In [91]:
# Greedy perturbation search with the siamese model saved for fork2.
model_arch = 'siamese'
# Use the GPU when present, otherwise fall back to CPU so the cell still runs.
device = "cuda" if torch.cuda.is_available() else "cpu"
fork = "fork2"
# NOTE: torch.load unpickles a complete model object; only load checkpoints
# from a trusted source. map_location lets a checkpoint saved on a GPU load
# on the CPU fallback as well.
model = torch.load("produce/saved_models/high_fork2.pt", map_location=device).to(device)
# NOTE(review): the search threshold starts at 0, not at the starting row's
# own score - confirm this is intended.
high_val = 0
# perturb() ignores this argument; it is defined here because no cell in the
# notebook sets it, which made this cell fail on a fresh kernel.
last_high = None
for i in range(df_high.shape[0]):
    row = df_high.iloc[i].values.tolist()
    perturb(row, X_right, model_arch, last_high, fork, high_val)

Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged


In [92]:
# Greedy perturbation search with the twin model saved for fork3.
model_arch = 'twin'
# Use the GPU when present, otherwise fall back to CPU so the cell still runs.
device = "cuda" if torch.cuda.is_available() else "cpu"
fork = "fork3"
# NOTE: torch.load unpickles a complete model object; only load checkpoints
# from a trusted source. map_location lets a checkpoint saved on a GPU load
# on the CPU fallback as well.
model = torch.load("produce/saved_models/high_fork3.pt", map_location=device).to(device)
# NOTE(review): the search threshold starts at 0, not at the starting row's
# own score - confirm this is intended.
high_val = 0
# perturb() ignores this argument; it is defined here because no cell in the
# notebook sets it, which made this cell fail on a fresh kernel.
last_high = None
for i in range(df_high.shape[0]):
    row = df_high.iloc[i].values.tolist()
    perturb(row, X_right, model_arch, last_high, fork, high_val)

Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged


In [93]:
# Greedy perturbation search with the twin model saved for fork4.
model_arch = 'twin'
# Use the GPU when present, otherwise fall back to CPU so the cell still runs.
device = "cuda" if torch.cuda.is_available() else "cpu"
fork = "fork4"
# NOTE: torch.load unpickles a complete model object; only load checkpoints
# from a trusted source. map_location lets a checkpoint saved on a GPU load
# on the CPU fallback as well.
model = torch.load("produce/saved_models/high_fork4.pt", map_location=device).to(device)
# NOTE(review): the search threshold starts at 0, not at the starting row's
# own score - confirm this is intended.
high_val = 0
# perturb() ignores this argument; it is defined here because no cell in the
# notebook sets it, which made this cell fail on a fresh kernel.
last_high = None
for i in range(df_high.shape[0]):
    row = df_high.iloc[i].values.tolist()
    perturb(row, X_right, model_arch, last_high, fork, high_val)

Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
Converged
