Generate the color gradient used to color the timesteps

In [None]:
import pandas as pd

def generate_color_gradient(start_color, end_color, num_repetitions):
    """Build a repeated ramp of color values.

    A single ramp has ``end_color - start_color + 1`` entries, and entry
    ``i`` of the ramp is ``(start_color + 10 * i) % 2000``.  The ramp is
    emitted ``num_repetitions`` times, one after another.

    Args:
        start_color: Value of the first entry of every ramp.
        end_color: Together with ``start_color`` fixes the ramp length.
        num_repetitions: Number of times the ramp is repeated.

    Returns:
        A flat list holding all ramps back to back.
    """
    ramp_length = end_color - start_color + 1
    return [
        (start_color + 10 * offset) % 2000
        for _ in range(num_repetitions)
        for offset in range(ramp_length)
    ]

# Gradient settings passed to generate_color_gradient.
start_color = 0  # value of the first entry of each ramp
end_color = 199  # with start_color, gives a ramp of 200 entries
num_repetitions = 33  # how many times the ramp is repeated

colors = generate_color_gradient(
    start_color=start_color,
    end_color=end_color,
    num_repetitions=num_repetitions,
)

# Store the gradient as a single 'Color' column, without the index column.
df = pd.DataFrame(data={'Color': colors})
df.to_csv(
    '../data/fgtransform/model1/oxidation/1alcsoxpert3/rxns/timestep200-scale0.005/1000scan/color_gradient.csv',
    index=False,
)


Code that calculates the difference between the embeddings of consecutive timesteps

In [1]:
import numpy as np

number_timesteps = 200
number_molecules = 33
embedding_size = 128  # number of leading columns that are differenced
data_filepath = '../data/fgtransform/model1/oxidation/1alcsoxpert3/rxns/timestep200-scale0.005/1000scan/overtimesteps/Oembsrxnpcaisolate.csv'

# NOTE(review): rows are assumed to be stacked in blocks of number_timesteps
# consecutive rows, one block per molecule -- confirm against the producer.
data = np.genfromtxt(data_filepath, delimiter=',', encoding='utf-8-sig')

# Fail early with a readable message instead of an index/shape error later.
total_rows = number_molecules * number_timesteps
if data.ndim != 2 or data.shape[0] < total_rows or data.shape[1] < embedding_size:
    raise ValueError(
        f'expected at least {total_rows} rows and {embedding_size} columns '
        f'in {data_filepath}, got an array of shape {data.shape}'
    )

embeddings = data[:total_rows, :embedding_size]

# Row t of the result is embeddings[t] - embeddings[t - 1].  This is computed
# in one vectorized pass; growing the array with np.vstack once per row would
# copy the whole result on every iteration.
differences = np.zeros_like(embeddings)
differences[1:] = np.diff(embeddings, axis=0)

# The first row of every block has no predecessor inside its block, so it is
# kept at zero rather than being differenced against the previous block.
differences[::number_timesteps] = 0.0

save_filepath = data_filepath.replace('.csv', 'diff.csv')
np.savetxt(save_filepath, differences, delimiter=',')

Converts the timestep changes stored in the CSV sheet (all molecules stacked on top of each other)

into a matrix covering all molecules (each molecule becomes one column
of timestep changes in the new matrix).

One such matrix is built for each embedding parameter, i.e. for each column of the input sheet.

In [3]:
import csv
import numpy as np
from numpy import genfromtxt, savetxt



def convert_column_to_matrix(column, num_timesteps=200):
    """Reshape a stacked 1-D series into one column per block.

    ``column`` is read as consecutive blocks of ``num_timesteps`` values.
    Block ``k`` becomes column ``k`` of the result, so the result has shape
    ``(num_timesteps, len(column) // num_timesteps)``.

    Args:
        column: 1-D sequence whose length is a multiple of ``num_timesteps``.
        num_timesteps: Length of one block.  Defaults to 200, the value that
            was previously hard-coded.

    Returns:
        A 2-D numpy array with one row per position inside a block and one
        column per block.

    Raises:
        ValueError: If ``num_timesteps`` is not positive or the length of
            ``column`` is not a multiple of it.
    """
    if num_timesteps <= 0:
        raise ValueError(f'num_timesteps must be positive, got {num_timesteps}')

    num_blocks, leftover = divmod(len(column), num_timesteps)
    if leftover:
        raise ValueError(
            f'column length {len(column)} is not a multiple of '
            f'num_timesteps={num_timesteps}'
        )

    # Each reshaped row is one block; transposing turns blocks into columns.
    return np.array(column).reshape(num_blocks, num_timesteps).T

def save_matrix_to_csv(matrix, output_file):
    """Write every row of ``matrix`` as one CSV record to ``output_file``.

    The file is opened in write mode, so an existing file is replaced.
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(output_file, mode='w', newline='') as handle:
        csv_writer = csv.writer(handle)
        for record in matrix:
            csv_writer.writerow(record)

def convert_matrix_to_columns(matrix):
    """Export one summary CSV per column of ``matrix``.

    Each column is reshaped with ``convert_column_to_matrix`` and then
    widened with, from left to right: a 1-based row counter, the reshaped
    values, the row-wise mean, the row-wise standard deviation, mean + std
    and mean - std.  The table is written with ``save_matrix_to_csv`` and a
    confirmation line is printed.

    The output path is built from the module-level ``data_filepath`` (it is
    not a parameter), so that name must be defined before this is called.
    """
    for col_index in range(matrix.shape[1]):
        reshaped = convert_column_to_matrix(matrix[:, col_index])
        row_total = reshaped.shape[0]

        # Every extra column is kept as (row_total, 1) so it can be joined
        # side by side with the reshaped values.
        counter_col = np.arange(1, row_total + 1).reshape(row_total, 1)
        mean_col = np.mean(reshaped, axis=1, keepdims=True)
        spread_col = np.std(reshaped, axis=1, keepdims=True)

        table = np.concatenate(
            (
                counter_col,
                reshaped,
                mean_col,
                spread_col,
                mean_col + spread_col,  # upper bound
                mean_col - spread_col,  # lower bound
            ),
            axis=1,
        )

        # One output file per input column, numbered from 1.
        output_file = data_filepath.replace('.csv','') +  f'column_{col_index + 1}_result.csv'
        save_matrix_to_csv(table, output_file)
        print(f"Saved result for column {col_index + 1} to {output_file}")


# Example run: load the 'diff' CSV and export one summary CSV per column.
# convert_matrix_to_columns reads data_filepath to build its output paths.
data_filepath = '../data/fgtransform/model1/oxidation/1alcsoxpert3/rxns/timestep200-scale0.005/1000scan/overtimesteps/Oembsrxnpcaisolatediff.csv'
input_matrix = np.genfromtxt(
    data_filepath,
    delimiter=',',
    encoding='utf-8-sig',
)

convert_matrix_to_columns(input_matrix)


Saved result for column 1 to ../data/fgtransform/model1/oxidation/1alcsoxpert3/rxns/timestep200-scale0.005/1000scan/overtimesteps/Oembsrxnpcaisolatediffcolumn_1_result.csv
Saved result for column 2 to ../data/fgtransform/model1/oxidation/1alcsoxpert3/rxns/timestep200-scale0.005/1000scan/overtimesteps/Oembsrxnpcaisolatediffcolumn_2_result.csv
Saved result for column 3 to ../data/fgtransform/model1/oxidation/1alcsoxpert3/rxns/timestep200-scale0.005/1000scan/overtimesteps/Oembsrxnpcaisolatediffcolumn_3_result.csv
Saved result for column 4 to ../data/fgtransform/model1/oxidation/1alcsoxpert3/rxns/timestep200-scale0.005/1000scan/overtimesteps/Oembsrxnpcaisolatediffcolumn_4_result.csv
Saved result for column 5 to ../data/fgtransform/model1/oxidation/1alcsoxpert3/rxns/timestep200-scale0.005/1000scan/overtimesteps/Oembsrxnpcaisolatediffcolumn_5_result.csv
Saved result for column 6 to ../data/fgtransform/model1/oxidation/1alcsoxpert3/rxns/timestep200-scale0.005/1000scan/overtimesteps/Oembsrxnpc

Tracking the FG path of the reaction over the timesteps

In [1]:
import pandas as pd

# Load the predicted classes into a DataFrame
df = pd.read_csv('../data/fgtransform/model1/oxidation/1alcsoxpert3/rxns/timestep200-scale0.005/1000scan/rxn/predicted_classesqm9.csv')

# Column whose values are tallied
column_name = 'rxn'

# Per-window results: distinct values (in order of first appearance) and
# how often each value occurs
unique_elements = []
element_counts = []

# Walk the column in consecutive windows of 200 rows
values = df[column_name]
for start in range(0, len(values), 200):
    window = values[start:start + 200]
    unique_elements.append(window.unique())
    element_counts.append(window.value_counts())

# Report every window: each distinct value with its count, then a blank line
for i, (unique, counts) in enumerate(zip(unique_elements, element_counts)):
    print(f"Data Range {i+1}:")
    for element in unique:
        count = counts.get(element, 0)
        print(f"Element: {element}, Count: {count}")
    print()


Data Range 1:
Element: 2, Count: 7
Element: 7, Count: 193

Data Range 2:
Element: 2, Count: 13
Element: 7, Count: 187

Data Range 3:
Element: 2, Count: 13
Element: 7, Count: 187

Data Range 4:
Element: 3, Count: 1
Element: 2, Count: 6
Element: 7, Count: 193

Data Range 5:
Element: 3, Count: 2
Element: 2, Count: 5
Element: 7, Count: 193

Data Range 6:
Element: 2, Count: 5
Element: 7, Count: 195

Data Range 7:
Element: 3, Count: 6
Element: 2, Count: 7
Element: 7, Count: 187

Data Range 8:
Element: 3, Count: 6
Element: 2, Count: 7
Element: 7, Count: 187

Data Range 9:
Element: 3, Count: 2
Element: 2, Count: 4
Element: 7, Count: 194

Data Range 10:
Element: 3, Count: 1
Element: 2, Count: 12
Element: 7, Count: 187

Data Range 11:
Element: 3, Count: 8
Element: 2, Count: 6
Element: 7, Count: 186

Data Range 12:
Element: 3, Count: 2
Element: 2, Count: 5
Element: 7, Count: 193

Data Range 13:
Element: 3, Count: 2
Element: 2, Count: 4
Element: 7, Count: 194

Data Range 14:
Element: 3, Count: 1
E