In [None]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload
import numpy as np
from Classes.Analysis import Analysis
from Classes.Visualizer import Visualizer
import h5py
from joblib import Parallel, delayed
import numpy as np
import pickle
import pandas as pd
from Classes.UltimateAnalysis import UltimateAnalysis
import dask.dataframe as dd

class System(Analysis):
    """``Analysis`` subclass that supplies the ODE right-hand side for a
    resource / bacteria / phage interaction graph."""

    def __init__(self, graph_location):
        """Forward *graph_location* to ``Analysis.__init__``."""
        super().__init__(graph_location)

    def odesystem(self, t, Y, *params):
        """Return the flattened derivative of the state ``[N, U, I, P]``.

        Args:
            t: Current time. Not used by the equations.
            Y: Flattened state. After ``self.check_cutoff`` it is split by
                ``self.unflatten_initial_matrix`` into ``N`` (one entry per
                resource), ``U`` (one per bacterium), ``I`` (bacteria x M
                infection stages) and ``P`` (one per phage).
            *params: Exactly twelve positional values, in this order:
                ``graph_object`` (its ``.graph`` must offer ``has_edge``),
                ``phage_nodes``, ``bacteria_nodes``, ``resource_nodes``
                (sequences of node ids whose positions index the arrays),
                ``M`` (number of infection stages),
                ``e_vector`` (indexed by resource),
                ``tau_vector`` (indexed by bacterium),
                ``v_matrix`` and ``K_matrix`` (indexed ``[bacterium, resource]``),
                ``r_matrix`` and ``B_matrix`` (indexed ``[phage, bacterium]``),
                ``environment`` (mapping providing ``'washout'``).

        Returns:
            Whatever ``self.flatten_lists_and_matrices`` produces for
            ``(dN, dU, dI, dP)``.
        """
        # start simple, bacteria-resource, see how the bacteria and resources grow/shrink, bacteria should hit carrying capacity, resource should reach 0, not negative, etc
        (graph_object, phage_nodes, bacteria_nodes, resource_nodes, M,
         e_vector, tau_vector, v_matrix, K_matrix, r_matrix, B_matrix,
         environment) = params
        graph = graph_object.graph
        washout = environment['washout']

        def g(N, v, K):
            """Saturating uptake rate ``N * v / (N + K)``."""
            return (N * v) / (N + K)

        Y = self.check_cutoff(Y)

        N, U, I, P = self.unflatten_initial_matrix(
            Y,
            [len(resource_nodes), len(bacteria_nodes), (len(bacteria_nodes), M), len(phage_nodes)],
        )
        new_N = np.zeros_like(N)
        new_U = np.zeros_like(U)
        new_I = np.zeros_like(I)
        new_P = np.zeros_like(P)

        # Resources: consumed by every bacterium that has an edge to the resource.
        # NOTE(review): the uptake rates and the populations are summed
        # separately and then multiplied, which adds cross terms between
        # different bacteria when several consume one resource. The
        # per-consumer form sum_b e * g_b * (U_b + sum(I_b)) may be intended.
        for n_index, resource in enumerate(resource_nodes):
            sum_g = 0
            sum_u = 0
            sum_i = 0
            for b_index, bacteria in enumerate(bacteria_nodes):
                if graph.has_edge(bacteria, resource):
                    sum_g += g(N[n_index], v_matrix[b_index, n_index], K_matrix[b_index, n_index])
                    sum_u += U[b_index]
                    sum_i += np.sum(I[b_index])
            new_N[n_index] = -(e_vector[n_index] * sum_g) * (sum_u + sum_i) - N[n_index] * washout

        # Uninfected bacteria: growth on connected resources minus infection
        # by connected phages minus washout. U is not summed here because
        # each entry describes a single bacterium.
        for u_index, uninfected in enumerate(bacteria_nodes):
            g_sum = 0
            infection_rate = 0
            for n_index, resource in enumerate(resource_nodes):
                if graph.has_edge(uninfected, resource):
                    g_sum += g(N[n_index], v_matrix[u_index, n_index], K_matrix[u_index, n_index])
            for p_index, phage in enumerate(phage_nodes):
                if graph.has_edge(phage, uninfected):
                    infection_rate += r_matrix[p_index, u_index] * P[p_index]
            new_U[u_index] = g_sum * U[u_index] - infection_rate * U[u_index] - U[u_index] * washout

        # Infected bacteria: a chain of M stages per bacterium.
        for i_index, infected in enumerate(bacteria_nodes):
            m_tau = M / tau_vector[i_index]
            for infected_stage in range(0, M):
                if infected_stage == 0:
                    left_sum = 0
                    right_sum = 0
                    for p_index, phage in enumerate(phage_nodes):
                        if graph.has_edge(phage, infected):
                            left_sum += r_matrix[p_index, i_index] * P[p_index]
                            # NOTE(review): the stage-0 outflow is added once
                            # per infecting phage, while stage 1 below gains
                            # m_tau * I[:, 0] only once. Confirm the intent
                            # for bacteria with zero or several phages.
                            right_sum += m_tau * I[i_index, 0]
                    # Washout removes stage-0 infected cells, so it scales
                    # with I[:, 0]; the uninfected washout is already in new_U.
                    new_I[i_index, 0] = left_sum * U[i_index] - right_sum - I[i_index, 0] * washout
                else:
                    stage_flow = I[i_index, infected_stage - 1] - I[i_index, infected_stage]
                    # Washout must use the current state I; new_I is still
                    # zero here, which would silently drop the term.
                    new_I[i_index, infected_stage] = m_tau * stage_flow - I[i_index, infected_stage] * washout

        # Phages: released from the last infection stage, lost by adsorption
        # to connected bacteria (uninfected and infected) and by washout.
        for p_index, phage in enumerate(phage_nodes):
            left_sum = 0
            right_sum = 0
            for i_index, infected in enumerate(bacteria_nodes):
                if graph.has_edge(phage, infected):
                    left_sum += B_matrix[p_index, i_index] * M / tau_vector[i_index] * I[i_index, -1]
                    right_sum += r_matrix[p_index, i_index] * (U[i_index] + np.sum(I[i_index])) * P[p_index]
            new_P[p_index] = left_sum - right_sum - P[p_index] * washout

        flattened_y1 = self.flatten_lists_and_matrices(new_N, new_U, new_I, new_P)
        return flattened_y1


# graph = GraphMakerGUI()
# graph.export_graph('simple_test.gexf')
# graph = System('simple_test.gexf')

# graph = System('example.gexf')
# system = System('simple_test_2.gexf')
# Build the model from the graph file; every name assigned in this cell is a
# notebook global that later cells may rely on.
system = System('example_3.gexf')
# system.add_item_to_class_attribute('M', 4) # add the M value to the system

# Node lists selected by type code; list positions are used as array indices
# inside System.odesystem.
phage_nodes = system.get_nodes_of_type('P')
bacteria_nodes = system.get_nodes_of_type('B')
resource_nodes = system.get_nodes_of_type('R')
# NOTE(review): the name is misspelled ("environemnt"); it is left unchanged
# because other cells may reference it.
environemnt_nodes = system.get_nodes_of_type('E')

# Initial state, read from node attributes. I0 is requested with len(U0) rows
# and system.M columns (one column per infection stage in odesystem).
R0 = system.initialize_new_parameter_from_node(resource_nodes, "Initial_Concentration")
U0 = system.initialize_new_parameter_from_node(bacteria_nodes, "Initial_Population")
I0 = system.initialize_new_matrix(len(U0), system.M)
P0 = system.initialize_new_parameter_from_node(phage_nodes, "Initial_Population")

# Model parameters: per-node vectors, then per-edge matrices whose two node
# lists give the (row, column) order used when indexing them in odesystem.
e_vector = system.initialize_new_parameter_from_node(resource_nodes, 'e')
tau_vector = system.initialize_new_parameter_from_node(bacteria_nodes, 'tau')
v_matrix = system.initialize_new_parameter_from_edges(bacteria_nodes, resource_nodes, 'v')
K_matrix = system.initialize_new_parameter_from_edges(bacteria_nodes, resource_nodes, 'K')
r_matrix = system.initialize_new_parameter_from_edges(phage_nodes, bacteria_nodes, 'r')
B_matrix = system.initialize_new_parameter_from_edges(phage_nodes, bacteria_nodes, 'Burst_Size')

# Register the initial state with the visualizer, in the same N, U, I, P order
# that odesystem unpacks.
visualizer = Visualizer(system)
visualizer.add_graph_data("Resources", R0, resource_nodes)
visualizer.add_graph_data("Uninfected Bacteria", U0, bacteria_nodes)
# NOTE(review): add_rows=4 is hard-coded while the column count follows
# system.M; confirm whether the two are meant to be linked.
visualizer.add_graph_data("Infected Bacteria", I0, row_names=bacteria_nodes, column_names=[f"Infected B{i}" for i in range(int(system.M))], add_rows=4)
visualizer.add_graph_data("Phages", P0 , phage_nodes)

# Register the parameter vectors and matrices together with their axis labels.
visualizer.add_non_graph_data_vector("e_vector", e_vector, resource_nodes)
visualizer.add_non_graph_data_vector("tau_vector", tau_vector, bacteria_nodes)
visualizer.add_non_graph_data_matrix("v_matrix", v_matrix, bacteria_nodes, resource_nodes)
visualizer.add_non_graph_data_matrix("K_matrix", K_matrix, bacteria_nodes, resource_nodes)
visualizer.add_non_graph_data_matrix("r_matrix", r_matrix, phage_nodes, bacteria_nodes)
visualizer.add_non_graph_data_matrix("B_matrix", B_matrix, phage_nodes, bacteria_nodes)

# Extra positional values handed to the visualizer: the three node lists and M
# as an int.
visualizer.add_other_parameters(phage_nodes, bacteria_nodes, resource_nodes, int(system.M))

In [2]:
# from scipy.integrate import solve_ivp

# t_eval = np.linspace(0, 20, 100)
# paramater_names = ['a', 'b', 'c']
# params_to_test = [[1, 2, 3], [4, 5, 6], [1, 8, 9], [7, 8, 9], [1, 5, 3]]

# def function(t, a):
#     a_val, b_val, c_val = a[0], a[1], a[2]
#     # Simulate function returning a NumPy array (like time or y data)
#     return np.array([0.1 * a_val + b_val, 2.3 * c_val, a_val * c_val])
# values = Parallel(n_jobs=-1)(delayed(
#     lambda x: solve_ivp(function, (0, 20), x)
# )(x) for x in params_to_test)

# output_filename = 'function_results_test.hdf5'
# with h5py.File(output_filename, 'w') as hf:
#     # Add metadata to the root of the file
#     hf.attrs['parameter_names_used'] = paramater_names  # Store metadata as attributes
#     hf.attrs['parameter_values_tested'] = params_to_test  # Store analysis object as an attribute

#     # Create a group to store the results
#     # Store the parameters as a dataset
#     # results_group.create_dataset('parameter_values_tested', data=np.array(params_to_test))
#     for i, item in enumerate(values):
#         results_group = hf.create_group(f'results_{i+1}')
#         results_group.create_dataset(f'y_values', data=item.y)
#         results_group.create_dataset(f't_values', data=item.t)
#         for param_name, param_value in zip(paramater_names, params_to_test[i]):
#             results_group.attrs[param_name] = param_value  # Store parameter values as attributes
# hf.close()

# print(f"Results and parameters saved to {output_filename}")

# dictionary = {
#     'parameter_names_used': paramater_names,
#     'parameter_values_tested': params_to_test,
#     'analysis': visualizer.analysis,
#     'graph_data': visualizer.graph_data,
#     'non_graph_data_vector': visualizer.non_graph_data_vector,
#     'non_graph_data_matrix': visualizer.non_graph_data_matrix,
#     'settings': visualizer.settings,
#     'environment_data': visualizer.analysis.environment_data,
#     'other_parameters': visualizer.other_parameters_to_pass,
#     'hdf_file_location': 'function_results_test.hdf5',
# }

# pickle.dump(dictionary, open('function_results_test.pickle', 'wb'))

In [3]:
# ua = UltimateAnalysis()
# ua.unpack_pickle('function_results_test.pickle')
# query1 = ua.new_query()

# with h5py.File(query1, 'r') as query_in_memory:
#     print("original dataset", list(query_in_memory.keys()))


# d = ua.simple_query(query1, 'Infected Bacteria', '==', 7)
# with h5py.File(d, 'r') as d_in_memory:
#     print("simulations with infected==7", list(d_in_memory.keys()))

# e = ua.simple_query(query1, 'Infected Bacteria', '==', 8)
# with h5py.File(e, 'r') as e_in_memory:
#     print("simulations with infected==8", list(e_in_memory.keys())) 

# f = ua.simple_query(e, 'Resources', '>=', 2)
# with h5py.File(f, 'r') as f_in_memory:
#     print("simulations with Resources >=2", list(f_in_memory.keys())) 

# g = ua.and_query(query1, ['Infected Bacteria', 'Resources'], ['==', '=='], [9, 1])
# with h5py.File(g, 'r') as g_in_memory:
#     print("simulations with infected=9 and Resources==1", list(g_in_memory.keys())) 

# h = ua.or_query(query1, ['Resources', 'Resources'], ['<', '>'], [2, 2])
# with h5py.File(h, 'r') as h_in_memory:
#     print("simulations with resources<2 or Resources>2", list(h_in_memory.keys())) 

# dictionary = ua.finalize_query(h)
# print(dictionary)
# dataframe = ua.finalize_query(d, format='dataframe')
# save dataframe to csv
# dataframe[dataframe['Resources'] == 1]
# print(dataframe.iloc[0]['t_values'])
# for i in range(0, len(dataframe.iloc[0]['y_values'])):
#     print(dataframe.iloc[0]['y_values'][i])
# print(d)
# d2 = ua.simple_query(d, 'Resources', '==', 20)

In [4]:
# import matplotlib.pyplot as plt
# import pandas as pd
# from pprint import pprint

# pickle_object = pd.read_pickle('simulation_results.pickle')
# dataframe = pickle_object['simulation_results']
# dfr10 = dataframe[dataframe['Resources'] == 1]
# print(dfr10)

# plt.figure()
# plt.xlabel("Time")
# plt.ylabel("Values")
# sim1 = dataframe.iloc[0]
# for i in range(len(sim1['y_values'])):
#     plt.plot(sim1['t_values'], sim1['y_values'][i], label=f"item {i}")
# plt.legend()

In [12]:
# Load the Parquet file
# df = pd.read_csv('simulation_results.parquet')
import sys
import dask.dataframe as dd
import math
from IPython.display import display
import time

def convert_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        size_bytes: Non-negative number of bytes (int or float).

    Returns:
        ``"0B"`` for zero, otherwise ``"<value> <unit>"`` with the value
        rounded to two decimals, e.g. ``"1.5 KB"``. Sizes below one byte are
        reported in ``B`` and sizes beyond the largest unit stay in ``YB``.

    Raises:
        ValueError: If *size_bytes* is negative.
    """
    if size_bytes == 0:
        return "0B"
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative")
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    # Repeated division by 1024 is exact in binary floating point, so the unit
    # is never misjudged at exact powers of 1024 (a logarithm can round just
    # below the integer). The loop bound keeps the index inside size_name.
    value = float(size_bytes)
    i = 0
    while value >= 1024 and i < len(size_name) - 1:
        value /= 1024
        i += 1
    s = round(value, 2)
    return f"{s} {size_name[i]}"

# Benchmark cell: time opening a parquet result set with dask, materialising
# it, and materialising a filtered subset.
parquet_file_path = 'SimulationResults/UltimateAnalysis/simulation_results_1746521334.parquet'
time1 = time.time()
# NOTE(review): dd.read_parquet returns a lazy collection, so this first timer
# presumably covers metadata/graph setup only, not reading the data - confirm.
# NOTE(review): include_partition_columns / gather_statistics are not accepted
# by every dask release; check them against the installed version.
ddf = dd.read_parquet(parquet_file_path, engine='pyarrow', 
                      include_partition_columns=True, 
                      gather_statistics=True, 
                    #   columns=['Resources', 'Uninfected Bacteria', 'e_vector', 't_values'], 
                    #   filters=[('Resources', '==', "2.0"), ("Uninfected Bacteria", ">=", "11.1")], 
                      dtype_backend='pyarrow')
# Cast the two columns to float64 before the ordering comparison further down.
ddf['Resources'] = ddf['Resources'].astype('float64')
ddf['e_vector'] = ddf['e_vector'].astype('float64')
time2 = time.time()
execution_time = time2 - time1
print(f"Execution time: {execution_time} seconds")

# Second timer: materialise the whole table with compute(), then show only its
# first rows.
start_time = time.time()
display(ddf.compute().head())
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time} seconds")

# Query the data in parquet_data
# Filter rows where 'Resources' is at least 160
# filtered_data = ddf.query('tau_vector>= 0.7')
# result = filtered_data.compute()
# display(result)
# NOTE(review): the output recorded for this cell ends with "TypeError:
# Unordered Categoricals can only compare equality or not" after the second
# timing line, i.e. somewhere in the statements below - confirm whether the
# float64 cast above resolves it or the recorded output is stale.
res = ddf.query("Resources >= 160")
# display(ddf.head())
# Third timer: materialise the filtered subset and show its first rows.
time1 = time.time()
display(res.compute().head())
time2 = time.time()
execution_time = time2 - time1
print(f"Execution time: {execution_time} seconds")


Execution time: 0.007280111312866211 seconds


Unnamed: 0,tau_vector,washout,t_values,y_values,Resources,e_vector
0,0.4,0.0,"[0.0, 5.123596586316593e-05, 0.000563595624494...","[[150.0, 149.99342563749028, 149.9276371732764...",150.0,0.8
1,0.4,0.001,"[0.0, 5.130984480789072e-05, 0.000564408292886...","[[150.0, 149.99340846115987, 149.9274481636362...",150.0,0.8
2,0.4,0.005,"[0.0, 5.160736477647414e-05, 0.000567681012541...","[[150.0, 149.99333927553215, 149.9266868402044...",150.0,0.8
3,0.488889,0.0,"[0.0, 5.123596586316593e-05, 0.000563595624494...","[[150.0, 149.99342563748652, 149.9276371715017...",150.0,0.8
4,0.488889,0.001,"[0.0, 5.130984480789072e-05, 0.000564408292886...","[[150.0, 149.9934084611561, 149.92744816185694...",150.0,0.8


Execution time: 0.13190507888793945 seconds


TypeError: Unordered Categoricals can only compare equality or not

In [11]:
from dask.distributed import Client, progress
import dask
# Create a dask.distributed Client configured for one worker with four threads.
client = Client(threads_per_worker=4, n_workers=1)
# NOTE(review): this bare expression is not the last statement of the cell (a
# function definition follows), so the notebook presumably does not render it.
client
def import_and_filter_parquet():
    """Open the stored simulation results with dask and return the rows whose
    ``Resources`` column equals 150.

    The result of ``query`` is returned as-is; nothing is computed here.
    """
    source = 'SimulationResults/UltimateAnalysis/simulation_results_1746205712.parquet'
    return dd.read_parquet(source).query('Resources == 150')

In [12]:
# Wrap the loader in dask.delayed so it runs as a scheduled task.
lazy_result = dask.delayed(import_and_filter_parquet)()
# dask.compute returns a tuple with one entry per argument; unpack it so that
# filtered_data is the value returned by import_and_filter_parquet rather than
# a 1-tuple (a tuple has no .compute(), so the follow-up below would fail).
(filtered_data,) = dask.compute(lazy_result)
# result = filtered_data.compute()

In [2]:
# Load the Parquet file
# df = pd.read_csv('simulation_results.parquet')
import sys
import dask.dataframe as dd
import math
from IPython.display import display
import time

def convert_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        size_bytes: Non-negative number of bytes (int or float).

    Returns:
        ``"0B"`` for zero, otherwise ``"<value> <unit>"`` with the value
        rounded to two decimals, e.g. ``"1.5 KB"``. Sizes below one byte are
        reported in ``B`` and sizes beyond the largest unit stay in ``YB``.

    Raises:
        ValueError: If *size_bytes* is negative.
    """
    if size_bytes == 0:
        return "0B"
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative")
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    # Repeated division by 1024 is exact in binary floating point, so the unit
    # is never misjudged at exact powers of 1024 (a logarithm can round just
    # below the integer). The loop bound keeps the index inside size_name.
    value = float(size_bytes)
    i = 0
    while value >= 1024 and i < len(size_name) - 1:
        value /= 1024
        i += 1
    s = round(value, 2)
    return f"{s} {size_name[i]}"

# Benchmark cell: time opening a parquet result set with dask and
# materialising a subset selected on three parameter columns.
parquet_file_path = 'SimulationResults/UltimateAnalysis/simulation_results_1746524442.parquet'
time1 = time.time()
# NOTE(review): dd.read_parquet returns a lazy collection, so the "loading
# file" timer presumably covers metadata/graph setup only - confirm.
# NOTE(review): include_partition_columns / gather_statistics are not accepted
# by every dask release; check them against the installed version.
ddf = dd.read_parquet(parquet_file_path, engine='pyarrow', 
                      include_partition_columns=True, 
                      gather_statistics=True, 
                    #   columns=['Resources', 'Uninfected Bacteria', 'e_vector', 't_values'], 
                    #   filters=[('Resources', '==', "2.0"), ("Uninfected Bacteria", ">=", "11.1")], 
                      dtype_backend='pyarrow')
# ddf['Resources'] = ddf['Resources'].astype('float64')
# ddf['e_vector'] = ddf['e_vector'].astype('float64')
time2 = time.time()
execution_time = time2 - time1
print(f"Execution time loading file: {execution_time} seconds")

# The only statement between these two timestamps is commented out, so the
# "whole table" figure printed below measures no work.
start_time = time.time()
# display(ddf.compute().head())
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time computing whole tbale: {execution_time} seconds")

# Query the data in parquet_data
# Filter rows on exact values of e_vector, tau_vector and B_matrix
# filtered_data = ddf.query('tau_vector>= 0.7')
# result = filtered_data.compute()
# display(result)
res = ddf.query("e_vector == 0.1 and tau_vector == 0.7 and B_matrix == 10")
# display(ddf.head())
# Time materialising and displaying the complete filtered subset.
time1 = time.time()
display(res.compute())
time2 = time.time()
execution_time = time2 - time1
print(f"Execution time for sub table: {execution_time} seconds")


Execution time loading file: 0.03380131721496582 seconds
Execution time computing whole tbale: 1.811981201171875e-05 seconds


Unnamed: 0,Resources,Uninfected Bacteria,e_vector,tau_vector,K_matrix,B_matrix,washout,t_values,y_values
0,150.0,35.0,0.1,0.7,10.0,10.0,0.0,"[0.0, 7.070024257791026e-05, 0.000777702668357...","[[150.0, 149.99902556722168, 149.9892707777031..."
1,150.0,35.0,0.1,0.7,10.0,10.0,0.001,"[0.0, 7.080212373649564e-05, 0.000778823361101...","[[150.0, 149.9990135426854, 149.98913848930934..."
2,150.0,35.0,0.1,0.7,10.0,10.0,0.005,"[0.0, 7.121243853914714e-05, 0.000783336823930...","[[150.0, 149.99896509835443, 149.9886055279389..."
3,150.0,35.0,0.1,0.7,10.0,10.0,0.01,"[0.0, 7.173169075009221e-05, 0.000789048598251...","[[150.0, 149.9989037533709, 149.98793064155507..."
32,150.0,35.0,0.1,0.7,20.0,10.0,0.0,"[0.0, 7.070024262673938e-05, 0.000777702668894...","[[150.0, 149.99908289221426, 149.9899025648276..."
...,...,...,...,...,...,...,...,...,...
2659,183.333333,35.0,0.1,0.7,90.0,10.0,0.01,"[0.0, 7.173172356692081e-05, 0.000789048959236...","[[183.33333333333331, 183.33249451902495, 183...."
2688,183.333333,35.0,0.1,0.7,100.0,10.0,0.0,"[0.0, 7.0700274931474e-05, 0.00077770302424621...","[[183.33333333333331, 183.33266080401245, 183...."
2689,183.333333,35.0,0.1,0.7,100.0,10.0,0.001,"[0.0, 7.080215613719611e-05, 0.000778823717509...","[[183.33333333333331, 183.33264685449788, 183...."
2690,183.333333,35.0,0.1,0.7,100.0,10.0,0.005,"[0.0, 7.121247112970618e-05, 0.000783337182426...","[[183.33333333333331, 183.33259065378664, 183...."


Execution time for sub table: 8.148431062698364 seconds
