In [1]:
import os
import numpy as np
import pandas as pd

class MeanCalculator:
    """Tabulate per-file means of ``rmsd.xvg`` files found under several directories.

    Every directory in ``directories`` is walked recursively.  For each
    ``rmsd.xvg`` file found, the second whitespace-separated column is read
    and the mean of its first ``SAMPLE_COUNT`` values is recorded under the
    name of the folder that directly contains the file.

    After :meth:`calculate_means` has run, ``self.df`` holds one row per entry
    of ``directories`` and one column per folder name.  Folder names must be
    integer-like strings (``"1"``, ``"2"``, ...) because the columns are
    ordered numerically.
    """

    # Number of leading lines of each file that are skipped as header.
    HEADER_LINES = 18
    # Only the first SAMPLE_COUNT data values of a file contribute to its mean.
    SAMPLE_COUNT = 1001
    # Name of the files that are looked for while walking a directory.
    TARGET_FILE = "rmsd.xvg"

    def __init__(self, directories):
        """Store the top-level directories to scan; no file is read yet."""
        self.directories = directories
        self.all_subdirectories = []
        self.all_means = []

    def extract_mean(self, file_path):
        """Return the mean of the first ``SAMPLE_COUNT`` values of column 2.

        The first ``HEADER_LINES`` lines are ignored.  Blank lines (for
        example a trailing empty line) are skipped instead of raising
        ``IndexError``.  A file with no data lines yields ``nan`` (NumPy emits
        a ``RuntimeWarning``) rather than an unhandled ``StopIteration``.

        Raises:
            ValueError: if the second field of a data line is not a number.
            IndexError: if a non-blank data line has fewer than two fields.
        """
        values = []
        with open(file_path) as handle:
            for line_number, line in enumerate(handle):
                if line_number < self.HEADER_LINES:
                    continue
                fields = line.split()
                if not fields:
                    continue
                values.append(float(fields[1]))
        return np.mean(values[:self.SAMPLE_COUNT])

    def process_directory(self, directory):
        """Walk ``directory`` and compute the mean of every ``rmsd.xvg`` below it.

        Returns:
            ``(subdirectories, means)``: two parallel lists holding, for each
            file found, the name of its containing folder and its mean.
        """
        means = []
        subdirectories = []
        for subdir, _dirs, files in os.walk(directory):
            if self.TARGET_FILE in files:
                file_path = os.path.join(subdir, self.TARGET_FILE)
                means.append(self.extract_mean(file_path))
                # basename() handles whichever path separator the platform uses.
                subdirectories.append(os.path.basename(subdir))
        return subdirectories, means

    def calculate_means(self):
        """Scan all directories and build ``self.df``.

        The method can be called repeatedly: accumulated results are reset on
        every call.  Means are matched to directories by name, so a folder
        that is missing from one directory produces ``NaN`` in that
        directory's row instead of shifting the other values.

        Raises:
            ValueError: if the same folder name occurs twice within one
                directory, or if a folder name is not integer-like.
        """
        # Start from a clean slate so a second call rebuilds the table rather
        # than appending to the previous run's lists.
        self.all_subdirectories = []
        self.all_means = []

        # One {folder name: mean} mapping per directory, in directory order.
        means_per_directory = []
        for directory in self.directories:
            subdirectories, means = self.process_directory(directory)
            self.all_subdirectories.extend(subdirectories)
            self.all_means.extend(means)

            lookup = {}
            for subdirectory, mean in zip(subdirectories, means):
                if subdirectory in lookup:
                    raise ValueError(
                        f"{directory!r} contains more than one {self.TARGET_FILE} "
                        f"in folders named {subdirectory!r}"
                    )
                lookup[subdirectory] = mean
            means_per_directory.append(lookup)

        self.unique_subdirectories = list(set(self.all_subdirectories))

        # dict.fromkeys() de-duplicates while keeping first-seen order.
        self.subdir_to_means = {
            subdir: [lookup.get(subdir, np.nan) for lookup in means_per_directory]
            for subdir in dict.fromkeys(self.all_subdirectories)
        }

        self.df = pd.DataFrame.from_dict(self.subdir_to_means, orient='index', columns=self.directories)
        self.df = self.df.T
        # Sort the columns numerically (1, 2, ..., 10) rather than
        # lexicographically ("1", "10", "2"), then restore string labels.
        self.df.columns = [int(col) for col in self.df.columns]
        self.df = self.df.sort_index(axis=1)
        self.df.columns = [str(col) for col in self.df.columns]

    def get_df(self):
        """Return the table built by :meth:`calculate_means`.

        Raises ``AttributeError`` if :meth:`calculate_means` has not run yet.
        """
        return self.df


In [2]:
# Usage: scan the three listed directories, build the table of means and
# display it as the cell's output.
mean_calculator = MeanCalculator(
    directories=["100ns.1", "100ns.2", "100ns.3"],
)
mean_calculator.calculate_means()
df_2 = mean_calculator.get_df()
df_2

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
100ns.1,0.10229,0.123298,0.101595,0.094797,0.105467,0.11742,0.113608,0.128597,0.088409,0.103129,0.088469,0.120258,0.119245,0.104188,0.119557
100ns.2,0.117608,0.096498,0.08763,0.100609,0.112048,0.104936,0.119513,0.13055,0.100389,0.101461,0.098862,0.097226,0.10665,0.122696,0.119568
100ns.3,0.122667,0.09214,0.131991,0.114084,0.095322,0.103398,0.106636,0.133802,0.097583,0.112788,0.090964,0.101044,0.116765,0.12954,0.126481


In [3]:
class MeanCalculator:
    """Tabulate windowed means of ``rmsd.xvg`` files found under several directories.

    Every directory in ``directories`` is walked recursively.  For each
    ``rmsd.xvg`` file found, the second whitespace-separated column is read
    and the mean of the samples in the slice ``[start:end]`` is recorded under
    the name of the folder that directly contains the file.

    After :meth:`calculate_means` has run, ``self.df`` holds one row per entry
    of ``directories`` and one column per folder name.  Folder names must be
    integer-like strings (``"1"``, ``"2"``, ...) because the columns are
    ordered numerically.
    """

    # Number of leading lines of each file that are skipped as header.
    HEADER_LINES = 18
    # Name of the files that are looked for while walking a directory.
    TARGET_FILE = "rmsd.xvg"

    def __init__(self, directories, start=0, end=1001):
        """Store the directories to scan and the sample window ``[start:end]``.

        Args:
            directories: top-level directories to walk.
            start: index of the first data sample included in the mean.
            end: index one past the last data sample included in the mean.
        """
        self.directories = directories
        self.start = start
        self.end = end
        self.all_subdirectories = []
        self.all_means = []

    def extract_mean(self, file_path):
        """Return the mean of column 2 over the samples ``[self.start:self.end]``.

        The first ``HEADER_LINES`` lines are ignored.  Blank lines (for
        example a trailing empty line) are skipped instead of raising
        ``IndexError``.  A window that contains no samples yields ``nan``
        (NumPy emits a ``RuntimeWarning``); a file shorter than the header
        does the same rather than raising an unhandled ``StopIteration``.

        Raises:
            ValueError: if the second field of a data line is not a number.
            IndexError: if a non-blank data line has fewer than two fields.
        """
        values = []
        with open(file_path) as handle:
            for line_number, line in enumerate(handle):
                if line_number < self.HEADER_LINES:
                    continue
                fields = line.split()
                if not fields:
                    continue
                values.append(float(fields[1]))
        return np.mean(values[self.start:self.end])

    def process_directory(self, directory):
        """Walk ``directory`` and compute the mean of every ``rmsd.xvg`` below it.

        Returns:
            ``(subdirectories, means)``: two parallel lists holding, for each
            file found, the name of its containing folder and its mean.
        """
        means = []
        subdirectories = []
        for subdir, _dirs, files in os.walk(directory):
            if self.TARGET_FILE in files:
                file_path = os.path.join(subdir, self.TARGET_FILE)
                means.append(self.extract_mean(file_path))
                # basename() handles whichever path separator the platform uses.
                subdirectories.append(os.path.basename(subdir))
        return subdirectories, means

    def calculate_means(self):
        """Scan all directories and build ``self.df``.

        The method can be called repeatedly: accumulated results are reset on
        every call.  Means are matched to directories by name, so a folder
        that is missing from one directory produces ``NaN`` in that
        directory's row instead of shifting the other values.

        Raises:
            ValueError: if the same folder name occurs twice within one
                directory, or if a folder name is not integer-like.
        """
        # Start from a clean slate so a second call rebuilds the table rather
        # than appending to the previous run's lists.
        self.all_subdirectories = []
        self.all_means = []

        # One {folder name: mean} mapping per directory, in directory order.
        means_per_directory = []
        for directory in self.directories:
            subdirectories, means = self.process_directory(directory)
            self.all_subdirectories.extend(subdirectories)
            self.all_means.extend(means)

            lookup = {}
            for subdirectory, mean in zip(subdirectories, means):
                if subdirectory in lookup:
                    raise ValueError(
                        f"{directory!r} contains more than one {self.TARGET_FILE} "
                        f"in folders named {subdirectory!r}"
                    )
                lookup[subdirectory] = mean
            means_per_directory.append(lookup)

        self.unique_subdirectories = list(set(self.all_subdirectories))

        # dict.fromkeys() de-duplicates while keeping first-seen order.
        self.subdir_to_means = {
            subdir: [lookup.get(subdir, np.nan) for lookup in means_per_directory]
            for subdir in dict.fromkeys(self.all_subdirectories)
        }

        self.df = pd.DataFrame.from_dict(self.subdir_to_means, orient='index', columns=self.directories)
        self.df = self.df.T
        # Sort the columns numerically (1, 2, ..., 10) rather than
        # lexicographically ("1", "10", "2"), then restore string labels.
        self.df.columns = [int(col) for col in self.df.columns]
        self.df = self.df.sort_index(axis=1)
        self.df.columns = [str(col) for col in self.df.columns]

    def get_df(self):
        """Return the table built by :meth:`calculate_means`.

        Raises ``AttributeError`` if :meth:`calculate_means` has not run yet.
        """
        return self.df


In [4]:
# Directories to scan; the name `directories` is reused by later cells.
directories = [
    "100ns.1",
    "100ns.2",
    "100ns.3",
]

# Average the samples in the slice [0:1001] of every file, then show the table.
mean_calculator = MeanCalculator(
    directories,
    start=0,
    end=1001,
)
mean_calculator.calculate_means()
df = mean_calculator.get_df()
df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
100ns.1,0.10229,0.123298,0.101595,0.094797,0.105467,0.11742,0.113608,0.128597,0.088409,0.103129,0.088469,0.120258,0.119245,0.104188,0.119557
100ns.2,0.117608,0.096498,0.08763,0.100609,0.112048,0.104936,0.119513,0.13055,0.100389,0.101461,0.098862,0.097226,0.10665,0.122696,0.119568
100ns.3,0.122667,0.09214,0.131991,0.114084,0.095322,0.103398,0.106636,0.133802,0.097583,0.112788,0.090964,0.101044,0.116765,0.12954,0.126481


In [5]:
# Build one table of means per consecutive, non-overlapping window of 1001
# samples: window i covers the slice [i * 1001 : (i + 1) * 1001].
dfs = []
for i in range(20):
    start, end = i * 1001, (i + 1) * 1001
    mean_calculator = MeanCalculator(directories, start=start, end=end)
    mean_calculator.calculate_means()
    df = mean_calculator.get_df()
    dfs += [df]


In [6]:
# Print the first 15 columns of the first window's table, one Series at a time.
for i in range(15):
    column = dfs[0].iloc[:, i]
    print(column)

100ns.1    0.102290
100ns.2    0.117608
100ns.3    0.122667
Name: 1, dtype: float64
100ns.1    0.123298
100ns.2    0.096498
100ns.3    0.092140
Name: 2, dtype: float64
100ns.1    0.101595
100ns.2    0.087630
100ns.3    0.131991
Name: 3, dtype: float64
100ns.1    0.094797
100ns.2    0.100609
100ns.3    0.114084
Name: 4, dtype: float64
100ns.1    0.105467
100ns.2    0.112048
100ns.3    0.095322
Name: 5, dtype: float64
100ns.1    0.117420
100ns.2    0.104936
100ns.3    0.103398
Name: 6, dtype: float64
100ns.1    0.113608
100ns.2    0.119513
100ns.3    0.106636
Name: 7, dtype: float64
100ns.1    0.128597
100ns.2    0.130550
100ns.3    0.133802
Name: 8, dtype: float64
100ns.1    0.088409
100ns.2    0.100389
100ns.3    0.097583
Name: 9, dtype: float64
100ns.1    0.103129
100ns.2    0.101461
100ns.3    0.112788
Name: 10, dtype: float64
100ns.1    0.088469
100ns.2    0.098862
100ns.3    0.090964
Name: 11, dtype: float64
100ns.1    0.120258
100ns.2    0.097226
100ns.3    0.101044
Name: 12, dtyp

In [7]:
from scipy import stats

# One-way ANOVA between the column at position 7 of the first two tables,
# judged at the 0.05 significance level.
first_sample = dfs[0].iloc[:, 7]
second_sample = dfs[1].iloc[:, 7]
f_value, p_value = stats.f_oneway(first_sample, second_sample)
print(f"There is a significant difference." if p_value < 0.05 else f"There is no significant difference.")

There is a significant difference.


In [8]:
from scipy import stats

# Compare every later table against the first one, column by column, with a
# one-way ANOVA at the 0.05 level.  Table i is labelled "(i + 1) * 10 ns" and
# column c is labelled "c + 1" in the messages.
# The loop bounds come from the data instead of being hard-coded, and both
# messages now end with a period (the "significant" one used to lack it).
for i in range(1, len(dfs)):
    for c in range(dfs[0].shape[1]):
        f_value, p_value = stats.f_oneway(dfs[0].iloc[:, c], dfs[i].iloc[:, c])
        if p_value < 0.05:
            print(f"There is a significant difference in the means of the groups {(i+1)*10}ns and 10ns for {c+1}.")
        else:
            print(f"There is no significant difference in the means of the groups {(i+1)*10}ns and 10ns for {c+1}.")

There is no significant difference in the means of the groups 20ns and 10ns for 1.
There is no significant difference in the means of the groups 20ns and 10ns for 2.
There is no significant difference in the means of the groups 20ns and 10ns for 3.
There is no significant difference in the means of the groups 20ns and 10ns for 4.
There is no significant difference in the means of the groups 20ns and 10ns for 5.
There is no significant difference in the means of the groups 20ns and 10ns for 6.
There is no significant difference in the means of the groups 20ns and 10ns for 7.
There is a significant difference in the means of the groups 20ns and 10ns for 8
There is no significant difference in the means of the groups 20ns and 10ns for 9.
There is no significant difference in the means of the groups 20ns and 10ns for 10.
There is no significant difference in the means of the groups 20ns and 10ns for 11.
There is no significant difference in the means of the groups 20ns and 10ns for 12.
The

In [12]:
import plotly.graph_objs as go
import numpy as np
from scipy import stats

# results[i][c] is 1 when the one-way ANOVA between column c of the first
# table and column c of table i has p < 0.05, and 0 otherwise.  Row 0 compares
# the first table with itself.
results = []
for i in range(20):
    row = []
    for c in range(15):
        f_value, p_value = stats.f_oneway(dfs[0].iloc[:, c], dfs[i].iloc[:, c])
        row.append(1 if p_value < 0.05 else 0)
    results.append(row)

results = np.array(results)
percentage = 100 * np.count_nonzero(results) / results.size
print(f"Percentage of significant's over the total: {percentage:.2f}%")

# Axis values: columns 1..15 on x, 10..200 in steps of 10 on y.
variant_ticks = list(range(1, 16))
duration_ticks = list(range(10, 201, 10))

data = [
    go.Heatmap(
        z=results,
        x=variant_ticks,
        y=duration_ticks,
        colorscale='Viridis',
    )
]
layout = go.Layout(
    title='Significance of differences in means',
    xaxis_title='Variant',
    yaxis_title='Duration (ns)',
    xaxis=dict(tickmode='array', tickvals=variant_ticks),
    yaxis=dict(tickmode='array', tickvals=duration_ticks),
)
fig = go.Figure(data=data, layout=layout)
fig.show()
fig.write_image("significance_10ns.png")


Percentage of significant's over the total: 40.33%


In [None]:
import plotly.subplots as sp

# 5 x 4 grid of heatmap panels, one per reference table, titled "10 ns" to
# "200 ns".  `go`, `np`, `stats` and `dfs` come from earlier cells.
fig = sp.make_subplots(
    rows=5,
    cols=4,
    subplot_titles=[f"{i*10} ns" for i in range(1,21)],
    shared_xaxes=True,
    shared_yaxes=True,
    vertical_spacing=0.03,
    horizontal_spacing=0.03,
    column_width=[0.8, 0.8, 0.8, 0.8],
    row_heights=[0.2] * 5,
    specs=[[{'type': 'heatmap'}] * 4] * 5,
    print_grid=False,
)
fig.update_layout(height=1000, width=1200, font=dict(size=10))

for i in range(1, 21):
    reference = dfs[i - 1]

    # results[j - 1][c] is 1 when table j differs from the reference table in
    # column c (one-way ANOVA, p < 0.05), else 0.
    results = []
    for j in range(1, 21):
        flags = []
        for c in range(15):
            f_value, p_value = stats.f_oneway(dfs[j-1].iloc[:, c], reference.iloc[:, c])
            flags.append(1 if p_value < 0.05 else 0)
        results.append(flags)
    results = np.array(results)

    percentage = 100 * np.count_nonzero(results) / results.size
    print(f"Percentage of significant's over the total for {(i)*10}: {percentage:.2f}%")

    heatmap = go.Heatmap(
        z=results,
        x=list(range(1, 16)),
        y=list(range(10, 201, 10)),
        colorscale='Viridis',
    )

    # Panels are filled row by row, four per row (1-based grid coordinates).
    row, col = divmod(i - 1, 4)
    row += 1
    col += 1
    fig.add_trace(heatmap, row=row, col=col)

# Axis titles are attached to a single panel each.
fig.update_xaxes(title_text='Variant', row=5, col=3, title_font=dict(size=20), automargin=True)
fig.update_yaxes(title_text='Duration (ns)', row=3, col=1, title_font=dict(size=20))

fig.show()



Percentage of significant's over the total for 10: 40.33%
Percentage of significant's over the total for 20: 15.00%
Percentage of significant's over the total for 30: 12.33%
Percentage of significant's over the total for 40: 8.00%
Percentage of significant's over the total for 50: 7.67%
Percentage of significant's over the total for 60: 6.00%
Percentage of significant's over the total for 70: 7.00%
Percentage of significant's over the total for 80: 4.00%
Percentage of significant's over the total for 90: 6.00%
Percentage of significant's over the total for 100: 5.33%
Percentage of significant's over the total for 110: 5.67%
Percentage of significant's over the total for 120: 4.00%
Percentage of significant's over the total for 130: 5.67%
Percentage of significant's over the total for 140: 6.67%
Percentage of significant's over the total for 150: 9.00%
Percentage of significant's over the total for 160: 6.67%
Percentage of significant's over the total for 170: 9.33%
Percentage of signif

In [None]:
import plotly.subplots as sp

# 5 x 4 grid of heatmap panels, one per reference table, titled "10 ns" to
# "200 ns", with explicit x ticks on every panel.  `go`, `np`, `stats` and
# `dfs` come from earlier cells.
fig = sp.make_subplots(
    rows=5,
    cols=4,
    subplot_titles=[f"{i*10} ns" for i in range(1,21)],
    shared_xaxes=True,
    shared_yaxes=True,
    vertical_spacing=0.03,
    horizontal_spacing=0.03,
    column_width=[0.8, 0.8, 0.8, 0.8],
    row_heights=[0.2] * 5,
    specs=[[{'type': 'heatmap'}] * 4] * 5,
    print_grid=False,
)
fig.update_layout(height=1000, width=1200, font=dict(size=10))

for i in range(1, 21):
    reference = dfs[i - 1]

    # results[j - 1][c] is 1 when table j differs from the reference table in
    # column c (one-way ANOVA, p < 0.05), else 0.
    results = []
    for j in range(1, 21):
        flags = []
        for c in range(15):
            f_value, p_value = stats.f_oneway(dfs[j-1].iloc[:, c], reference.iloc[:, c])
            flags.append(1 if p_value < 0.05 else 0)
        results.append(flags)
    results = np.array(results)

    percentage = 100 * np.count_nonzero(results) / results.size
    print(f"Percentage of significant's over the total for {(i)*10}: {percentage:.2f}%")

    heatmap = go.Heatmap(
        z=results,
        x=list(range(1, 16)),
        y=list(range(10, 201, 10)),
        colorscale='Viridis',
    )

    # Panels are filled row by row, four per row (1-based grid coordinates).
    row, col = divmod(i - 1, 4)
    row += 1
    col += 1
    fig.add_trace(heatmap, row=row, col=col)

    # Show a tick for each of the 15 columns on this panel's x axis.
    fig.update_xaxes(tickmode='array', tickvals=list(range(1, 16)), row=row, col=col)

# Axis titles are attached to a single panel each.
fig.update_xaxes(title_text='Variant', row=5, col=3, title_font=dict(size=20), automargin=True)
fig.update_yaxes(title_text='Simulation Chunk (ns)', row=3, col=1, title_font=dict(size=20))

fig.show()


Percentage of significant's over the total for 10: 40.33%
Percentage of significant's over the total for 20: 15.00%
Percentage of significant's over the total for 30: 12.33%
Percentage of significant's over the total for 40: 8.00%
Percentage of significant's over the total for 50: 7.67%
Percentage of significant's over the total for 60: 6.00%
Percentage of significant's over the total for 70: 7.00%
Percentage of significant's over the total for 80: 4.00%
Percentage of significant's over the total for 90: 6.00%
Percentage of significant's over the total for 100: 5.33%
Percentage of significant's over the total for 110: 5.67%
Percentage of significant's over the total for 120: 4.00%
Percentage of significant's over the total for 130: 5.67%
Percentage of significant's over the total for 140: 6.67%
Percentage of significant's over the total for 150: 9.00%
Percentage of significant's over the total for 160: 6.67%
Percentage of significant's over the total for 170: 9.33%
Percentage of signif