In [6]:
import pandas as pd
import numpy as np
import statistics
import math
import pyblock

# Load data
# Space-separated text file; the three parsed columns are renamed further down.
# NOTE(review): read_csv takes the first line as the header by default; if
# "57/results3.txt" has no header row, its first record is consumed as column
# names before they are overwritten — confirm against the file.
df = pd.read_csv("57/results3.txt", sep=' ')

# Unit conversion: kcal/mol -> eV per formate
def apply_scaling(row):
    """Return the energy of one row, rescaled according to its model name.

    Parameters
    ----------
    row : mapping with 'model' and 'energy' entries
        Typically a DataFrame row passed in by ``df.apply(..., axis=1)``.

    Returns
    -------
    The energy multiplied by the kcal/mol -> eV factor and divided by 36 when
    the model name contains '_full_', by 18 when it contains '_half_'
    ('_full_' is checked first). Any other model name yields the energy
    unchanged.
    """
    kcal_per_mol_to_ev = 0.0433641153087705
    model_name = row['model']

    # (tag in the model name, divisor) — presumably the number of formates
    # in each system; order matters, '_full_' wins when both tags occur.
    for tag, divisor in (('_full_', 36), ('_half_', 18)):
        if tag in model_name:
            return row['energy'] * kcal_per_mol_to_ev / divisor

    return row['energy']

# Rename columns
df.columns = ['model', 'timestep', 'energy']

# Convert energies from kcal/mol to eV per formate. This is done row by row
# because the divisor depends on the model name.
df['energy'] = df.apply(apply_scaling, axis=1)

# Filter models ending with 300.0.txt. The explicit copy detaches the filtered
# frame from the original, so the column assignment below does not trigger
# pandas' SettingWithCopyWarning.
df = df[df['model'].str.endswith('300.0.txt')].copy()

# Convert timestep to nanoseconds
df['timestep'] = (df['timestep'] * 0.5e-6).round(1)

# Group by model and aggregate timestep/energy values into lists
df = df.groupby(['model']).agg(list).reset_index()

# Number of trailing samples used for the statistics
N_TAIL = 50

# Calculate mean, standard deviation and variance for the last N_TAIL energy values
# NOTE(review): stdev here is the spread of the samples, not the standard
# error of the mean — confirm this is the intended uncertainty.
df['mean_energy'] = df['energy'].apply(lambda x: statistics.mean(x[-N_TAIL:]))
df['std_energy'] = df['energy'].apply(lambda x: statistics.stdev(x[-N_TAIL:]))
df['variance'] = df['energy'].apply(lambda x: statistics.variance(x[-N_TAIL:]))

# Values to be used in the loop (leading number of the model file names)
values = [9, 17, 25, 33, 41, 49, 57, 65]

# Per-model lookup table; groupby guarantees each model name is unique, so
# label-based access returns a single scalar.
stats_by_model = df.set_index('model')

# Initialize lists to store the results
models = []
mean_energy_differences = []
std_energy_differences = []

# For every value, compare the tet and int runs of each variant
for value in values:
    # 'full' before 'half' keeps the row order of the result table
    for variant in ('full', 'half'):
        tet_model = f'{value}_tet_{variant}_300.0.txt'
        int_model = f'{value}_int_{variant}_300.0.txt'

        # Skip the pair when either run is absent from the data
        if tet_model not in stats_by_model.index or int_model not in stats_by_model.index:
            continue

        mean_diff = (stats_by_model.at[tet_model, 'mean_energy']
                     - stats_by_model.at[int_model, 'mean_energy'])

        # Uncertainty of a difference: standard deviations add in quadrature
        # (this treats the two runs as statistically independent).
        std_diff = math.sqrt(stats_by_model.at[tet_model, 'std_energy'] ** 2
                             + stats_by_model.at[int_model, 'std_energy'] ** 2)

        models.append(f'{tet_model} - {int_model}')
        mean_energy_differences.append(mean_diff)
        std_energy_differences.append(std_diff)

# Create a new DataFrame with the results
diff_df = pd.DataFrame({
    'model': models,
    'mean_energy_difference': mean_energy_differences,
    'std_energy_difference': std_energy_differences
})

print(diff_df)

                                            model  mean_energy_difference  \
0     9_tet_full_300.0.txt - 9_int_full_300.0.txt                0.010600   
1     9_tet_half_300.0.txt - 9_int_half_300.0.txt                0.179190   
2   17_tet_full_300.0.txt - 17_int_full_300.0.txt               -0.008287   
3   17_tet_half_300.0.txt - 17_int_half_300.0.txt                0.151774   
4   25_tet_full_300.0.txt - 25_int_full_300.0.txt               -0.006794   
5   25_tet_half_300.0.txt - 25_int_half_300.0.txt                0.130911   
6   33_tet_full_300.0.txt - 33_int_full_300.0.txt               -0.016430   
7   33_tet_half_300.0.txt - 33_int_half_300.0.txt                0.191139   
8   41_tet_full_300.0.txt - 41_int_full_300.0.txt                0.017707   
9   41_tet_half_300.0.txt - 41_int_half_300.0.txt                0.173216   
10  49_tet_full_300.0.txt - 49_int_full_300.0.txt               -0.010191   
11  49_tet_half_300.0.txt - 49_int_half_300.0.txt                0.125178   

In [9]:
# Display the current df; in the captured output this is the per-model table
# holding the aggregated lists and the tail statistics.
df


Unnamed: 0,model,timestep,energy,mean_energy,std_energy,variance
0,17_int_full_300.0.txt,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[-266.0605294769614, -266.0665522707543, -266....",-266.064119,0.006975,4.9e-05
1,17_int_half_300.0.txt,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[-525.5634410722298, -525.5538046021612, -525....",-525.565417,0.01338,0.000179
2,17_tet_full_300.0.txt,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[-266.0749841820643, -266.0641431532371, -266....",-266.072406,0.00603,3.6e-05
3,17_tet_half_300.0.txt,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[-525.4116666686491, -525.4068484336148, -525....",-525.413642,0.015156,0.00023
4,25_int_full_300.0.txt,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[-380.0635840649604, -380.0720159762705, -380....",-380.073365,0.009046,8.2e-05
5,25_int_half_300.0.txt,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[-753.6731423014655, -753.649051126294, -753.6...",-753.652906,0.015726,0.000247
6,25_tet_full_300.0.txt,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[-380.0828570050977, -380.0924934751663, -380....",-380.080159,0.008197,6.7e-05
7,25_tet_half_300.0.txt,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[-753.5430499555392, -753.5165496628505, -753....",-753.521994,0.015942,0.000254
8,33_int_full_300.0.txt,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[-494.0895252693724, -494.1172301208197, -494....",-494.093308,0.008363,7e-05
9,33_int_half_300.0.txt,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[-981.7370702978753, -981.7756161781498, -981....",-981.74902,0.020843,0.000434


In [11]:
import pandas as pd
import numpy as np
import statistics
import math
import pyblock

# Read the raw energies and name the single column
df = pd.read_csv("57_tet.data", sep=' ')
df.columns = ['energy']

# kcal/mol -> eV per formate
df['energy'] = df['energy'].mul(0.0433641153087705).div(36)

# One-row frame (index label 1) whose only cell holds the whole energy
# series as a single flat list
dff = pd.DataFrame({'energy_list': [df['energy'].tolist()]}, index=[1])

# Show the frame before the statistics columns are added
print(dff)

# Mean, sample standard deviation and sample variance of the last 50 energies
for column, stat in (
    ('mean_energy', statistics.mean),
    ('std_energy', statistics.stdev),
    ('variance', statistics.variance),
):
    # stat is bound as a default argument so each lambda keeps its own function
    dff[column] = dff['energy_list'].apply(
        lambda energies, stat=stat: stat(energies[-50:])
    )

                                         energy_list
1  [-836.272952548973, -836.3993468995104, -836.2...


In [12]:
# Display dff, including the statistics columns computed from 'energy_list'.
dff

Unnamed: 0,energy_list,mean_energy,std_energy,variance
1,"[-836.272952548973, -836.3993468995104, -836.2...",-836.214649,0.062369,0.00389


In [None]:
import pandas as pd
import numpy as np
import statistics
import math
import pyblock

# Load data
# NOTE(review): read_csv takes the first line as the header by default; if
# "57_tet.data" has no header row, the first energy value is consumed as the
# column name — confirm against the file.
df = pd.read_csv("57_tet.data", sep=' ')

# Rename columns
# The file must parse into exactly one column; assigning a one-element list
# of names raises ValueError otherwise.
df.columns = ['energy']

# Unit conversion: kcal/mol -> eV per formate
def apply_scaling(row):
    """Return ``row['energy']`` converted from kcal/mol to eV and divided by 36.

    Parameters
    ----------
    row : mapping with an 'energy' entry
        Typically a DataFrame row passed in by ``df.apply(..., axis=1)``.
    """
    kcal_per_mol_to_ev = 0.0433641153087705
    divisor = 36  # presumably the number of formates in the system — confirm
    return row['energy'] * kcal_per_mol_to_ev / divisor

# Convert every energy from kcal/mol to eV per formate
df['energy'] = df.apply(apply_scaling, axis=1)

# Collapse the column into a single row whose 'energy' cell holds the whole
# series as one list. The cell must contain the full series: statistics.stdev
# and statistics.variance need at least two data points, so a one-element
# list per row would raise StatisticsError.
df = pd.DataFrame({'energy': [df['energy'].tolist()]})


# Calculate mean, standard deviation and variance for the last 50 energy values
df['mean_energy'] = df['energy'].apply(lambda x: statistics.mean(x[-50:]))
df['std_energy'] = df['energy'].apply(lambda x: statistics.stdev(x[-50:]))
df['variance'] = df['energy'].apply(lambda x: statistics.variance(x[-50:]))

In [None]:
# Display the current contents of df.
df

In [None]:

# Load data
df = pd.read_csv("57_tet.data", sep=' ')

# Rename columns
df.columns = ['energy']

# Convert energy from kcal/mol to eV/formate
df['energy'] = df['energy'] * 0.0433641153087705 / 36

# Get all the energy values as a single string with comma separation
all_energies = ','.join(df['energy'].astype(str))

# Number of trailing samples the statistics are computed over. The row-count
# guard below uses the same constant so the guard and the window cannot drift
# apart.
N_TAIL = 50

# Take the trailing slice unconditionally so `last_50_energies` is always
# defined, even when the guard below falls through to NaN.
last_50_energies = df['energy'].tail(N_TAIL)

# Ensure that the DataFrame has at least N_TAIL rows; with fewer rows the
# statistics would not cover a full window, so NaN is reported instead.
if len(df) >= N_TAIL:
    # Calculate mean, standard deviation, and variance for the last N_TAIL energy values
    mean_energy = statistics.mean(last_50_energies)
    std_energy = statistics.stdev(last_50_energies)
    variance_energy = statistics.variance(last_50_energies)
else:
    mean_energy = float('nan')
    std_energy = float('nan')
    variance_energy = float('nan')

# Create a new one-row DataFrame with the results
result_df = pd.DataFrame({
    'all_energies': [all_energies],
    'mean_energy': [mean_energy],
    'std_energy': [std_energy],
    'variance_energy': [variance_energy]
})

# Display the result DataFrame
print(result_df)

In [None]:

import pandas as pd
import numpy as np
import statistics
import math
import pyblock

# Load data
# NOTE(review): no `sep` is passed here, so the default comma separator is
# used, whereas the "57_tet.data" loads pass sep=' ' — confirm this is intended.
df = pd.read_csv("57_int.data")


# Rename columns
df.columns = ['energy']

# Convert energy from kcal/mol to eV/formate
df['energy'] = df['energy'] * 0.0433641153087705 / 36

# Get all the energy values as a single string with comma separation
all_energies = ','.join(df['energy'].astype(str))

# Number of trailing samples the statistics are computed over. The row-count
# guard below uses the same constant so the guard and the window cannot drift
# apart.
N_TAIL = 50

# Take the trailing slice unconditionally so `last_50_energies` is always
# defined, even when the guard below falls through to NaN.
last_50_energies = df['energy'].tail(N_TAIL)

# Ensure that the DataFrame has at least N_TAIL rows; with fewer rows the
# statistics would not cover a full window, so NaN is reported instead.
if len(df) >= N_TAIL:
    # Calculate mean, standard deviation, and variance for the last N_TAIL energy values
    mean_energy = statistics.mean(last_50_energies)
    std_energy = statistics.stdev(last_50_energies)
    variance_energy = statistics.variance(last_50_energies)
else:
    mean_energy = float('nan')
    std_energy = float('nan')
    variance_energy = float('nan')

# Create a new one-row DataFrame with the results
result_df = pd.DataFrame({
    'all_energies': [all_energies],
    'mean_energy': [mean_energy],
    'std_energy': [std_energy],
    'variance_energy': [variance_energy]
})

# Display the result DataFrame
print(result_df)

In [None]:
# Difference between two hard-coded mean energies.
# NOTE(review): the literals look transcribed from earlier results — confirm
# which runs they belong to.
mean_first, mean_second = -836.199303, -836.199848
energy_diff = mean_first - mean_second

print(energy_diff)

# Combine the two hard-coded standard deviations in quadrature
std_first, std_second = 0.056838, 0.055355
standart_dev = math.sqrt(std_first**2 + std_second**2)

print(standart_dev)

0.0793

In [None]:
# Display the trailing energy slice assigned by whichever statistics cell ran last.
last_50_energies