In [25]:
#% pip install openpyxl
import os, re
import pandas as pd
from globals import glob
import os, re
import pandas as pd
from globals import glob
from utilities_database import prepare_data, prepare_database, consult_database, clear_databases, retrieve_data, rename_index, get_date, get_sigma
from utilities_analysis import limits_gen, ini_generator_personalized, RyR, z_score_filter, plot_capability, reset_df
from utilities_plotting import plot_scatter

In [26]:
def mean_calculator(MEAS: pd.DataFrame, lenses_per_nest: int=None) -> pd.DataFrame:
    """Calculate the desired means.
    Parameters:
    - MEAS (pd.DataFrame): Input DataFrame containing fiber measurements.
    - lenses_per_nest (int, optional): Number of lenses per nest for specific means calculation.
    If None, global means are calculated.
    Returns:
    pd.DataFrame: DataFrame containing mean values for fbx and fby.
    If lenses_per_nest is specified, it returns specific means for each position.
    Notes:
    If lenses_per_nest is None:
    - Calculates a global mean for fbx and fby.
    - Returns the mean values for both fbx and fby in a DataFrame.
    - Displays the mean values for fbx and fby.
    If lenses_per_nest is specified:
    - Calculates specific means for each position for fbx and fby based on the number of lenses per nest.
    - Returns a DataFrame containing specific mean values for fbx and fby for each position.
    - Displays the specific mean values for fbx and fby per position."""
    resume = MEAS.transpose().describe() #Transpose the df first due to describe() working in columns.
    rough_means = list(resume.iloc[1, :].values)
    means = []; means_fbx = []; means_fby = [] #Preallocation
    if lenses_per_nest == None: #Calculates a global mean for fbx and for fby
        for i, mean in enumerate(rough_means): #Iterates and rounds every mean value
            means_fbx.append(mean) if i % 2 == 0 else means_fby.append(mean)
            means.append(mean)
        abs_mean_fbx = sum(means_fbx) / len(means_fbx)
        abs_mean_fby = sum(means_fby) / len(means_fby)
        means = [abs_mean_fbx, abs_mean_fby]
        means_df = pd.DataFrame()
        df_list = []
        for _ in range(int(MEAS.shape[0])):  #Iterates over the whole measurements data
            nest_data = []
            for j in range(len(ordered_means)):
                value = float(ordered_means[j])
                nest_data.append(value)
            nest_df = pd.DataFrame({"mean": nest_data})
            df_list.append(nest_df)
        means_df = pd.concat(df_list, axis=0, ignore_index=True)
    else: #Calculates specific means for each position for fbx and fby
        mean_fbx = rough_means[0::2] #Gets fbx values
        mean_fby = rough_means[1::2] #Gets fby values
        for index in range(lenses_per_nest):
            specific_means = mean_fbx[index::lenses_per_nest] #Gets the values of the specific lens for fbx
            abs_mean_fbx = sum(specific_means) / len(specific_means)
            means_fbx.append(abs_mean_fbx)
            specific_means = mean_fby[index::lenses_per_nest] #Gets the values of the specific lens for fby
            abs_mean_fby = sum(specific_means) / len(specific_means)
            means_fby.append(abs_mean_fby)
        means = means_fbx + means_fby
        new_order = [0, 3, 1, 4, 2, 5]
        ordered_means = [means[i] for i in new_order] #Reorder of the means for implementation
        means_df = pd.DataFrame()
        df_list = []
        for _ in range(int(MEAS.shape[0] / (glob.lenses_per_nest * 2))):  #Iterates over every nest (e.g. 24/6=4 nests)
            nest_data = []
            for j in range(len(ordered_means)):
                value = float(ordered_means[j])
                nest_data.append(value)
            nest_df = pd.DataFrame({"mean": nest_data})
            df_list.append(nest_df)
        means_df = pd.concat(df_list, axis=0, ignore_index=True)
    return means_df

In [27]:
#File filtering
extension = "xlsx"
file_list = os.listdir("../a1_input/")
filtered_list = [filename for filename in file_list if filename.endswith(extension)]
print("Files ready for storage:")
for file in filtered_list:
    print(f"    {file}")
#pd.read_excel(os.path.join(os.path.abspath("../a1_input"), filtered_list[0]), skiprows = lambda x: x not in specific_rows,)
data = pd.read_excel(os.path.join(os.path.abspath("../a1_input"), filtered_list[0])) #Import the RyR generator output
df = data.iloc[2:, 1:].reset_index(drop=True) #Slices measures and limits
MEAS = reset_df(df.iloc[:, :-2])
LIMITS = reset_df(df.iloc[:, -2:])
df.reset_index(drop=True, inplace=True) #Reset rows index
df.columns = range(df.shape[1]) #Reset columns index

Files ready for storage:
    TOP_PASSAT_B9_2023y-11m-14d_17h-21m-03s.xlsx
    ~$TOP_PASSAT_B9_2023y-11m-14d_17h-21m-03s.xlsx


In [28]:
means_df = mean_calculator(MEAS, glob.lenses_per_nest)
means_df

###################
[0.3484979591836734, 0.3465081632653062, 0.3508163265306123, 0.35393877551020403]
###################
[0.35763877551020407, 0.3591244897959184, 0.35826938775510203, 0.3588877551020408]
###################
[0.3655959183673469, 0.3629, 0.3656877551020408, 0.37077959183673476]


Unnamed: 0,mean
0,0.32638
1,0.34994
2,0.331086
3,0.35848
4,0.339205
5,0.366241
6,0.32638
7,0.34994
8,0.331086
9,0.35848
