In [5]:
#Packages
import openpyxl
import pandas as pd
import sklearn as sk
import matplotlib.pyplot as plt
import numpy as np
#Helper functions

def _ryr_status(value):
    """Returns the qualitative label for a single RyR percentage."""
    if value <= 10:
        return "Correct"
    if value <= 25:
        return "Low fail"
    return "High fail" #Also reached for NaN, because every comparison against NaN is False


def _print_ryr(frame, label):
    """Prints one line per row of a single-column RyR dataframe, numbering the guides from 1."""
    for position in range(frame.shape[0]):
        value = frame.iloc[position, 0]
        print(f'RyR Guide {label} {frame.index[position]+1}: ' + str(value) +' %' + ' Status: ' + _ryr_status(value))


def RyR(dataframe):
    """Calculates and prints the RyR values for both fibers, including a qualitative evaluation.

    The last two columns of ``dataframe`` are read as the limits and every other column as a
    measurement. For each row, RyR = 6 * std(measurements) / (last column - second to last column) * 100.

    Rows alternate between fibers: even positions (0, 2, 4, ...) are fiber x and odd positions
    (1, 3, 5, ...) are fiber y, which is the convention the rest of the notebook uses.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Numeric measurements followed by the two limit columns.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(dfx, dfy)``: single-column dataframes (column ``0``) with the RyR percentages of the
        x and y fibers, each with a fresh 0..n-1 row index.
    """
    std = dataframe.iloc[:, :-2].std(axis=1).to_numpy(dtype=float) #Standard deviation for each row, excluding limits
    lower = dataframe.iloc[:, -2].to_numpy(dtype=float)
    upper = dataframe.iloc[:, -1].to_numpy(dtype=float)
    #Float arrays keep numpy semantics: a zero-width limit range gives inf/nan instead of raising
    ryr_values = ((6 * std / (upper - lower)) * 100).tolist()
    df_RyR = pd.DataFrame(ryr_values) #Builds a new dataframe with the RyR values
    dfx = df_RyR.iloc[0::2].reset_index(drop=True) #Even positions -> fiber x
    dfy = df_RyR.iloc[1::2].reset_index(drop=True) #Odd positions -> fiber y
    _print_ryr(dfx, 'fbx')
    print("")
    _print_ryr(dfy, 'fby')
    return dfx, dfy

def z_score_filter(dataframe, threshold):
    """Masks measurements whose row-wise z-score exceeds a threshold.

    The last two columns of ``dataframe`` are read as limits and passed through untouched.
    Every other column is a measurement: each value is standardised against the mean and
    standard deviation of its own row, and values with ``abs(z) > threshold`` are replaced by NaN.
    A row whose standard deviation is zero has undefined z-scores, so all of its measurements
    become NaN.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Measurements followed by the two limit columns.
    threshold : float
        Largest absolute z-score that is kept.

    Returns
    -------
    pandas.DataFrame
        Same shape, row index and column labels as ``dataframe``, with rejected measurements set to NaN.
    """
    MEAS = dataframe.iloc[:, :-2] #Indexes the measurements
    limits = dataframe.iloc[:, -2:]  #Indexes the limits
    #Row-wise z-score; axis=0 broadcasts the per-row statistics across the columns
    z_scores = MEAS.sub(MEAS.mean(axis=1), axis=0).div(MEAS.std(axis=1), axis=0)
    keep = (z_scores.abs() <= threshold).to_numpy()
    #Reuse the original index/columns so the concat below lines up with the limits row by row
    filtered_df = pd.DataFrame(
        np.where(keep, MEAS.to_numpy(), np.nan),
        index=MEAS.index,
        columns=MEAS.columns,
    )
    return pd.concat([filtered_df, limits], axis=1) #Adds the limit columns back

ModuleNotFoundError: No module named 'sklearn'

In [None]:
#Load the preprocessed worksheet; header=None keeps every column addressed by position
data = pd.read_excel("./2_Results/Target.xlsx", header=None)
#Keep only the measurements: skip the first three rows, the first column and the last two columns,
#then restart the row index at zero
df = data.iloc[3:, 1:-2].reset_index(drop=True)
#Restart the column labels at zero as well
df.columns = pd.RangeIndex(df.shape[1])

In [None]:
#describe() summarises per column, so transpose first to get one summary column per row of df
resume = df.T.describe()
resume #Bare last expression so Jupyter renders the summary table

In [None]:
#Five rows with the highest standard deviation, largest first
largest = resume.loc['std'].sort_values(ascending=False).head(5)
#Relabel each entry by fiber: even row positions are x-fibers, odd ones are y-fibers (numbered from 1)
index = [
    f"fiber x {row+1}:" if row % 2 == 0 else f"fiber y {row+1}:"
    for row in largest.index.tolist()
]
largest.index = index
print("Fibers with largest deviation:")
print(largest)

RyR calculation

In [None]:
#Put the measurements next to the last two columns of the sheet (the limits).
#The limits are re-indexed from zero first so that concat pairs them with df row by row.
df_plus_limits = pd.concat(
    [df, data.iloc[3:, -2:].reset_index(drop=True)],
    axis=1,
).reset_index(drop=True)
#Restart the column labels at zero so the limits are simply the last two positions
df_plus_limits.columns = pd.RangeIndex(df_plus_limits.shape[1])
#RyR for every row, split by fiber
RyRx, RyRy = RyR(df_plus_limits)

Distribution Fitting

In [None]:
array = df.values #Creates a ndarray with the values of the dataframe
#Every sixth row, starting at offsets 0..5, is pooled into one group and flattened into a column vector
fbx_low = array[0::6].reshape(-1, 1)
fby_low = array[1::6].reshape(-1, 1)
fbx_medium = array[2::6].reshape(-1, 1)
fby_medium = array[3::6].reshape(-1, 1) #Offset 3: the original reused offset 2 and duplicated fbx_medium
fbx_high = array[4::6].reshape(-1, 1)
fby_high = array[5::6].reshape(-1, 1)
#One row per group; -1 lets numpy infer the sample count instead of assuming exactly 120
values = np.concatenate([[fbx_low], [fby_low], [fbx_medium], [fby_medium], [fbx_high], [fby_high]]).reshape(6, -1)
#randint's upper bound is exclusive, so start at 0 to make every group (including fbx_low) selectable
plt.hist(values[np.random.randint(0, values.shape[0]), :], 120)
plt.show()

Filtering

![title](./3_Extra/Z-score.jpg)

In [None]:
#Mask every measurement whose row-wise z-score is larger than 1
z_df = z_score_filter(df_plus_limits, 1)
#Scatter plot of the filtered measurements (limit columns excluded), once per fiber
for fiber_key, plot_title, guide_label in (
    ('x', 'Scatter Plot, fiber X', 'Guide fbx:'),
    ('y', 'Scatter Plot, fiber Y', 'Guide fby:'),
):
    plot_scatter(z_df.iloc[:, :-2], fiber_key, plot_title, 'test', 'MEAS', guide_label)

In [None]:
#Rows with an even index label go to the x fiber, rows with an odd label to the y fiber
x_fiber, y_fiber = z_df.loc[z_df.index % 2 == 0], z_df.loc[z_df.index % 2 != 0]
#Recompute RyR on the filtered data
RyRx, RyRy = RyR(z_df)

In [None]:
#Show the label of the first entry in `largest` (sorted by descending std where it is built)
print(largest.index[0])