In [260]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [261]:
def trim_dataset(df, z_threshold=3, tolerance=50):
    """Trim a single-column recording to the region where it rises sharply.

    The sample-to-sample difference of the data is converted to z-scores.
    Rows whose z-score exceeds ``z_threshold`` mark the region of interest,
    which is then padded by ``tolerance`` index labels on both sides.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with exactly one column holding the samples. The frame is not
        modified; all work happens on a copy. The index labels must support
        integer arithmetic (e.g. the default ``RangeIndex``) because the
        padding is added to / subtracted from the labels themselves.
    z_threshold : float, default 3
        A difference is significant when its z-score is strictly greater
        than this value. Only positive z-scores are considered, so sharp
        drops are ignored.
        NOTE(review): the original comment spoke of deviations "away from
        the mean" in general - confirm whether drops should count as well.
    tolerance : int, default 50
        Padding, in index labels, kept before the first and after the last
        significant difference.

    Returns
    -------
    pandas.DataFrame
        Rows between the padded bounds with the columns ``'Data'`` and
        ``'Derivative'``. When no difference is significant (for example
        perfectly flat data) the whole frame is returned untrimmed.

    Raises
    ------
    ValueError
        If ``df`` does not have exactly one column.
    """
    # Work on a copy so the caller's frame keeps its columns and the function
    # can safely be called again on the same input.
    data = df.copy()
    data.columns = ['Data']

    # Discrete derivative: difference between consecutive samples.
    data['Derivative'] = data['Data'].diff()
    derivative = data['Derivative']

    # Standardise the derivative so one threshold works regardless of scale.
    z_scores = (derivative - derivative.mean()) / derivative.std()

    # NaN z-scores (the first row, or every row when the standard deviation is
    # zero) compare False here and are therefore never significant.
    significant_labels = data.index[(z_scores > z_threshold).to_numpy()]

    if significant_labels.empty:
        # Nothing stands out, so there is nothing to trim around.
        return data

    start_label = significant_labels[0] - tolerance
    end_label = significant_labels[-1] + tolerance

    # Label-based slice: both bounds are inclusive.
    return data.loc[start_label:end_label]

In [262]:
def apply_moving_median_filter(df, window_size=3, samples_per_unit=1000):
    """Smooth a single-column voltage recording with a centred moving median.

    A moving median suppresses noise and outliers while preserving sharp
    edges, which is what we want for step-shaped data.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with exactly one column holding the samples. The frame is not
        modified; all work happens on a copy.
    window_size : float, default 3
        Length of the median window, expressed in units of
        ``samples_per_unit`` samples.
        NOTE(review): presumably seconds at a 1 kHz sampling rate - confirm.
    samples_per_unit : float, default 1000
        Number of samples that make up one unit of ``window_size``.

    Returns
    -------
    pandas.DataFrame
        New frame with the columns ``'Voltage'`` (the input samples) and
        ``'Smoothed_Voltage'`` (the moving median). Rows containing NaN are
        dropped, which removes the edges where the window is incomplete; the
        original index labels of the remaining rows are kept.

    Raises
    ------
    ValueError
        If ``df`` does not have exactly one column.
    """
    # Number of samples covered by one window.
    window_size_samples = int(window_size * samples_per_unit)

    # Copy first so the caller's frame is left exactly as it was passed in.
    filtered = df.copy()

    # TODO: name the column when the data is collected instead of here.
    filtered.columns = ['Voltage']

    filtered['Smoothed_Voltage'] = (
        filtered['Voltage']
        .rolling(window=window_size_samples, center=True)
        .median()
    )

    # The rolling window is incomplete at both edges and yields NaN there.
    return filtered.dropna()

In [263]:
def _flat_window_midpoints(derivative, window_size):
    """Return the midpoint position of every window whose derivative is all zero."""
    last_position = len(derivative) - 1
    midpoints = []

    for window_start in range(0, len(derivative), window_size):
        window = derivative.iloc[window_start:window_start + window_size]
        # A NaN (such as the first derivative value) never equals zero, so a
        # window containing one is not treated as flat.
        if (window == 0).all():
            midpoint = window_start + (window_size - 1) // 2
            # The trailing window may be shorter than window_size; clamp so the
            # midpoint can never point past the end of the data.
            midpoints.append(min(midpoint, last_position))

    return midpoints


def _group_plateaus(levels, midpoints, threshold):
    """Group consecutive flat windows into (start, end) position pairs per plateau."""
    plateau_regions = []
    run_start = None

    for i, step in enumerate(levels.diff()):
        if abs(step) > threshold:
            # A jump between window i - 1 and window i closes the open plateau.
            if run_start is not None:
                plateau_regions.append((midpoints[run_start], midpoints[i - 1]))
                run_start = None
            # NOTE(review): window i itself does not open the next plateau;
            # the next one starts at the following window whose step is within
            # the threshold. A plateau made of a single flat window between two
            # jumps is therefore skipped - confirm this is intended.
        elif run_start is None:
            # Also taken for the first window, whose step is NaN.
            run_start = i

    # Close the plateau that is still open at the end of the data.
    if run_start is not None:
        plateau_regions.append((midpoints[run_start], midpoints[-1]))

    return plateau_regions


def get_voltages_from_calibration_data(df, window_size=50, threshold=10):
    """Extract one representative voltage per plateau of a stepped recording.

    The smoothed signal is scanned in consecutive windows of ``window_size``
    samples; a window is flat when its sample-to-sample difference is zero
    throughout. Neighbouring flat windows whose smoothed level differs by at
    most ``threshold`` are merged into one plateau, and the median of the
    unsmoothed ``'Voltage'`` samples across each plateau is reported.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``'Voltage'`` and ``'Smoothed_Voltage'``.
        Side effect: a ``'Derivative'`` column (difference of consecutive
        ``'Smoothed_Voltage'`` values) is written into this frame.
    window_size : int, default 50
        Number of samples checked together for flatness.
    threshold : float, default 10
        Largest change in smoothed level between neighbouring flat windows
        that is still considered the same plateau.

    Returns
    -------
    list
        One median voltage per detected plateau, in order of appearance.
        Empty when no plateau is found.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    df['Derivative'] = df['Smoothed_Voltage'].diff()

    midpoints = _flat_window_midpoints(df['Derivative'], window_size)

    # Smoothed level of each flat window, sampled at the window's midpoint.
    levels = df['Smoothed_Voltage'].iloc[midpoints]

    plateau_regions = _group_plateaus(levels, midpoints, threshold)

    # The median is taken over the raw 'Voltage' samples (not the smoothed
    # ones); start and end are positions and both are included.
    return [
        df['Voltage'].iloc[start:end + 1].median()
        for start, end in plateau_regions
    ]

In [264]:
# Load the raw calibration recording. Forward slashes keep this relative path
# valid on Windows, macOS and Linux alike (backslashes only work on Windows).
calibration_df = pd.read_csv('All_Data/2022_2023_Data/Calibration1.csv')

# Smooth the recording, then reduce it to one voltage per calibration plateau.
calibration_df = apply_moving_median_filter(calibration_df)
get_voltages_from_calibration_data(calibration_df)

[-8452.148438,
 -8032.226562,
 -7563.476562,
 -7065.429688,
 -6489.257812,
 -6064.453125]