# Script Info

This notebook is just the sensitivity analysis script in a notebook form so that you could load in a bunch of data from a folder and based on the names in the folder you can filter it out based on what you want your plots to look at.

The code section bellow is the code required to process the data frames for a specific folder


In [1]:
#########################################
# Packages/ other python files to import.
#########################################

import ae_process_algos as aepe
from pathlib import Path
from os import listdir
import pandas as pd
import numpy as np

#########################################
# User inputs
#########################################

folder_with_data = "data_out"

sample_rate = ((125E6)/32) # The ammount of samples taken in a secound.

lower_frequency = 120000
higher_frequency = 180000
threshold = 0.2
roll_off = 50

#########################################
# Functions To Run
#########################################

def compute_all_ae_processing_algos(signal,sample_rate,lower_frequency,higher_frequency,threshold,roll_off):
    """
    Computes all the ae processing algos for a specific row.
    Meant to be used with the apply statement in pandas.
    Row format is the one that measure produces in this git page.

    Args:
        x: represents the row in the data frame
        sample_rate: the sample rate when recording the signal
        lower_frequency: the lower frequency that is choosen for band energy calculations
        higher_frequency: the higher frequency that is choosen for band energy calculations
        threshold: the amplitude where it will start counting above that.
        roll_off: percentage for the roll off calculations (0-100)
    
    Returns:
        A list of all the new properties to add to the row.

    """

    # Set up to run functions one after another.
    spectrum = aepe.singal_to_Spectrum(signal)

    return pd.Series({
            "band_energy": aepe.band_energy(spectrum,sample_rate,lower_frequency,higher_frequency),
            "band_energy_ratio": aepe.band_energy_ratio(spectrum,sample_rate,lower_frequency,higher_frequency),
            "clearance_factor": aepe.clearance_factor(signal),
            "counts": aepe.counts(signal,threshold),
            "crest_factor": aepe.crest_factor(signal),
            "energy": aepe.energy(signal),
            "impulse_factor": aepe.impulse_factor(signal),
            "k_factor": aepe.k_factor(signal),
            "kurtosis": aepe.kurtosis(signal),
            "margin_factor": aepe.margin_factor(signal),
            "peak_amplitude": aepe.peak_amplitude(signal),
            "rms": aepe.rms(signal),
            "shape_factor": aepe.shape_factor(signal),
            "skewness": aepe.skewness(signal),
            "spectral_centroid": aepe.spectral_centroid(spectrum,sample_rate),
            "spectral_kurtosis": aepe.spectral_kurtosis(spectrum,sample_rate),
            "spectral_peak_frequency": aepe.spectral_peak_frequency(spectrum,sample_rate),
            "spectral_rolloff": aepe.spectral_rolloff(spectrum,sample_rate,roll_off),
            "spectral_skewness": aepe.spectral_skewness(spectrum,sample_rate),
            "spectral_variance": aepe.spectral_variance(spectrum,sample_rate),
            "zero_crossing_rate": aepe.zero_crossing_rate(signal,sample_rate)
            })

#########################################
# Setup
#########################################

# Files in folder we are working on.
files_in_folder = listdir(folder_with_data)

# List the properties that will be calculated.
properties_to_calculate = [
    "band_energy",
    "band_energy_ratio",
    "clearance_factor",
    "counts",
    "crest_factor",
    "energy",
    "impulse_factor",
    "k_factor",
    "kurtosis",
    "margin_factor",
    "peak_amplitude",
    "rms",
    "shape_factor",
    "skewness",
    "spectral_centroid",
    "spectral_kurtosis",
    "spectral_peak_frequency",
    "spectral_rolloff",
    "spectral_skewness",
    "spectral_variance",
    "zero_crossing_rate"]

# Creating a blank data frame to hold all the processed data means and standard deviations from each file
# Will then be used to compare against each other with line plots and see how it all differs
normal_mean_processed = pd.DataFrame(
    columns=properties_to_calculate, index=files_in_folder)
normal_std_processed = pd.DataFrame(
    columns=properties_to_calculate, index=files_in_folder)


#########################################
# Main Function running.
#########################################

# process signal data and save mean and standard deviation.
for file in files_in_folder:

    # Importing dataframe
    file_data_frame = pd.read_csv(Path(folder_with_data + "/" + file))

    file_data_frame = file_data_frame[processed_data_frame["Excitation Freq"] == 200000]

    # Converting signal column to numpy array from string
    file_data_frame['Signal'] = file_data_frame['Signal'].apply(
        lambda x: list(map(float, ast.literal_eval(x))) if pd.notnull(x) else [])
    file_data_frame['Signal'] = file_data_frame['Signal'].apply(np.array)

    # Using the apply statement to calculate all ae properties for each data frame row
    # Then adding a column for that property
    processed_data_frame = file_data_frame.apply(
            lambda row: compute_all_ae_processing_algos(
                row["Signal"],
                sample_rate,
                lower_frequency,
                higher_frequency,
                threshold,
                roll_off
                ),axis=1)

    print("Next file")
    #TODO: Create a function to make a string to run all of the relevent functions that a user defines in a list. Make it create the new columns in the data frame and run the funtions of the values.
    #TODO: create a function to execute the ae processing algos that we specify in a list. Make it a function.

    # Getting mean and standard deviation of properties and placing in tables.
    normal_mean_processed.loc[file] = processed_data_frame.mean(axis=0)
    normal_std_processed.loc[file] = processed_data_frame.std(axis=0)

# Deleating data frames to free up memory once processing is compleated.
del(file_data_frame)
del(processed_data_frame)
print(normal_mean_processed)
print(normal_std_processed)

NameError: name 'processed_data_frame' is not defined

## Above print statements

Above you should see a reduced table for the mean and standard deviation. The next file statement just helps to keep track of how many data frames it has processed when it is running.

## Bellow code

The bellow code combines the mean and standard deviation tables to then use the apply statement on the new data frame.

In [None]:
# Combining the standard deviation and mean tables to allow the application of the apply statement
import pandas as pd

# Adding a prefex to the column headers of mean and std dataframes respectivly
normal_mean_processed.columns = [f"mean_{col}" for col in normal_mean_processed.columns]
normal_std_processed.columns = [f"std_{col}" for col in normal_std_processed.columns]
combined_normal_mean_std = pd.concat([normal_mean_processed, normal_std_processed], axis=1)

print(combined_normal_mean_std)
print(combined_normal_mean_std.columns)

## Above print statments

When the above code block runs you will see a data frame of the combined mean and standard deviation data and the column names in the data frame.

## Bellow function

The bellow function is a simple way to plot a line onto a plt plot. Needs other code around it to make sure lines do not go onto the same plot if we are trying to avoid that.

In [None]:
# defining the apply function to use on the data frame.

import matplotlib.pyplot as plt
import scipy.stats as stats

def normal_dis_plot(mean,std,line_name):
    """
    Function to take the mean and standard deviation and plot the normal distrabution
    
    Args:
        mean: normal distrabution mean
        std: normal distrabution standard deviation
        line_name: name that line will be given

    Returns:
        plots a line of the normal distrabution

    """
    x_values = np.linspace(mean - 1*std, mean + 1*std, 100)
    plt.plot(x_values, stats.norm.pdf(x_values, mean, std), label=line_name)


# Filter point here change like statement as required

The statement bellow is where you can change what orignal data frames that have been processed you actually want to make charts for. ie changing the string in the like=... bit will allow you to only include the files containing that text to be included in the pdf doc that will be made.

This allows you to make multiple different pdf pages easier as all the data frames have been processed already and you just have to rerun the filter and pdf making code sections.

In [None]:
# Filtering of the processed data frame operation
filtered_combined_normal_mean_std = combined_normal_mean_std.filter(like="Head",axis=0)
print(filtered_combined_normal_mean_std)

Empty DataFrame
Columns: [mean_band_energy, mean_band_energy_ratio, mean_clearance_factor, mean_counts, mean_crest_factor, mean_energy, mean_impulse_factor, mean_k_factor, mean_kurtosis, mean_margin_factor, mean_peak_amplitude, mean_rms, mean_shape_factor, mean_skewness, mean_spectral_centroid, mean_spectral_kurtosis, mean_spectral_peak_frequency, mean_spectral_rolloff, mean_spectral_skewness, mean_spectral_variance, mean_zero_crossing_rate, std_band_energy, std_band_energy_ratio, std_clearance_factor, std_counts, std_crest_factor, std_energy, std_impulse_factor, std_k_factor, std_kurtosis, std_margin_factor, std_peak_amplitude, std_rms, std_shape_factor, std_skewness, std_spectral_centroid, std_spectral_kurtosis, std_spectral_peak_frequency, std_spectral_rolloff, std_spectral_skewness, std_spectral_variance, std_zero_crossing_rate]
Index: []

[0 rows x 42 columns]


## Above code

Above you should see a small portion of the filtered row wise data frame you have created. helps to make sure everything has worked correctly.

## Bellow code

Code bellow makes the pdf document with all the attribute plots. The printing of attribute just helps to keep track of prgress.

Please make sure to specify a output folder and file name for the report.

In [None]:
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt

pdf_output = "pdf_reports"
pdf_file_name = "change here"

# The pdf file setup
pdf_file_of_charts = PdfPages(Path(pdf_output + "/" + pdf_file_name))

for attribute in properties_to_calculate:
    print(attribute)
    # The apply statement on the data frame to produce the chart
    filtered_combined_normal_mean_std.apply(lambda row: normal_dis_plot(row[("mean_"+attribute)],row[("std_"+attribute)],row.name), axis=1)
    plt.title(attribute + " normal plots")
    plt.ylabel("Probability")
    plt.xlabel(attribute)
    plt.legend()
    plt.savefig(pdf_file_of_charts,format="pdf")
    plt.clf()

# Close full pdf file.
pdf_file_of_charts.close()

band_energy


band_energy_ratio
clearance_factor
counts
crest_factor
energy
impulse_factor
k_factor
kurtosis
margin_factor
peak_amplitude
rms
shape_factor
skewness
spectral_centroid
spectral_kurtosis
spectral_peak_frequency
spectral_rolloff
spectral_skewness
spectral_variance
zero_crossing_rate


'\n    plt.clf() # clear plot object\n    plt.plot(Iterations,Signal_Float_Array) # plot line\n    plt.title("Signal over Time for " + Chart_Name) # give chart title\n    plt.savefig((Chart_Name + ".pdf"), format=\'pdf\') # Save to PDF\n'

<Figure size 640x480 with 0 Axes>