<a href="https://colab.research.google.com/github/Meguazy/project_CSD/blob/main/notebook_exploration_cleaning/entropy_axis_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab runtime; the data folder used below
# lives under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Work from the project data folder on Drive so the CSVs can be read by bare file name.
%cd "/content/drive/MyDrive/Schnell_Traino_Project_CSD/NewData"

In [None]:
# List the files in the current working directory.
%ls

# Set up the CSVs and libraries

In [86]:
import pandas as pd
from scipy.stats import entropy
from itertools import combinations
import numpy as np

Load the two CSVs. Their delimiter was changed beforehand so that they open more easily (with the default `pd.read_csv` settings).

In [87]:
# Recordings without anomalies, read with pandas' default CSV settings.
no_guasto_df = pd.read_csv(filepath_or_buffer="NO_GUASTO.csv")

In [88]:
# Recordings with anomalies, read with pandas' default CSV settings.
guasto_df = pd.read_csv(filepath_or_buffer="RUOTA_GUASTA.csv")

Aggregate the entries by `Discrete_Time` (taking the mean of every column) so that each time step appears in exactly one row.

In [None]:
# Collapse all rows that share a Discrete_Time into one row holding the
# column-wise mean, then turn Discrete_Time back into a regular column.
# NOTE(review): every other column is averaged too, including
# Acquisition_Number -- confirm a Discrete_Time never spans two acquisitions.
guasto_df_aggregated = (
    guasto_df
    .groupby(by='Discrete_Time')
    .mean()
    .reset_index()
)

In [None]:
# Collapse all rows that share a Discrete_Time into one row holding the
# column-wise mean, then turn Discrete_Time back into a regular column.
# NOTE(review): every other column is averaged too, including
# Acquisition_Number -- confirm a Discrete_Time never spans two acquisitions.
no_guasto_df_aggregated = (
    no_guasto_df
    .groupby(by='Discrete_Time')
    .mean()
    .reset_index()
)

In [91]:
# Names of the 27 accelerometer columns: 3 boards x 3 accelerometers per
# board x 3 axes, ordered board -> accelerometer -> axis.
accelerometer_columns = [
    f"Board{board}Acc{sensor}{axis}"
    for board in (1, 2, 3)
    for sensor in (1, 2, 3)
    for axis in "XYZ"
]

In [None]:
# Per-axis value statistics for the anomalies dataset: how many non-null
# samples there are, how many distinct values, and how often each one occurs.
for acc_col in accelerometer_columns:
    axis_series = guasto_df_aggregated[acc_col]
    all_values_count = axis_series.count()
    unique_values_count = axis_series.nunique()
    unique_values_counts = axis_series.value_counts()

    # A single print call; sep="\n" puts each item on its own line and the
    # trailing "\n" item leaves the same blank gap between columns.
    print(
        f"Statistics for {acc_col}:",
        f"Count of all values: {all_values_count}",
        f"Count of unique values: {unique_values_count}",
        "Count of occurrences for each unique value:",
        unique_values_counts,
        "\n",
        sep="\n",
    )


# Valid entropy records for each axis

Entropy for the dataset with anomalies

In [96]:
# For each accelerometer axis, count the acquisitions (anomalies dataset)
# whose entropy is a usable, finite number.
#
# scipy.stats.entropy normalises the column to sum to 1 and treats it as a
# probability vector, so a raw signal can yield -inf (negative entries after
# normalisation) or NaN (e.g. the column sums to zero or contains NaN).
# Both cases are rejected below; checking only for inf would count NaN as valid.
# NOTE(review): entropy is computed on the raw sample values rather than on a
# histogram of them -- confirm this is the intended measure.
valid_entropy_counts = {acc_col: 0 for acc_col in accelerometer_columns}

# One group per Acquisition_Number, visited in order of first appearance.
# Rows with a missing Acquisition_Number are skipped instead of producing an
# empty slice.
for acquisition_number, filtered_df in guasto_df_aggregated.groupby('Acquisition_Number', sort=False):
    for acc_col in accelerometer_columns:
        axis_entropy = entropy(filtered_df[acc_col])

        # Finite means neither +/-inf nor NaN.
        if np.isfinite(axis_entropy):
            valid_entropy_counts[acc_col] += 1

# Print the count of valid entropy values for each accelerometer
print("Count of valid entropy values for each accelerometer:")
for acc_col, count in valid_entropy_counts.items():
    print(f"{acc_col}: {count}")

Count of valid entropy values for each accelerometer:
Board1Acc1X: 23
Board1Acc1Y: 2
Board1Acc1Z: 38
Board1Acc2X: 0
Board1Acc2Y: 14
Board1Acc2Z: 74
Board1Acc3X: 28
Board1Acc3Y: 0
Board1Acc3Z: 14
Board2Acc1X: 160
Board2Acc1Y: 0
Board2Acc1Z: 125
Board2Acc2X: 0
Board2Acc2Y: 116
Board2Acc2Z: 90
Board2Acc3X: 20
Board2Acc3Y: 0
Board2Acc3Z: 0
Board3Acc1X: 0
Board3Acc1Y: 33
Board3Acc1Z: 0
Board3Acc2X: 164
Board3Acc2Y: 10
Board3Acc2Z: 0
Board3Acc3X: 145
Board3Acc3Y: 0
Board3Acc3Z: 0


Entropy for the dataset with no anomalies

In [97]:
# For each accelerometer axis, count the acquisitions (no-anomalies dataset)
# whose entropy is a usable, finite number.
#
# scipy.stats.entropy normalises the column to sum to 1 and treats it as a
# probability vector, so a raw signal can yield -inf (negative entries after
# normalisation) or NaN (e.g. the column sums to zero or contains NaN).
# Both cases are rejected below; checking only for inf would count NaN as valid.
# NOTE(review): entropy is computed on the raw sample values rather than on a
# histogram of them -- confirm this is the intended measure.
valid_entropy_counts = {acc_col: 0 for acc_col in accelerometer_columns}

# One group per Acquisition_Number, visited in order of first appearance.
# Rows with a missing Acquisition_Number are skipped instead of producing an
# empty slice.
for acquisition_number, filtered_df in no_guasto_df_aggregated.groupby('Acquisition_Number', sort=False):
    for acc_col in accelerometer_columns:
        axis_entropy = entropy(filtered_df[acc_col])

        # Finite means neither +/-inf nor NaN.
        if np.isfinite(axis_entropy):
            valid_entropy_counts[acc_col] += 1

# Print the count of valid entropy values for each accelerometer
print("Count of valid entropy values for each accelerometer:")
for acc_col, count in valid_entropy_counts.items():
    print(f"{acc_col}: {count}")

Count of valid entropy values for each accelerometer:
Board1Acc1X: 44
Board1Acc1Y: 137
Board1Acc1Z: 144
Board1Acc2X: 0
Board1Acc2Y: 128
Board1Acc2Z: 141
Board1Acc3X: 46
Board1Acc3Y: 113
Board1Acc3Z: 79
Board2Acc1X: 162
Board2Acc1Y: 59
Board2Acc1Z: 146
Board2Acc2X: 21
Board2Acc2Y: 186
Board2Acc2Z: 177
Board2Acc3X: 50
Board2Acc3Y: 44
Board2Acc3Z: 80
Board3Acc1X: 0
Board3Acc1Y: 222
Board3Acc1Z: 117
Board3Acc2X: 170
Board3Acc2Y: 109
Board3Acc2Z: 39
Board3Acc3X: 151
Board3Acc3Y: 131
Board3Acc3Z: 85
