In [1]:
import pandas as pd

In [2]:
def clear_leading_data(df, max_reset_position=255):
    """Drop the rows recorded before an early timer reset.

    A reset is any row whose 'Timestamp' is lower than the previous row's.
    Only the first such row is considered, and only when it lies within the
    first ``max_reset_position`` rows; a backwards step later than that
    leaves the frame untouched.

    Args:
        df: DataFrame with a numeric 'Timestamp' column.
        max_reset_position: 0-based row position below which the first
            backwards step is treated as a reset. Defaults to 255.

    Returns:
        A new DataFrame that starts at the reset row and carries a fresh
        0..n-1 index, or ``df`` itself (not a copy) when no early reset is
        found.
    """
    # Work with row positions rather than index labels: the result is fed to
    # .iloc, and labels stop matching positions once rows have been dropped
    # from the frame without resetting its index.
    went_backwards = (df['Timestamp'].diff() < 0).to_numpy()
    reset_positions = went_backwards.nonzero()[0]

    if reset_positions.size and reset_positions[0] < max_reset_position:
        # Keep the reset row itself and everything after it.
        return df.iloc[reset_positions[0]:].reset_index(drop=True)

    return df

In [3]:
def correct_timestamps(df, max_timestamp_value=65535):
    """Add a 'Corrected_Timestamp' column that undoes counter wraparound.

    Every row whose raw 'Timestamp' is lower than the previous row's is taken
    to be a wraparound of the counter. The corrected value is the raw value
    plus one full counter period for each wraparound seen so far, so the
    corrected column never steps backwards at a wrap.

    Args:
        df: DataFrame with a numeric 'Timestamp' column. It is modified in
            place: 'Corrected_Timestamp' is added (or overwritten).
        max_timestamp_value: Largest value the raw counter reaches before it
            wraps back to 0. Defaults to 65535.

    Returns:
        The same ``df`` object, for call chaining.
    """
    # A counter that runs 0..max_timestamp_value has max_timestamp_value + 1
    # distinct values, so each wrap is worth that many ticks. Adding only
    # max_timestamp_value would lose one tick per wrap.
    wrap_period = max_timestamp_value + 1

    # diff() compares neighbouring rows by position, so this works for any
    # index labels (e.g. after rows were dropped without resetting the index).
    # The first row has no predecessor (NaN diff) and never counts as a wrap.
    wraps_so_far = (df['Timestamp'].diff() < 0).cumsum()

    df['Corrected_Timestamp'] = df['Timestamp'] + wraps_so_far * wrap_period
    return df

In [4]:
# Load the two CSV captures into pandas DataFrames. The paths are relative,
# so they resolve against the notebook's working directory.
csv_file_path1 = "../graphs/output12.csv"
csv_file_path2 = "../graphs/output22.csv"

df1 = pd.read_csv(csv_file_path1)

df2 = pd.read_csv(csv_file_path2)

# Print the number of rows in each dataframe; the comparison helpers further
# down refuse to run unless both frames have the same shape.
print("Number of rows in df1: ", len(df1))
print("Number of rows in df2: ", len(df2))

Number of rows in df1:  786420
Number of rows in df2:  786420


In [None]:
def find_first_difference(df1, df2):
    """Print the first row at which two same-shaped DataFrames differ.

    The frames are compared element-wise with ``!=``. Note that NaN compares
    unequal to NaN, so a row holding a missing value in both frames is
    reported as a difference.

    Args:
        df1: First DataFrame.
        df2: Second DataFrame; must have the same shape as ``df1``.

    Returns:
        None. The index label of the first differing row and that row from
        each frame are printed, or a message when the shapes differ or no
        difference exists.
    """
    # Ensure that both DataFrames have the same shape
    if df1.shape != df2.shape:
        print("DataFrames have different shapes, cannot compare.")
        return

    # True for every row containing at least one differing cell
    differing_rows = (df1 != df2).any(axis=1).to_numpy()

    if not differing_rows.any():
        print("No differences found between the two DataFrames.")
        return

    # argmax on a boolean array gives the POSITION of the first True. The rows
    # are fetched with .iloc, which is positional, so a label (as returned by
    # idxmax) would select the wrong row whenever the index is not 0..n-1.
    first_diff_position = int(differing_rows.argmax())

    print("First difference found at row index:", df1.index[first_diff_position])
    print("df1 row:\n", df1.iloc[first_diff_position])
    print("df2 row:\n", df2.iloc[first_diff_position])

# Report the first row at which the two loaded captures (df1, df2) disagree.
# The function only prints; it returns nothing.
find_first_difference(df1, df2)

In [5]:
def compare_differing_rows(df1, df2):
    """Print every cell in which two same-shaped DataFrames disagree.

    The output is a total count of differing rows, then one section per row
    listing each differing column with the value from both frames, closed by
    a 40-dash divider. Comparison uses ``!=``, under which NaN never equals
    NaN. Returns None in every case.
    """
    if df1.shape != df2.shape:
        print("DataFrames have different shapes, cannot compare.")
        return

    # Boolean frame: True wherever the two inputs hold different values
    mismatch = (df1 != df2)

    # Index labels of the rows that contain at least one mismatch
    changed_labels = mismatch.index[mismatch.any(axis=1).to_numpy()]

    if changed_labels.empty:
        print("No differences found.")
        return

    print(f"Total number of differing rows: {len(changed_labels)}")
    for label in changed_labels:
        print(f"Difference found at row index {label}:")
        # Restrict the report to the columns flagged for this particular row
        row_flags = mismatch.loc[label]
        for col in row_flags.index[row_flags.to_numpy()]:
            print(f"Column '{col}' differs: df1 = {df1.loc[label, col]}, df2 = {df2.loc[label, col]}")
        print("-" * 40)  # Divider between row comparisons
    
# Print the number of differing rows, followed by a per-row listing of the
# columns whose values differ between df1 and df2.
compare_differing_rows(df1, df2)

Total number of differing rows: 30
Difference found at row index 47181:
Column 'Pressure0' differs: df1 = 92124.03425, df2 = 67035.553858
----------------------------------------
Difference found at row index 60525:
Column 'Y_Accel' differs: df1 = 149.548004, df2 = -0.98
----------------------------------------
Difference found at row index 62205:
Column 'Y_Accel' differs: df1 = 188.160004, df2 = -0.98
----------------------------------------
Difference found at row index 88221:
Column 'Z_Accel' differs: df1 = -1.568, df2 = -1.47
----------------------------------------
Difference found at row index 102525:
Column 'X_Accel' differs: df1 = -51.057999, df2 = -0.882
Column 'Y_Accel' differs: df1 = 150.479004, df2 = -0.049
----------------------------------------
Difference found at row index 213789:
Column 'Pressure0' differs: df1 = 93667.377908, df2 = 55531.995868
Column 'Temperature0' differs: df1 = -299.296189, df2 = -299.285975
Column 'Humidity0' differs: df1 = 46.163831, df2 = 46.162

In [6]:
def drop_differing_rows(df1, df2):
    """Remove from both frames every row in which they disagree.

    Rows are compared element-wise with ``!=`` (NaN never equals NaN). The
    surviving rows keep their original index labels; the index is not reset.

    Returns:
        A ``(df1_cleaned, df2_cleaned)`` tuple. When the shapes differ or no
        row differs, the two input objects are returned unchanged.
    """
    if df1.shape != df2.shape:
        print("DataFrames have different shapes, cannot compare.")
        return df1, df2

    # One flag per row: True when any column differs between the frames
    row_differs = (df1 != df2).any(axis=1)
    labels_to_drop = row_differs.index[row_differs.to_numpy()]

    if labels_to_drop.empty:
        print("No differences found.")
        return df1, df2

    # Drop the same labels from both frames so they stay row-aligned
    df1_cleaned = df1.drop(index=labels_to_drop)
    df2_cleaned = df2.drop(index=labels_to_drop)
    print(f"Dropped {len(labels_to_drop)} differing rows.")
    return df1_cleaned, df2_cleaned

# Rebind df1/df2 to the versions without the disagreeing rows. The surviving
# rows keep their original index labels, so the index now has gaps.
df1, df2 = drop_differing_rows(df1, df2)

# Print the number of total rows in the dataframes
print(f"df1 rows: {len(df1)}")
print(f"df2 rows: {len(df2)}")

Dropped 30 differing rows.
df1 rows: 786390
df2 rows: 786390


In [76]:
def add_new_timestamp_column(df, step=20):
    """Add a synthetic, evenly spaced 'New Timestamp' column.

    The column is derived purely from row position: 0, step, 2*step, ... It
    does not look at any recorded timestamp.

    Args:
        df: DataFrame to annotate. It is modified in place.
        step: Spacing between consecutive rows, as a positive integer.
            Defaults to 20 (presumably the nominal sample period in
            milliseconds -- confirm against the logger configuration).

    Returns:
        The same ``df`` object, for call chaining.
    """
    df['New Timestamp'] = range(0, step * len(df), step)
    return df

# Correct Timestamps: for each capture, first cut off any rows preceding an
# early timer reset, then add the wraparound-corrected 'Corrected_Timestamp'.
df1 = clear_leading_data(df1)
df1 = correct_timestamps(df1)

df2 = clear_leading_data(df2)
df2 = correct_timestamps(df2)

# Finally add the position-based 'New Timestamp' column (0, 20, 40, ...).
df1 = add_new_timestamp_column(df1)
df2 = add_new_timestamp_column(df2)

In [None]:
def find_inconsistent_increment(df):
    """Print how many consecutive 'Corrected_Timestamp' steps fall outside 10..30.

    Side effect: the row-to-row step is stored on ``df`` in a new
    'Corrected_Difference' column. The first row has no predecessor, so its
    step is NaN and it is never counted. Returns None.
    """
    # Step between each corrected timestamp and the one before it
    step = df['Corrected_Timestamp'].diff()
    df['Corrected_Difference'] = step

    # Steps of exactly 10 or exactly 30 still count as consistent
    out_of_band = step.lt(10) | step.gt(30)

    print(int(out_of_band.sum()))

    # if not inconsistent_rows.empty:
    #     # Get the indices and corresponding differences of inconsistent increments
    #     inconsistent_indices = inconsistent_rows.index
    #     num_inconsistencies = len(inconsistent_indices)
        
    #     print(f"Total number of inconsistent increments: {num_inconsistencies}")
    #     print("\nDetails of inconsistent increments:")
        
    #     for idx in inconsistent_indices:
    #         # Print the current row and its previous rows up to 2 rows back
    #         print(f"Inconsistency at row index: {idx}")
    #         start_index = max(0, idx - 2)
    #         # Print the details of the current and previous rows (up to 2 rows back)
    #         for i in range(start_index, idx + 1):
    #             timestamp = df.iloc[i]['Corrected_Timestamp']
    #             difference = df.iloc[i]['Corrected_Difference'] if i > 0 else 'N/A'
    #             print(f"Row index: {i}, Timestamp: {timestamp}, Difference to last timestamp: {difference}")
    #         print("-" * 40)
    # else:
    #     print("No inconsistent increments found.")
        

# Print the number of irregular timestamp steps in df2. This also adds a
# 'Corrected_Difference' column to df2 and needs 'Corrected_Timestamp' to exist.
find_inconsistent_increment(df2)

In [None]:
def count_timestamps_in_range(df, lower_bound=0, upper_bound=10, previous_value_threshold=65500):
    """Print how many rows have a 'Timestamp' in a range after a low predecessor.

    A row is counted when its 'Timestamp' lies in ``[lower_bound, upper_bound]``
    (both ends inclusive) and the previous row's 'Timestamp' is strictly below
    ``previous_value_threshold``. The first row has no predecessor and is
    never counted.

    Side effect: a 'Previous_Timestamp' column holding the previous row's
    'Timestamp' is added to ``df``. Returns None.
    """
    # Previous row's raw timestamp, aligned with the current row
    df['Previous_Timestamp'] = df['Timestamp'].shift(1)

    current = df['Timestamp']
    previous = df['Previous_Timestamp']

    # Rows with a NaN on either side of a comparison evaluate to False
    in_range = current.ge(lower_bound) & current.le(upper_bound)
    matches = in_range & previous.lt(previous_value_threshold)

    count = int(matches.sum())

    print(f"Number of occurrences where 'Timestamp' is between {lower_bound} and {upper_bound} "
          f"and the previous value is less than {previous_value_threshold}: {count}")

    
# Count rows in df1 where the raw timestamp is 0..10 although the previous
# row was below 65500. This also adds a 'Previous_Timestamp' column to df1.
count_timestamps_in_range(df1, previous_value_threshold=65500)

In [83]:
import matplotlib.pyplot as plt
import os

def plot_acceleration_vs_time(df, output_dir):
    """Save a line plot of the three acceleration axes against 'New Timestamp'.

    Reads the 'New Timestamp', 'X_Accel', 'Y_Accel' and 'Z_Accel' columns and
    writes 'acceleration_vs_time1.png' into ``output_dir``. The figure is
    closed after saving and nothing is returned.

    NOTE: a later cell in this notebook defines another function with this
    same name and a different signature; whichever cell ran last is the one
    in effect.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # One trace per axis, all sharing the same x values
    traces = (('X_Accel', 'X Accel'), ('Y_Accel', 'Y Accel'), ('Z_Accel', 'Z Accel'))
    for column, legend_label in traces:
        ax.plot(df['New Timestamp'], df[column], label=legend_label)

    ax.set_xlabel('Timestamp')
    ax.set_ylabel('Acceleration (Gs)')
    ax.set_title('Acceleration on X, Y, and Z vs Time')
    ax.legend()
    ax.grid(True)

    fig.savefig(os.path.join(output_dir, 'acceleration_vs_time1.png'))
    plt.close(fig)

# Example usage: write the full-length acceleration plot for df1 into the
# graphs directory (df1 must already have the 'New Timestamp' column).
output_dir = "../graphs"
plot_acceleration_vs_time(df1, output_dir)

In [None]:
def plot_filtered_acceleration(df, output_dir, start_time, end_time):
    """Save an acceleration plot restricted to a 'New Timestamp' window.

    Rows whose 'New Timestamp' lies in ``[start_time, end_time]`` (inclusive)
    are plotted with the x axis divided by 1000, and the figure is written to
    'filtered_acceleration_vs_time1.png' in ``output_dir``. ``df`` is not
    modified.

    NOTE: a later cell in this notebook defines another function with this
    same name that filters on 'Corrected_Timestamp' instead; whichever cell
    ran last is the one in effect.

    Args:
        df: DataFrame with 'New Timestamp', 'X_Accel', 'Y_Accel', 'Z_Accel'.
        output_dir: Directory the PNG is written into.
        start_time: Lower bound of the window, in 'New Timestamp' units.
        end_time: Upper bound of the window, in 'New Timestamp' units.
    """
    in_window = (df['New Timestamp'] >= start_time) & (df['New Timestamp'] <= end_time)
    filtered_df = df.loc[in_window]

    # Convert 'New Timestamp' from milliseconds to seconds. This is kept as a
    # standalone Series: assigning a new column onto the filtered selection
    # would trigger pandas' SettingWithCopyWarning.
    time_seconds = filtered_df['New Timestamp'] / 1000

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(time_seconds, filtered_df['X_Accel'], label='X Accel')
        ax.plot(time_seconds, filtered_df['Y_Accel'], label='Y Accel')
        ax.plot(time_seconds, filtered_df['Z_Accel'], label='Z Accel')
        ax.set_xlabel('New Timestamp (seconds)')
        ax.set_ylabel('Acceleration (Gs)')
        ax.set_title('Acceleration on X, Y, and Z vs Corrected Timestamp (in seconds)')
        ax.legend()
        ax.grid(True)

        # Save the plot
        fig.savefig(os.path.join(output_dir, 'filtered_acceleration_vs_time1.png'))
    finally:
        # Release the figure even when plotting or saving fails, so figures
        # do not pile up in the kernel.
        plt.close(fig)

# Example usage: plot df1 for 'New Timestamp' values between 6.1e6 and 6.7e6.
# The bounds are in the same units as the 'New Timestamp' column.
output_dir = "../graphs"
start_time = 0.61e7
end_time = 0.67e7
plot_filtered_acceleration(df1, output_dir, start_time, end_time)


In [92]:
import matplotlib.pyplot as plt
import os

def plot_temperature_vs_time(df, output_dir):
    """Save a line plot of both temperature channels against 'New Timestamp'.

    Reads the 'New Timestamp', 'Temperature0' and 'Temperature1' columns and
    writes 'temperature_vs_time1.png' into ``output_dir``. ``df`` is not
    modified and nothing is returned.

    Args:
        df: DataFrame holding the three columns named above.
        output_dir: Directory the PNG is written into.
    """
    # The x axis uses the raw 'New Timestamp' values. An earlier version also
    # stored a 'New Timestamp (s)' column (value / 1000) on the caller's frame
    # but never plotted it; that write is dropped because a plotting helper
    # should not alter its input.
    # NOTE(review): if seconds were intended on the x axis, plot
    # df['New Timestamp'] / 1000 here and relabel the axis -- confirm.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(df['New Timestamp'], df['Temperature0'], label='Temp0')
        ax.plot(df['New Timestamp'], df['Temperature1'], label='Temp1')
        ax.set_xlabel('Timestamp')
        ax.set_ylabel('Temperature (C)')
        ax.set_title('Temperature0 and Temperature1 vs Time')
        ax.legend()
        ax.grid(True)
        fig.savefig(os.path.join(output_dir, 'temperature_vs_time1.png'))
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)

# Example usage: write the temperature plot for df2 into the graphs directory
# (df2 must already have the 'New Timestamp' column).
output_dir = "../graphs"
plot_temperature_vs_time(df2, output_dir)

In [43]:
import matplotlib.pyplot as plt
import os

def plot_acceleration_vs_time(df, output_dir, val, pre_event_seconds=150, post_event_minutes=45):
    """Save an acceleration plot for a time window around the first large reading.

    The event is the first row in which the absolute value of 'X_Accel',
    'Y_Accel' or 'Z_Accel' exceeds ``val``. The plot covers the rows whose
    'Corrected_Timestamp' lies between ``pre_event_seconds`` before and
    ``post_event_minutes`` after that row's timestamp, and is written to
    'acceleration_vs_time_filtered.png' in ``output_dir``. ``df`` is not
    modified.

    NOTE: an earlier cell in this notebook defines a two-argument function
    with this same name; whichever cell ran last is the one in effect.

    Args:
        df: DataFrame with the three acceleration columns and
            'Corrected_Timestamp'.
        output_dir: Directory the PNG is written into.
        val: Threshold on the absolute acceleration (strictly greater than).
        pre_event_seconds: Seconds of data to include before the event.
        post_event_minutes: Minutes of data to include after the event.

    Returns:
        None. When no reading exceeds ``val`` a message is printed and no
        file is written.
    """
    # Evaluate the threshold on a temporary frame instead of writing
    # '*_Abs' helper columns into the caller's DataFrame.
    accel_columns = ['X_Accel', 'Y_Accel', 'Z_Accel']
    exceeds = (df[accel_columns].abs() > val).any(axis=1).to_numpy()

    if not exceeds.any():
        print("No events found with accelerometer values exceeding the specified threshold.")
        return

    # argmax on a boolean array is the position of the first True
    event_time = df['Corrected_Timestamp'].iloc[int(exceeds.argmax())]

    # Define the time range for plotting. The factors of 1000 convert to
    # milliseconds, i.e. 'Corrected_Timestamp' is assumed to be in ms.
    start_time = event_time - (pre_event_seconds * 1000)
    end_time = event_time + (post_event_minutes * 60 * 1000)

    in_window = (df['Corrected_Timestamp'] >= start_time) & (df['Corrected_Timestamp'] <= end_time)
    filtered_df = df.loc[in_window]

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(filtered_df['Corrected_Timestamp'], filtered_df['X_Accel'], label='X Accel')
        ax.plot(filtered_df['Corrected_Timestamp'], filtered_df['Y_Accel'], label='Y Accel')
        ax.plot(filtered_df['Corrected_Timestamp'], filtered_df['Z_Accel'], label='Z Accel')
        ax.set_xlabel('Timestamp')
        ax.set_ylabel('Acceleration (Gs)')
        ax.set_title('Acceleration on X, Y, and Z vs Time')
        ax.legend()
        ax.grid(True)
        fig.savefig(os.path.join(output_dir, 'acceleration_vs_time_filtered.png'))
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)

# Example usage: plot df1 around the first sample whose absolute acceleration
# on any axis exceeds val (df1 must already have 'Corrected_Timestamp').
output_dir = "../graphs"
val = 5
plot_acceleration_vs_time(df1, output_dir, val)

In [20]:
def plot_filtered_acceleration(df, output_dir, start_time, end_time):
    """Save an acceleration plot restricted to a 'Corrected_Timestamp' window.

    Rows whose 'Corrected_Timestamp' lies in ``[start_time, end_time]``
    (inclusive) are plotted and the figure is written to
    'filtered_acceleration_vs_time.png' in ``output_dir``. The figure is
    closed after saving and nothing is returned.

    NOTE: an earlier cell in this notebook defines another function with this
    same name that filters on 'New Timestamp' instead; whichever cell ran
    last is the one in effect.
    """
    # Keep only the rows inside the requested window
    timestamps = df['Corrected_Timestamp']
    filtered_df = df[(timestamps >= start_time) & (timestamps <= end_time)]

    fig, ax = plt.subplots(figsize=(10, 6))

    # One trace per axis, all sharing the same x values
    traces = (('X_Accel', 'X Accel'), ('Y_Accel', 'Y Accel'), ('Z_Accel', 'Z Accel'))
    for column, legend_label in traces:
        ax.plot(filtered_df['Corrected_Timestamp'], filtered_df[column], label=legend_label)

    ax.set_xlabel('Corrected Timestamp')
    ax.set_ylabel('Acceleration (Gs)')
    ax.set_title('Acceleration on X, Y, and Z vs Corrected Timestamp')
    ax.legend()
    ax.grid(True)

    fig.savefig(os.path.join(output_dir, 'filtered_acceleration_vs_time.png'))
    plt.close(fig)

# Example usage: plot df2 for 'Corrected_Timestamp' values between 9.245e8
# and 9.247e8. The bounds are in the same units as 'Corrected_Timestamp'.
output_dir = "../graphs"
start_time = 9.245e8
end_time = 9.247e8
plot_filtered_acceleration(df2, output_dir, start_time, end_time)