In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [None]:
# Locate the code checkout and its sibling assets directory; both are looked up
# three directory levels above the current working directory.
levels_up = ('..', '..', '..')
git_folder = os.path.abspath(
    os.path.join(os.getcwd(), *levels_up, 'HeroysundBridge-ML')
)
asset_folder = os.path.abspath(
    os.path.join(os.getcwd(), *levels_up, 'HeroysundBridge-ML-Assets')
)

print("Path to git folder:", git_folder)
print("Path to asset folder:", asset_folder)

# Load a single sensor recording (Parquet) from the assets directory
parquet_parts = ('bronze', 'heroysund_sensors', '2021', '02', '20210201083943.parquet')
file_path = os.path.join(asset_folder, *parquet_parts)
data = pd.read_parquet(file_path)
display(data)

In [None]:
# Columns of `data` to draw, one subplot each, in this order.
columns_to_plot = ['Point_1_N', 'Point_2_N', 'Point_3_N', 'Point_4_S', 'Point_5_S', 'Point_6_S', 'Omega_N', 
                    'Omega_S', 'PT100_Temperature', 'Point_N+1500', 'Point_N-1500', 'Point_S+2000',
                    'Point_S+1000', 'Point_S+0', 'Point_S-1000', 'Point_S-2000']

# The figure and its axes must exist before the loop draws on them. Creating
# them afterwards raises NameError on a fresh kernel (or silently draws onto a
# stale `axs` left over from another cell and then shows an empty figure).
fig, axs = plt.subplots(len(columns_to_plot), 1, figsize=(10, 50))

for i, col in enumerate(columns_to_plot):
    axs[i].plot(data[col], label=col)
    axs[i].set_title(col)
    axs[i].legend()

plt.tight_layout()
plt.show()

**Hourly variation compared to hour-mean value**

In [None]:
# Note: This Code Cell was modified with GitHub Copilot
# Mean of each selected column over every row of the loaded file.
average_values = data[[
    'Point_1_N', 'Point_2_N', 'Point_3_N',
    'Point_4_S', 'Point_5_S', 'Point_6_S',
    'Omega_N', 'Omega_S', 'PT100_Temperature',
]].mean()

# Generate column names for aggregated DataFrame
column_names = [f"{col}_mean" for col in average_values.index]

# Build the one-row frame from the raw values rather than from the Series.
# Passing the Series together with `columns=` makes pandas align by label, and
# because no "<name>_mean" label exists in the Series index every cell would
# come out as NaN.
aggregated_df = pd.DataFrame([average_values.to_numpy()], columns=column_names)

aggregated_df

In [None]:
# Note: This Code Cell was modified with GitHub Copilot
# One stacked subplot per aggregated column, all sharing the x-axis.
mean_columns = aggregated_df.columns
fig, axs = plt.subplots(len(mean_columns), 1, figsize=(20, 30), sharex=True)

for panel, mean_column in zip(axs, mean_columns):
    # Strip '_mean' to recover the matching column name in `data`
    sensor = mean_column.replace('_mean', '')
    sensor_mean = aggregated_df[mean_column].iloc[0]

    # Raw series in blue, its mean as a dashed red horizontal line
    panel.plot(data.index, data[sensor], label=f'Original {sensor}', color='blue', alpha=0.5)
    panel.axhline(y=sensor_mean, color='red', linestyle='--', label=f'Average {mean_column}')

    panel.set_ylabel('Value')
    panel.set_title(f'{sensor} Sensor Data vs. Average')
    panel.legend()

# The x label goes on the current (bottom) axes
plt.xlabel('Index')
plt.tight_layout()
plt.show()

**With outlier handling (excluding vehicle-induced abnormalities)**

In [None]:
# Note: This Code Cell was modified with GitHub Copilot
# Re-index the samples positionally (0 .. n-1). The length comes from the frame
# itself so a file with a different number of rows does not raise a
# length-mismatch error.
data.index = np.arange(len(data))

# Samples further than this from their column mean are left out of the plot.
OUTLIER_BAND = 10

# NOTE(review): assumes one file spans 60 minutes at a constant sampling rate
# (taken from the "Hourly variation" heading) — confirm against the data source.
RECORD_MINUTES = 60
samples_per_minute = len(data) / RECORD_MINUTES

# Prepare the plot
fig, axs = plt.subplots(len(aggregated_df.columns), 1, figsize=(20, 30), sharex=True)

for i, column in enumerate(aggregated_df.columns):
    original_column = column.replace('_mean', '')
    mean_value = aggregated_df[column].iloc[0]

    # Keep only values within +/- OUTLIER_BAND of the mean (bounds inclusive)
    within_band = data[original_column].between(mean_value - OUTLIER_BAND, mean_value + OUTLIER_BAND)
    filtered_data = data.loc[within_band, original_column]

    # Plot filtered data against elapsed minutes (sample position / rate) so the
    # x values are in the same unit as the axis label and ticks below
    elapsed_minutes = filtered_data.index / samples_per_minute
    axs[i].plot(elapsed_minutes, filtered_data, label=f'Original {original_column}', color='blue', alpha=0.5)

    # Plot average value as a horizontal line
    axs[i].axhline(y=mean_value, color='red', linestyle='--', label=f'Average {column}')

    axs[i].set_ylabel('Value')
    axs[i].legend()
    axs[i].set_title(f'{original_column} Sensor Data vs. Average')

# Tick every 5 minutes across the assumed recording duration
plt.xticks(np.arange(0, RECORD_MINUTES + 1, 5))
plt.xlabel('Time (minutes)')
plt.tight_layout()
plt.show()