Python Imports

In [None]:
%pip install --upgrade pip
%pip install pandas
%pip install numpy
%pip install plotly
%pip install nbformat

In [2]:
import pandas as pd
import numpy as np
from itertools import groupby
from operator import itemgetter
import plotly.graph_objs as go
import plotly.express as px

In [None]:
# pandas display options, applied in one pass.
# A value of None for max_rows / max_columns / max_colwidth removes that limit.
_display_options = {
    'display.max_rows': None,
    'display.max_columns': None,
    'display.width': 0,
    'display.max_colwidth': None,
    'display.expand_frame_repr': False,
}
for _option_name, _option_value in _display_options.items():
    pd.set_option(_option_name, _option_value)

# Shared variables: names of the sensor columns analysed in this notebook
sensors = ["dig1", "ana1", "noise"]
def visualize_dataframe(df, sample_size=None, value_vars=None, random_state=None):
    """Plot sensor readings over time as an interactive Plotly line chart.

    Args:
        df: DataFrame containing a 'Time' column plus one column per sensor
            that should be plotted.
        sample_size: Optional number of rows to sample at random before
            plotting. A value larger than the number of rows is clamped to
            ``len(df)`` instead of raising. ``None`` or ``0`` plots every row.
        value_vars: Optional list of sensor column names to plot. Defaults to
            ``['ana1', 'dig1', 'noise']``.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            downsampled plot can be reproduced. ``None`` keeps the sampling
            unseeded.

    Returns:
        None. The figure is rendered through ``fig.show()``; it is deliberately
        not returned so that a trailing call in a notebook cell does not
        display the figure a second time.
    """
    if value_vars is None:
        value_vars = ['ana1', 'dig1', 'noise']

    # Downsample the DataFrame if sample_size is provided
    if sample_size:
        # DataFrame.sample draws without replacement by default and raises
        # ValueError when n exceeds the number of rows, so cap the request.
        n_rows = min(sample_size, len(df))
        downsampled_df = df.sample(n=n_rows, random_state=random_state).sort_values(by=['Time'])
    else:
        downsampled_df = df

    # Reshape to long format: one row per (Time, Sensor) pair
    melted_df = downsampled_df.melt(
        id_vars=['Time'],
        value_vars=list(value_vars),
        var_name='Sensor',
        value_name='Value',
    )

    # One line per sensor, coloured by sensor name
    fig = px.line(melted_df, x='Time', y='Value', color='Sensor', title='Sensor Data Over Time')

    fig.update_layout(
        xaxis_title='Time',
        yaxis_title='Value',
        hovermode='x unified'
    )

    fig.show()
        
# Read the CSV (text following a '#' is skipped as a comment) and use its
# 'Index' column as the row index.
df = pd.read_csv('2datasetCleaned.csv', comment='#', low_memory=False).set_index('Index')

# Preview the first rows of the loaded data
print(df.head())

Get a List of Missing-ID Ranges (First and Last Missing ID, with the Timestamps of the Neighbouring Datapoints)

In [None]:
# Every integer ID from the smallest to the largest index label present in df.
all_indexes = pd.Series(range(df.index.min(), df.index.max() + 1))
# IDs inside that span for which df has no row.
is_present = all_indexes.isin(df.index)
missing_indexes = all_indexes[~is_present]

# Collapse the missing IDs into (first, last) pairs, one per consecutive run.
# Inside a run of consecutive integers, position - value is constant, so that
# difference is used as the grouping key.
missing_ranges = []
for _, run_members in groupby(enumerate(missing_indexes), key=lambda pair: pair[0] - pair[1]):
    run_ids = [missing_id for _, missing_id in run_members]
    missing_ranges.append((run_ids[0], run_ids[-1]))


def _time_at(index_label):
    """Return the 'Time' value stored at index_label, or None if df has no such row."""
    if index_label in df.index:
        return df.loc[index_label, 'Time']
    return None


# One record per gap: first/last missing ID plus the 'Time' of the row just
# before the gap and of the row just after it.
missing_data = [
    [gap_start, gap_end, _time_at(gap_start - 1), _time_at(gap_end + 1)]
    for gap_start, gap_end in missing_ranges
]

missing_df = pd.DataFrame(
    missing_data,
    columns=['First Missing Index', 'Last Missing Index', 'First Missing Time', 'Last Missing Time'],
)

print(missing_df.head())

# 'Time' of the first and of the last row of df
first_time = df['Time'].iloc[0]
last_time = df['Time'].iloc[-1]


Clean the Data

In [5]:
# Accepted value range per sensor column as (lowest, highest).
valid_ranges = {
    'dig1': (10, 50),
    'ana1': (10, 50),
    'noise': (0, 100),
}

for column, (lowest, highest) in valid_ranges.items():
    readings = df[column]
    out_of_range = (readings < lowest) | (readings > highest)
    # Replace out-of-range readings with NaN in place; existing NaN values
    # compare False on both sides and are left as they are.
    df.loc[out_of_range, column] = np.nan

Get the Datapoints Around the Highest and Lowest Values of Each Sensor

In [None]:
# Number of index labels to include on each side of an extreme value.
WINDOW_HALF_WIDTH = 360

# Windows collected for every sensor. These dictionaries are created once,
# before the loop, so that they still hold the entries of all sensors after
# the loop has finished (re-creating them per iteration would keep only the
# last sensor).
results_max = {}
results_min = {}
# Combined view: results[sensor] == {"max": <window>, "min": <window>}
results = {}

# The index bounds do not change inside the loop, so look them up once.
index_lower = df.index.min()
index_upper = df.index.max()

for sensor in sensors:
    # Index labels of the largest and smallest reading of this sensor
    max_index = df[sensor].idxmax()
    min_index = df[sensor].idxmin()

    for label, extreme_index, store in (
        ("Max", max_index, results_max),
        ("Min", min_index, results_min),
    ):
        # Clamp the window to the range of index labels present in df
        start_index = max(extreme_index - WINDOW_HALF_WIDTH, index_lower)
        end_index = min(extreme_index + WINDOW_HALF_WIDTH, index_upper)

        # Label-based slice (both ends inclusive) around the extreme value
        store[sensor] = df.loc[start_index:end_index]

        print(f"{label} val Sensor: {sensor} " + str(df.loc[extreme_index, sensor]))
        print(store[sensor])
        print("")
        visualize_dataframe(store[sensor])

    results[sensor] = {"max": results_max[sensor], "min": results_min[sensor]}

Create a Plotly Visualization

In [None]:
print(df.head())

# Plot a random subset of at most 100000 rows. The request is capped at the
# number of rows available, because DataFrame.sample raises ValueError when
# asked for more rows than the frame contains.
plot_sample_size = min(100000, len(df))
visualize_dataframe(df, plot_sample_size)