Python Imports

In [None]:
%pip install --upgrade pip
%pip install pandas
%pip install numpy
%pip install plotly
%pip install nbformat
%pip install statsmodels

In [2]:
import pandas as pd
import numpy as np
from itertools import groupby
from operator import itemgetter
import plotly.graph_objs as go
import plotly.express as px
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller


In [None]:
# pandas display options: lift the row/column/cell-width caps, set the display
# width to 0, and keep wide frames from wrapping across several blocks
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.width = 0
pd.options.display.max_colwidth = None
pd.options.display.expand_frame_repr = False

# Shared variables and helper functions
sensors = ["dig1", "ana1", "noise"]
def visualize_dataframe(df, sample_size=None):
    """Draw an interactive line chart of the three sensor columns over time.

    Args:
        df: DataFrame containing a 'Time' column plus the 'ana1', 'dig1'
            and 'noise' columns.
        sample_size: Optional number of rows to draw at random before
            plotting. A falsy value (None or 0) plots every row.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # Random sampling shuffles the rows, so restore chronological order afterwards
    plot_source = df if not sample_size else df.sample(n=sample_size).sort_values(by=['Time'])

    # Reshape wide -> long: one row per (Time, Sensor) pair, so the sensor
    # name can drive the line colour
    long_format = plot_source.melt(
        id_vars=['Time'],
        value_vars=['ana1', 'dig1', 'noise'],
        var_name='Sensor',
        value_name='Value',
    )

    figure = px.line(long_format, x='Time', y='Value', color='Sensor', title='Sensor Data Over Time')

    # 'x unified' shows a single hover box listing every sensor at that x position
    figure.update_layout(xaxis_title='Time', yaxis_title='Value', hovermode='x unified')

    figure.show()
        
# Read the cleaned CSV ('#' marks comment text to ignore) and use its
# 'Index' column as the row index
df = pd.read_csv('2datasetCleaned.csv', comment='#', low_memory=False).set_index('Index')

# Preview the first rows of the DataFrame
print(df.head())

Checking whether the data is stationary (Augmented Dickey-Fuller test)

In [None]:
# Sensor columns analysed in the cells below
sensor_columns = ['dig1', 'ana1', 'noise']

# Work on a separate frame: parse the timestamps, index by them, and average
# every column over 5-minute bins. `df` itself is left untouched.
df_5min = (
    df.assign(Time=pd.to_datetime(df['Time']))
    .set_index('Time')
    .resample('5min')
    .mean()
)

def check_stationarity(data):
    """Run the Augmented Dickey-Fuller test on ``data`` and print a summary.

    Prints the ADF test statistic, the p-value, and then the critical
    values, one per line under a single 'Critical Values:' header.

    Args:
        data: The series of observations, passed unchanged to ``adfuller``.
            Callers in this notebook drop NaNs first.

    Returns:
        None. The results are only printed.
    """
    result = adfuller(data)
    print('ADF Statistic:', result[0])
    print('p-value:', result[1])
    # Print the header once, before the loop; it used to be repeated
    # in front of every critical value.
    print('Critical Values:')
    for key, value in result[4].items():
        print(f'   {key}, {value}')

# Run the stationarity check on every sensor column of the 5-minute averages
for sensor in sensor_columns:
    print(f"Checking stationarity for {sensor}:")
    # Drop NaN values before handing the series to the test
    sensor_series = df_5min[sensor].dropna()
    check_stationarity(sensor_series)
    print("\n")

Visualizing trends with heatmaps

In [None]:
# Parse the timestamps, then derive the hour of day and calendar date used for grouping.
# NOTE: this adds/overwrites columns on `df` itself.
df['Time'] = pd.to_datetime(df['Time'])
df['Hour'] = df['Time'].dt.hour
df['Date'] = df['Time'].dt.date

# Mean reading of each sensor for every (date, hour) pair
df_hourly = df.groupby(['Date', 'Hour'], as_index=False)[sensor_columns].mean()

# Layout settings shared by every heatmap; only the title changes per sensor
shared_layout = {
    'xaxis_title': 'Date',
    'yaxis_title': 'Hour of the Day',
    'height': 800,
    'width': 1200,
}

# One heatmap per sensor
for sensor in sensor_columns:
    # Hours become the rows (y-axis) and dates the columns (x-axis)
    heatmap_data = df_hourly.pivot_table(values=sensor, index='Hour', columns='Date', aggfunc='mean')

    heatmap_trace = go.Heatmap(
        x=heatmap_data.columns,
        y=heatmap_data.index,
        z=heatmap_data.values,
        colorscale='Viridis',
        colorbar={'title': f'{sensor} readings'},
    )
    heatmap = go.Figure(data=heatmap_trace)

    heatmap.update_layout(title=f'{sensor} Readings Heatmap', **shared_layout)

    # Render the figure
    heatmap.show()