In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from datetime import datetime, timedelta


In [None]:
# Load the measurements from the parquet file that sits next to the notebook.
# Later cells read index levels 'place', 'appliance' and 'time_index' and a
# 'temperature' column from this frame.
data = pd.read_parquet(path=r"fridge_data_selected_places.parquet")

# Plain-text preview of the first five rows (five is the head() default).
print(data.head(n=5))

In [None]:
## Use this when you have continuous data (recent data)
"""
# Ensure 'time_index' is in UTC and timezone-aware (this snippet needs `import pytz`, which the import cell does not provide)
today = datetime.now(pytz.UTC)  # Current date and time in UTC
last_week = today - timedelta(days=7)  # 7 days ago in UTC

# Filter to only include the last 7 days
data_last_7_days = data[data.index.get_level_values('time_index') >= last_week]

# Verify the filtered data
print(f"Filtered data contains {len(data_last_7_days)} rows.")
print(data_last_7_days.head())
"""

In [None]:
# Anchor the window on the newest reading present in the data (not on the
# wall clock), so the cell gives the same result whenever it is run.
max_time = data.index.get_level_values('time_index').max()

# Lower edge of the window: exactly one week before the newest reading.
last_7_days_start = max_time - timedelta(weeks=1)

# Keep only rows whose 'time_index' is at or after the lower edge.
# NOTE: this rebinds `data`, so every later cell sees just the last week.
data = data.loc[data.index.get_level_values('time_index') >= last_7_days_start]

# Sanity check: row count, a short preview and the (rows, columns) shape.
print(f"Filtered data contains {len(data)} rows.")
print(data.head())
print(data.shape)

In [None]:
# Overlay one temperature histogram per refrigerator ('køleskab') in a single figure.
fig = go.Figure()

for (place, appliance), df in data.groupby(level=['place', 'appliance']):
    # Case-insensitive name match; anything that is not a 'køleskab' is skipped.
    if 'køleskab' not in appliance.lower():
        continue

    histogram = go.Histogram(
        x=df['temperature'],
        name=f'{place} {appliance}',
        histfunc='count',
        histnorm='percent',  # normalised so the y-axis matches the 'Procent' label
    )
    fig.add_trace(histogram)

fig.update_layout(
    title='Køleskaber',
    title_x=0.5,  # centre the title
    xaxis_title='Temperatur',
    yaxis_title='Procent',
)
fig.show()

# Rebind `fig` to an empty figure so traces added afterwards do not land on the plot above.
fig = go.Figure()


In [None]:
# Overlay one temperature histogram per freezer ('fryser') in a single figure.
#
# The figure is created here so the cell is self-contained: it no longer
# depends on whatever `fig` an earlier cell left behind, and re-running the
# cell starts from an empty figure instead of stacking duplicate traces.
fig = go.Figure()

for (place, appliance), df in data.groupby(level=['place', 'appliance']):
    # Case-insensitive name match; only appliances mentioning 'fryser' are drawn.
    if 'fryser' in appliance.lower():
        fig.add_trace(go.Histogram(histfunc='count', x=df['temperature'], name=f'{place} {appliance}', histnorm='percent'))

fig.update_layout(title='Fryser', title_x=0.5, xaxis_title='Temperatur', yaxis_title='Procent')
fig.show()

In [None]:
# Helper function to classify appliances
def classify_appliance(df, spread_threshold=2.0, whisker=1.5):
    """
    Classify an appliance as 'good' or 'bad' from the spread of its temperatures.

    Readings outside the fences [Q1 - whisker*IQR, Q3 + whisker*IQR] are
    discarded first, so short excursions in the tails do not dominate the
    result. The interquartile range (IQR) of the remaining readings is then
    compared with `spread_threshold`.

    Missing temperatures (NaN) are ignored when computing the quartiles and are
    never part of the returned frame. Without this, a single NaN would turn
    every percentile into NaN and the appliance would always come out 'bad'.

    Parameters:
    - df: DataFrame with a numeric 'temperature' column.
    - spread_threshold: Filtered IQR strictly below this value means 'good'
      (default 2.0, same unit as 'temperature').
    - whisker: Multiple of the IQR used for the outlier fences (default 1.5).

    Returns:
    - (classification, df_filtered): 'good' or 'bad', and the rows of `df`
      whose temperature lies inside the fences (bounds inclusive). When there
      is no usable reading the result is ('bad', <empty frame>), because a
      stable temperature cannot be demonstrated.
    """
    temperatures = df['temperature']
    valid = temperatures.dropna()

    # No usable readings at all: nothing to compute percentiles from.
    if valid.empty:
        return "bad", df.iloc[0:0]

    # Quartiles of the valid readings and the resulting outlier fences.
    q1, q3 = np.percentile(valid, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - whisker * iqr
    upper_bound = q3 + whisker * iqr

    # between() is inclusive on both ends and evaluates to False for NaN,
    # so rows with a missing temperature are dropped here as well.
    df_filtered = df[temperatures.between(lower_bound, upper_bound)]

    # Defensive guard: degenerate bounds (e.g. infinite readings) can leave nothing.
    if df_filtered.empty:
        return "bad", df_filtered

    # Spread (IQR) of the data that survived the outlier filter.
    filtered_q1, filtered_q3 = np.percentile(df_filtered['temperature'], [25, 75])
    filtered_iqr = filtered_q3 - filtered_q1

    classification = "good" if filtered_iqr < spread_threshold else "bad"
    return classification, df_filtered


In [None]:
# Refrigerators again, but with outliers removed and the good/bad verdict
# from classify_appliance() shown in each legend entry.
fig = go.Figure()

for (place, appliance), df in data.groupby(level=['place', 'appliance']):
    # Only refrigerators ('køleskab', case-insensitive) are plotted here.
    if 'køleskab' not in appliance.lower():
        continue

    # Verdict plus the readings that survived the outlier filter.
    classification, df_filtered = classify_appliance(df)

    histogram = go.Histogram(
        x=df_filtered['temperature'],
        name=f'{place} {appliance} ({classification})',
        histfunc='count',
        histnorm='percent',
    )
    fig.add_trace(histogram)

# Centred title and axis labels.
fig.update_layout(
    xaxis_title='Temperatur',
    yaxis_title='Procent',
    title='Køleskaber (Filtered and Classified)',
    title_x=0.5,
)
fig.show()

In [None]:
# Freezers with outliers removed and the good/bad verdict from
# classify_appliance() shown in each legend entry.
fig = go.Figure()

for (place, appliance), df in data.groupby(level=['place', 'appliance']):
    # Only freezers ('fryser', case-insensitive) are plotted here.
    if 'fryser' not in appliance.lower():
        continue

    # Verdict plus the readings that survived the outlier filter.
    classification, df_filtered = classify_appliance(df)

    histogram = go.Histogram(
        x=df_filtered['temperature'],
        name=f'{place} {appliance} ({classification})',
        histfunc='count',
        histnorm='percent',
    )
    fig.add_trace(histogram)

# Centred title and axis labels.
fig.update_layout(
    xaxis_title='Temperatur',
    yaxis_title='Procent',
    title='Fryser (Filtered and Classified)',
    title_x=0.5,
)
fig.show()

In [None]:
def analyze_place(data, place_name):
    """
    Classify every appliance found at one place as 'good' or 'bad'.

    Parameters:
    - data: MultiIndex DataFrame with levels ['place', 'appliance', 'time_index'].
    - place_name: Value of the 'place' level to look up; `data.loc` raises a
      KeyError if it is not present.

    Returns:
    - dict mapping each appliance name at that place to the classification
      string returned by classify_appliance().
    """
    # Selecting on the outermost level leaves a frame for this place only.
    readings_at_place = data.loc[place_name]

    # One entry per appliance; only the label (first element of the
    # classify_appliance() result) is kept, the filtered frame is discarded.
    return {
        appliance: classify_appliance(appliance_readings)[0]
        for appliance, appliance_readings in readings_at_place.groupby(level='appliance')
    }



In [None]:

# Place to report on; it is looked up in the 'place' index level by analyze_place().
place_name = "Boernesymfonien"  # Replace with the name of the place you want to analyze
result = analyze_place(data, place_name)

# One header line, then one bullet per appliance with its good/bad label.
print(f"Appliances at {place_name}:")
for appliance in result:
    classification = result[appliance]
    print(f"- {appliance}: {classification}")
