In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
import numpy as np

In [None]:
# Load the dataset from the parquet file; the path is relative to the current
# working directory, so the notebook must be run from the folder containing it.
data = pd.read_parquet(r"fridge_data_cleaned.parquet")
# Print the first 10 rows as a quick check that the load worked.
print(data.head(10))

In [None]:
# --- Normalise 'name' and recover missing 'appliance' labels ---

# 1. Trim both ends of every name, then squeeze each whitespace run to one space.
data['name'] = data['name'].str.strip().str.replace(r'\s+', ' ', regex=True)

# 2. A trailing "fryser <n>" / "køleskab <n>" (any letter case, optional space
#    before the number) is the appliance label embedded in the name. Park it in
#    a scratch column; rows without such a suffix get NaN.
data['extracted_appliance'] = data['name'].str.extract(
    r'(?i)(fryser\s*\d+|køleskab\s*\d+)\s*$',
    expand=False,
)

# 3. Existing appliance values win; only the gaps take the extracted label.
#    Then lower-case everything and force exactly one space between the
#    appliance word and its number (e.g. "Fryser2" -> "fryser 2").
data['appliance'] = (
    data['appliance']
    .fillna(data['extracted_appliance'])
    .str.lower()
    .str.replace(r'(fryser|køleskab)\s*(\d+)', r'\1 \2', regex=True)
    .str.strip()
)

# 4. Cut the same trailing label off the name so only the remaining text is left.
data['name'] = (
    data['name']
    .str.replace(r'(?i)(fryser\s*\d+|køleskab\s*\d+)\s*$', '', regex=True)
    .str.strip()
)

# 5. The scratch column has served its purpose.
data = data.drop(columns=['extracted_appliance'])

print(data.head(10))


In [None]:
# Known site names. A row is assigned to the FIRST entry of this list that
# occurs as a substring of its 'name' value, so order matters if entries overlap.
places = [
    'Bofællesskabet Mårslet', 'Bofællesskabet Vintervej', 'Børnehaven Hygge og Leg',
    'Børnehuset Bakken', 'Boernesymfonien', 'CSB Brabrand', 'CSB Egebækken',
    'Graven', 'Harlev', 'Jespers Torvekøkken', 'Kantinen Grøndalsvej 2',
    'Plejehjemmet Skovvang', 'Plejehjemmet Trøjborg', 'Vikærgården'
]


def _match_place(name):
    """Return the first entry of ``places`` contained in ``name``, else 'Unknown'.

    Non-string values (e.g. NaN/None for a missing name) cannot be
    substring-searched and would raise ``TypeError`` on ``place in name``;
    they are reported as 'Unknown' instead.
    """
    if not isinstance(name, str):
        return 'Unknown'
    return next((place for place in places if place in name), 'Unknown')


# Step 1: Tag every row with the site it belongs to.
data['place'] = data['name'].apply(_match_place)

# Step 2: Filter data for one place (set `selected_place` to any entry of `places`).
selected_place = "Plejehjemmet Skovvang"
filtered_df = data[data['place'] == selected_place]

print(filtered_df.head(20))

In [None]:

# Tunable analysis parameters (previously inline magic numbers).
PEAK_MIN_DISTANCE = 6     # `distance` passed to find_peaks: min. samples between extrema
AMPLITUDE_TOLERANCE = 2   # max. deviation from the median swing still counted as "steady"

# For every appliance at the selected place: detect temperature extrema, keep the
# valley->peak swings whose size is close to the median swing, report the
# temperature band they span, classify the appliance as Good/Bad and plot it.
# Loop-level variable names are kept stable because they remain visible to any
# later notebook cell after the loop finishes.
for appliance_name, appliance_df in filtered_df.groupby('appliance'):
    # Sort by time and reset index so positional indices follow chronological order
    appliance_df = appliance_df.sort_values('time_index').reset_index(drop=True)

    # Extract temperature and time as plain arrays (positional indexing below)
    temperatures = appliance_df['temperature'].values
    time_index = appliance_df['time_index'].values

    print(f"\nAnalyzing Appliance: {appliance_name}")

    # Step 3.1: Identify peaks and valleys (valleys are peaks of the negated signal)
    peaks, _ = find_peaks(temperatures, distance=PEAK_MIN_DISTANCE)
    valleys, _ = find_peaks(-temperatures, distance=PEAK_MIN_DISTANCE)

    # Combine peaks and valleys into one chronologically ordered list of extrema
    cycle_points = sorted(list(peaks) + list(valleys))

    if len(cycle_points) < 2:
        print("No steady patterns detected. Data may be irregular.")
        continue

    # Step 3.2: Collect every valley immediately followed by a peak (a rising swing)
    # and its amplitude. Sets give constant-time membership checks instead of
    # scanning the NumPy index arrays once per extremum.
    peak_set = set(peaks.tolist())
    valley_set = set(valleys.tolist())
    rising_pairs = [
        (start, end)
        for start, end in zip(cycle_points, cycle_points[1:])
        if start in valley_set and end in peak_set
    ]
    amplitudes = [temperatures[end] - temperatures[start] for start, end in rising_pairs]

    if len(amplitudes) == 0:
        print("No valid amplitudes detected.")
        continue

    # Step 3.3: Identify the typical amplitude
    most_common_amplitude = np.median(amplitudes)  # Use median to avoid outliers
    print(f"Most Common Amplitude: {most_common_amplitude:.2f}°C")

    # Step 3.4: Keep only the swings whose amplitude is close to the typical one
    steady_indices = []
    for (start, end), amplitude in zip(rising_pairs, amplitudes):
        if abs(amplitude - most_common_amplitude) < AMPLITUDE_TOLERANCE:
            steady_indices.extend([start, end])

    # The median of an even number of swings is the mean of the two middle values
    # and may be far from both of them (e.g. swings of 1 and 10 -> median 5.5);
    # NaN readings also make every comparison above False. In either case nothing
    # is "steady" and min()/max() on the empty selection would raise ValueError.
    if not steady_indices:
        print("No steady oscillations detected.")
        continue

    steady_temperatures = temperatures[steady_indices]
    steady_time_index = time_index[steady_indices]

    # Step 3.5: Calculate steady temperature range
    regular_temperatures = steady_temperatures
    min_temp = regular_temperatures.min()
    max_temp = regular_temperatures.max()

    Regular_temperature_range = max_temp - min_temp

    print(f"Regular Temperature Range: {min_temp:.2f}°C to {max_temp:.2f}°C")
    print(f"Regular Temperature Range Difference: {Regular_temperature_range:.2f}°C")

    # Determine if the appliance is good or bad.
    # Anything whose label does not contain "fryser" is treated as a fridge.
    appliance_type = "freezer" if "fryser" in appliance_name.lower() else "fridge"
    is_good = False

    if appliance_type == "freezer":
        # Freezer criteria: both extremes within -20..-15 and a band width of 3..5
        if -20 <= min_temp <= -15 and -20 <= max_temp <= -15 and 3 <= Regular_temperature_range <= 5:
            is_good = True
    elif appliance_type == "fridge":
        # Fridge criteria: both extremes within 1..6 and a band width of 2..3
        if 1 <= min_temp <= 6 and 1 <= max_temp <= 6 and 2 <= Regular_temperature_range <= 3:
            is_good = True

    status = "Good" if is_good else "Bad"
    print(f"Appliance Status: {status}")

    # Plot the full series (faint) with the steady extrema highlighted on top
    plt.figure(figsize=(10, 5))
    plt.plot(time_index, temperatures, label="All Temperatures", color="blue", alpha=0.3)
    plt.plot(steady_time_index, steady_temperatures, label="Steady Temperatures", color="green")
    plt.scatter(steady_time_index, steady_temperatures, color="red", label="Steady Peaks and Valleys")
    plt.title(f"Steady Patterns for {appliance_name} in {selected_place}")
    plt.xlabel("Time")
    plt.ylabel("Temperature (°C)")
    plt.legend()
    plt.grid(True)
    plt.show()