In [1]:
import pandas as pd

## Funkcija za filtriranje podataka ovisno o trajanju intervala bez promjene zone_fan_speed

In [151]:
def filtriranje(df, trajanje):
    """Return the unchanged-fan-speed segments lasting at least ``trajanje`` hours.

    The fan speed readings are recoded to levels (0.0 -> 0, 33.0/33.3 -> 1,
    66.5 -> 2, 100.0 -> 3), pivoted to one column per ``zone_id`` and scanned
    in timestamp order. A new segment starts at every timestamp where at least
    one zone's value differs from the previous timestamp.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``timestamp``, ``zone_id`` and
        ``zone_fan_speed``. The frame is NOT modified; the function works on
        a private copy of these three columns.
    trajanje : int or float
        Minimum segment duration in hours (``end_time - start_time``).

    Returns
    -------
    pandas.DataFrame
        One row per retained segment with the columns ``segment_id``,
        ``start_time``, ``end_time``, ``duration`` and ``fan_speed_state``
        (the first non-null recoded ``zone_fan_speed`` among the segment's rows).

    Raises
    ------
    KeyError
        If one of the required columns is missing from ``df``.
    """
    # Copy only the columns that are used so the caller's frame stays untouched
    # (the function is called repeatedly on the same frame).
    data = df[['timestamp', 'zone_id', 'zone_fan_speed']].copy()

    # 33.0 is first normalised to 33.3, then every raw value becomes a level.
    speed_levels = {0.0: 0, 33.3: 1, 66.5: 2, 100.0: 3}
    data['zone_fan_speed'] = (
        data['zone_fan_speed'].replace(33.0, 33.3).replace(speed_levels)
    )
    # Unparseable timestamps become NaT and drop out in the pivot / inner join.
    data['timestamp'] = pd.to_datetime(data['timestamp'], errors='coerce')

    # One row per timestamp, one column per zone; gaps are back-filled.
    piv = (
        data.pivot_table(index='timestamp',
                         columns='zone_id',
                         values='zone_fan_speed',
                         aggfunc='first')
        .sort_index()
        .bfill()
    )

    # The first row has a NaN diff, which `ne(0)` treats as a change, so the
    # segment numbering starts at 1.
    # NOTE(review): values still NaN after bfill (a zone with no later reading)
    # also give NaN diffs and therefore start a new segment on every such row
    # - confirm this is intended.
    changes_any_zone = piv.diff().ne(0).any(axis=1)
    segment_ids = (
        changes_any_zone.cumsum().rename('segment_id').rename_axis('timestamp')
    )

    # Attach the segment id to every original reading.
    data = data.set_index('timestamp').join(segment_ids, how='inner').reset_index()

    # The timestamps are sorted, so first/last per segment are its boundaries.
    segments = (
        segment_ids.reset_index()
        .groupby('segment_id', as_index=False)
        .agg(start_time=('timestamp', 'first'),
             end_time=('timestamp', 'last'))
    )
    segments['duration'] = segments['end_time'] - segments['start_time']

    # GroupBy.first() skips nulls, i.e. it yields the first non-null reading.
    fan_speed_po_segmentu = (
        data.groupby('segment_id')['zone_fan_speed']
        .first()
        .reset_index()
        .rename(columns={'zone_fan_speed': 'fan_speed_state'})
    )
    segments = segments.merge(fan_speed_po_segmentu, on='segment_id', how='left')

    return segments[segments['duration'] >= pd.Timedelta(hours=trajanje)]

In [132]:
# Minimum segment durations to evaluate, in hours (15, 30 and 60 minutes).
vrijeme = [0.25, 0.5, 1]
# Result container: year -> duration -> data.
gvp = {}

### Iteracija po godinama:

In [158]:
from collections import defaultdict

# year -> list of rows [minutes, number of segments, count at speed 0, 1, 2, 3]
gvp = defaultdict(list)
for godina in range(2018, 2023):
    path = f"../Podaci/9_S/{godina}/zones_20_year_{godina}.csv"
    podaci = pd.read_csv(path, low_memory=False)
    # The segmentation does not depend on the duration threshold, so compute
    # it once per year (0-hour minimum keeps every segment) and only apply
    # the threshold inside the loop instead of re-segmenting for each value.
    svi_segmenti = filtriranje(podaci, 0)
    for v in vrijeme:
        segments_df = svi_segmenti[svi_segmenti['duration'] >= pd.Timedelta(hours=v)]
        broj = len(segments_df)
        counts = segments_df['fan_speed_state'].value_counts().to_dict()
        # Speed levels that never occur are reported as 0.
        brzine = {i: counts.get(i, 0) for i in range(4)}
        gvp[godina].append([
            int(v*60),  # threshold in minutes
            broj,       # number of segments meeting the threshold
            brzine[0],
            brzine[1],
            brzine[2],
            brzine[3]
        ])

### Konačni rezultat:

In [146]:
# Print, for every year, one entry per duration threshold: the threshold in
# minutes, the number of segments and the segment counts per fan speed level.
for year, rows in gvp.items():
    print(year, ":")
    for minutes, total, speed0, speed1, speed2, speed3 in rows:
        print(f"\tMinuta: {minutes}")
        print(f"\tPodataka: {total}")
        print(f"Brzine (0,1,2,3): ({speed0},{speed1},{speed2},{speed3})")
    print("\n")

2018 :
	Minuta: 15
	Podataka: 1248
Brzine (0,1,2,3): (1060,97,35,56)
	Minuta: 30
	Podataka: 753
Brzine (0,1,2,3): (682,30,10,31)
	Minuta: 60
	Podataka: 491
Brzine (0,1,2,3): (465,10,4,12)


2019 :
	Minuta: 15
	Podataka: 2169
Brzine (0,1,2,3): (1810,105,49,205)
	Minuta: 30
	Podataka: 1279
Brzine (0,1,2,3): (1080,40,23,136)
	Minuta: 60
	Podataka: 760
Brzine (0,1,2,3): (657,18,14,71)


2020 :
	Minuta: 15
	Podataka: 1611
Brzine (0,1,2,3): (1434,90,3,84)
	Minuta: 30
	Podataka: 1035
Brzine (0,1,2,3): (932,57,0,46)
	Minuta: 60
	Podataka: 683
Brzine (0,1,2,3): (627,27,0,29)


2021 :
	Minuta: 15
	Podataka: 1882
Brzine (0,1,2,3): (1576,166,6,134)
	Minuta: 30
	Podataka: 1115
Brzine (0,1,2,3): (958,93,1,63)
	Minuta: 60
	Podataka: 630
Brzine (0,1,2,3): (561,39,0,30)


2022 :
	Minuta: 15
	Podataka: 2276
Brzine (0,1,2,3): (2097,72,23,84)
	Minuta: 30
	Podataka: 1319
Brzine (0,1,2,3): (1248,23,13,35)
	Minuta: 60
	Podataka: 815
Brzine (0,1,2,3): (781,12,8,14)


