<h1>Набор данных о потреблении объекта ледовой арены</h1>

In [3]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import os
import functools as ft
import datetime as dt
from datetime import datetime
from dotenv import load_dotenv
from clickhouse_driver import Client
from data_wrapper import db_wrapper
from plotly.subplots import make_subplots
from tqdm.notebook import tqdm

In [4]:
# pd.read_csv('../data/data.csv')['time']

In [5]:
# Период мониторинга для получения данных, на которых будет проводиться обучение модели
# start_date = input('Введите начало периода в формате YYYY-MM-DD:')
# end_date = input('Введите конец периода в формате YYYY-MM-DD:')
# startYear, startMonth, startDay = map(int, start_date.split('-'))
# endYear, endMonth, endDay = map(int, end_date.split('-'))

<h2>Подключение к базе данных</h2>

In [6]:
# Pull connection settings from a local .env file into the process environment.
# NOTE(review): by default load_dotenv() does not override variables that already
# exist in the environment, so an OS-provided USERNAME (e.g. on Windows) would take
# precedence over the value in .env — confirm this is what is wanted.
load_dotenv()

# Each setting is None when the corresponding variable is not defined.
HOST, USERNAME, PASSWORD, CA, READONLY = (
    os.getenv(key) for key in ('HOST', 'USERNAME', 'PASSWORD', 'CA', 'READONLY')
)

# Credentials shared by both database handles.
credentials = dict(host=HOST, user=USERNAME, password=PASSWORD)

# `wrap` is the project wrapper used for the sensor downloads;
# `client` is a raw clickhouse_driver client (TLS enabled) used for hand-written SQL.
wrap = db_wrapper.ClickHouseWrapper(ca=CA, **credentials)
client = Client(ca_certs=CA, secure=True, **credentials)

In [7]:
def _sensor(name, measurement, channel, name_in_df, mode, db="genesis_arena", phase=0):
    """Build one sensor descriptor.

    Args:
        name: Device name as stored in the database.
        measurement: Measurement name to read from the device.
        channel: Device channel number.
        name_in_df: Column name the series gets in the resulting DataFrame.
        mode: Aggregation mode string ("max-min" or "mean" in this notebook).
        db: Database name; every sensor here uses "genesis_arena".
        phase: Phase number; every sensor here uses 0.

    Returns:
        dict with the keys db, name, measurement, channel, phase, name_in_df, mode
        (in that order).
    """
    return {
        "db": db, "name": name, "measurement": measurement,
        "channel": channel, "phase": phase, "name_in_df": name_in_df, "mode": mode,
    }


# Sensor descriptors passed to wrap.get_particular_sensors().
sensors = [
    # Compressor energy consumption
    _sensor("map12e_142", "Total AP energy", 2, "compressor1", "max-min"),
    _sensor("map12e_142", "Total AP energy", 3, "compressor2", "max-min"),
    _sensor("map12e_142", "Total AP energy", 4, "compressor3", "max-min"),
    _sensor("map12e_145", "Total AP energy", 1, "compressor4", "max-min"),
    # Instantaneous compressor power
    # NOTE(review): these use "max-min" while every other "Total P" channel below
    # uses "mean" — confirm this is intentional.
    _sensor("map12e_142", "Total P", 2, "state1", "max-min"),
    _sensor("map12e_142", "Total P", 3, "state2", "max-min"),
    _sensor("map12e_142", "Total P", 4, "state3", "max-min"),
    _sensor("map12e_145", "Total P", 1, "state4", "max-min"),
    # Instantaneous condenser power
    _sensor("map12e_23", "Total P", 1, "condensator4", "mean"),
    _sensor("map12e_49", "Total P", 2, "condensator1", "mean"),
    _sensor("map12e_49", "Total P", 3, "condensator2", "mean"),
    _sensor("map12e_49", "Total P", 4, "condensator3", "mean"),
    # Circulation pump power
    _sensor("map12e_145", "Total P", 3, "power_pump", "mean"),
    # Ice temperature
    _sensor("m1w2_181", "Temperature", 1, "temp_ice", "mean"),
    # Outside temperature
    _sensor("weather_owm", "Temperature", 0, "temp_outside", "mean"),
    # Outside humidity
    _sensor("weather_owm", "Humidity", 0, "hum_outside", "mean"),
    # Inside temperature
    _sensor("msw-v3_175", "Temperature", 0, "temp_inside", "mean"),
    # Inside humidity
    _sensor("msw-v3_175", "Humidity", 0, "hum_inside", "mean"),
    # Motion level
    _sensor("msw-v3_175", "Max Motion", 0, "max_motion", "mean"),
    # Illuminance level
    _sensor("msw-v3_175", "Illuminance", 0, "illuminance", "mean"),
    # CO2 level
    _sensor("msw-v3_175", "CO2", 0, "CO2", "mean"),
    # Condensation temperature
    _sensor("m1w2_245", "Temperature", 2, "temp_condensation", "mean"),
]

In [6]:
# First day of the period to download.
start_date = pd.to_datetime('2022-09-20')
date_range_list = []

# Build consecutive one-day [start, end] windows from the first day up to now.
# Each window starts exactly where the previous one ended. The earlier
# "+ 1 minute" step shifted every window by one more minute; because the download
# cell only uses the calendar date of each bound, that drift would eventually
# (after 1440 windows) skip a whole day.
while start_date < pd.Timestamp.today():
    end_date = start_date + dt.timedelta(days=1)
    date_range_list.append([start_date, end_date])
    start_date = end_date

In [7]:
# Download the sensor data one daily window at a time and stitch the pieces together.
presample = dt.timedelta(minutes=1)
# Offset subtracted from local midnight before querying.
# NOTE(review): presumably converts Moscow time (UTC+3) to UTC — confirm against db_wrapper.
query_offset = dt.timedelta(hours=3)

daily_frames = []
for start_date, end_date in tqdm(date_range_list):
    # Only the calendar date of each bound is used; any time-of-day part is discarded.
    start = dt.datetime(start_date.year, start_date.month, start_date.day) - query_offset
    end = dt.datetime(end_date.year, end_date.month, end_date.day) - query_offset

    dfi = wrap.get_particular_sensors(start, end, sensors, presample_time=presample, without_confidence=True)
    # pd.concat silently skips None entries; keep that tolerance explicit here.
    if dfi is not None:
        daily_frames.append(dfi)

# Concatenate once at the end: calling pd.concat inside the loop re-copies the
# accumulated frame on every iteration (quadratic in the number of days).
# Fall back to an empty frame when nothing was downloaded (pd.concat([]) raises).
df = pd.concat(daily_frames, axis=0) if daily_frames else pd.DataFrame()

  0%|          | 0/143 [00:00<?, ?it/s]

compressor3 is empty along from 2023-10-27 21:00:00 to 2023-10-28 21:00:00
compressor3 is empty along from 2023-10-28 21:00:00 to 2023-10-29 21:00:00
compressor3 is empty along from 2023-10-29 21:00:00 to 2023-10-30 21:00:00
compressor3 is empty along from 2023-10-31 21:00:00 to 2023-11-01 21:00:00
compressor3 is empty along from 2023-11-01 21:00:00 to 2023-11-02 21:00:00
compressor3 is empty along from 2023-11-02 21:00:00 to 2023-11-03 21:00:00
compressor3 is empty along from 2023-11-03 21:00:00 to 2023-11-04 21:00:00
compressor3 is empty along from 2023-11-04 21:00:00 to 2023-11-05 21:00:00
compressor3 is empty along from 2023-11-06 21:00:00 to 2023-11-07 21:00:00
compressor3 is empty along from 2023-11-07 21:00:00 to 2023-11-08 21:00:00
compressor3 is empty along from 2023-11-08 21:00:00 to 2023-11-09 21:00:00
compressor3 is empty along from 2023-11-09 21:00:00 to 2023-11-10 21:00:00
compressor3 is empty along from 2023-11-10 21:00:00 to 2023-11-11 21:00:00
compressor3 is empty alon

In [8]:
# Per-unit columns that are collapsed into one total per group below.
consumption_cols = ['compressor1', 'compressor2', 'compressor3', 'compressor4']
power_cols = ['state1', 'state2', 'state3', 'state4']
condensators_cols = ['condensator1', 'condensator2', 'condensator3', 'condensator4']
per_unit_cols = consumption_cols + power_cols + condensators_cols

# Remove timezone info from the index and move the index into a regular 'time' column.
sensors_flat = df.tz_localize(None).reset_index(names=['time'])

# Row-wise totals per group; the per-unit columns are dropped afterwards.
# Note: DataFrame.sum skips NaN by default, so a row where every unit is missing
# yields 0.0 rather than NaN.
data = sensors_flat.assign(
    consumption_compressors=sensors_flat[consumption_cols].sum(axis=1),
    power_compressors=sensors_flat[power_cols].sum(axis=1),
    power_condensators=sensors_flat[condensators_cols].sum(axis=1),
).drop(columns=per_unit_cols)

<h2>Температура гликоля</h2>

In [9]:
# Select the database for this ClickHouse session; the queries use unqualified table names.
client.execute('use genesis_arena')

# Per-minute average of the glycol supply temperature.
# The upper time bound is `end_date`, i.e. the value left over from the last
# iteration of the download loop, so that cell must have been run first.
supply_glycol_query = f'''
    select toStartOfMinute(s.timestamp) as time,
           round(avg(s.value), 2) as temp_supply_glycol
    from storage as s
    inner join devices as d on s.device_id = d.device_id
    inner join measures as m on s.measure_id = m.measure_id
    where d.name = 'danfoss-ekc-202b_39' and 
          s.phase = 0 and 
          s.channel = 0 and
          m.name = 'Sensor Sair Temperature' and
          s.timestamp between toDateTime('2023-10-28', 'Europe/Moscow') and 
                              toDateTime('{end_date}', 'Europe/Moscow')
    group by time
    order by time
'''
supply_glycol_df = client.query_dataframe(supply_glycol_query)

# Drop any timezone info and shift the timestamps forward by three hours.
# NOTE(review): presumably UTC -> Moscow wall time so the values line up with
# `data['time']` — confirm.
supply_time_shift = dt.timedelta(hours=3)
supply_glycol_df['time'] = supply_glycol_df['time'].dt.tz_localize(None) + supply_time_shift

In [10]:
# Per-minute average of the glycol return temperature (device 'w1').
# Relies on kernel state from earlier cells: the session database selected via
# client.execute('use genesis_arena') and the `end_date` left by the download loop.
return_glycol_query = f'''
    select toStartOfMinute(s.timestamp) as time,
           round(avg(s.value), 2) as temp_return_glycol
    from storage as s
    inner join devices as d on s.device_id = d.device_id
    inner join measures as m on s.measure_id = m.measure_id
    where d.name = 'w1' and 
          s.phase = 0 and 
          s.channel = 0 and
          m.name = '28-00000d5b2b5a' and
          s.timestamp between toDateTime('2023-10-28', 'Europe/Moscow') and 
                              toDateTime('{end_date}', 'Europe/Moscow')
    group by time
    order by time
'''
return_glycol_df = client.query_dataframe(return_glycol_query)

# Drop any timezone info and shift the timestamps forward by three hours.
# NOTE(review): presumably UTC -> Moscow wall time so the values line up with
# `data['time']` — confirm.
return_time_shift = dt.timedelta(hours=3)
return_glycol_df['time'] = return_glycol_df['time'].dt.tz_localize(None) + return_time_shift

In [11]:
# Attach the glycol supply/return temperatures to the main dataset, joined on the
# minute timestamp. A left join keeps every row of `data`; minutes without a glycol
# reading get NaN.
glycol_frames = [supply_glycol_df, return_glycol_df]
# Columns contributed by the glycol frames (everything except the join key).
glycol_cols = [col for frame in glycol_frames for col in frame.columns if col != 'time']
# Drop glycol columns left over from a previous run of this cell, so re-running it
# refreshes them instead of producing suffixed duplicates ("_x" / "_y").
# On a first run nothing is dropped and the result is the same as a plain merge.
data = ft.reduce(
    lambda left, right: left.merge(right, how='left', on='time'),
    glycol_frames,
    data.drop(columns=glycol_cols, errors='ignore'),
)

In [12]:
# Print a summary of the merged dataset: column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205477 entries, 0 to 205476
Data columns (total 16 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   time                     205477 non-null  datetime64[ns]
 1   power_pump               205414 non-null  float64       
 2   temp_ice                 195342 non-null  float64       
 3   temp_outside             201380 non-null  float64       
 4   hum_outside              201380 non-null  float64       
 5   temp_inside              201889 non-null  float64       
 6   hum_inside               205357 non-null  float64       
 7   max_motion               205416 non-null  float64       
 8   illuminance              159240 non-null  float64       
 9   CO2                      205411 non-null  float64       
 10  temp_condensation        205175 non-null  float64       
 11  consumption_compressors  205477 non-null  float64       
 12  power_compressor

In [15]:
# all_df = pd.read_csv('../data/data.csv')
# all_df = pd.concat([all_df, data])
# all_df.to_csv('../data/data.csv', index=False)
# data.to_csv('../data/data.csv', index=False)

In [16]:
# Read the dataset CSV from disk and show it as the cell output.
# The result is not assigned, so this is for inspection only.
pd.read_csv('../data/data.csv')

Unnamed: 0,time,power_pump,temp_ice,temp_outside,hum_outside,temp_inside,hum_inside,max_motion,illuminance,CO2,temp_condensation,consumption_compressors,power_compressors,power_condensators,temp_supply_glycol,temp_return_glycol
0,2023-10-28 00:00:00,14650.544922,,-1.970000,92.00,9.250000,,17.75,,515.75,19.184786,0.0,13.1850,-0.0100,,-4.81
1,2023-10-28 00:01:00,14634.635254,,-1.946250,91.85,9.275000,,18.25,,517.00,19.175929,0.0,13.0400,-0.0800,-5.15,-4.80
2,2023-10-28 00:02:00,14627.294922,,-1.914583,91.65,9.295000,51.099998,17.25,,519.75,19.167072,0.0,13.0250,-0.1350,-5.14,-4.77
3,2023-10-28 00:03:00,14652.794922,,-1.882917,91.45,9.288333,51.025000,17.75,,512.25,19.158214,0.0,13.2500,-0.1775,-5.15,-4.75
4,2023-10-28 00:04:00,14620.794922,,-1.851250,91.25,9.281667,51.049999,18.50,,507.75,19.118700,0.0,13.0550,-0.2050,-5.17,-4.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205472,2024-03-18 17:31:00,2383.794922,-4.291667,,,9.325000,47.525000,16.50,68.080002,646.75,16.291500,0.0,16.3725,-0.2500,-5.15,-4.99
205473,2024-03-18 17:32:00,2393.795044,-4.338542,,,9.400000,47.525000,18.00,67.824002,647.00,16.276409,0.0,16.2500,-0.2750,-5.10,-4.95
205474,2024-03-18 17:33:00,2386.354980,-4.187500,,,,47.350000,18.25,68.400002,642.75,16.261318,0.0,16.4550,-0.0850,-5.16,-4.94
205475,2024-03-18 17:34:00,2387.400024,-4.322917,,,,47.450000,19.25,68.400002,635.75,16.281000,0.0,16.2600,-0.1700,-5.14,-4.92
