In [None]:
import os

from collections import defaultdict

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from DataLoader import (
    loader,
    config
)

from Processer import preprocesser

from sklearn.decomposition import PCA

In [None]:
data_path = '../data/raw/historical'

# os.listdir returns entries in arbitrary order and also lists sub-directories
# and hidden entries (e.g. .ipynb_checkpoints). Keep regular, non-hidden files
# only and sort them, so that positions in `datasets` (and everything that is
# indexed by position further down) are reproducible across machines and runs.
files = sorted(
    name for name in os.listdir(data_path)
    if not name.startswith('.') and os.path.isfile(os.path.join(data_path, name))
)

# Grouped data per file, keyed by file name; filled in at the end of this cell.
dta = dict()
# Compressed signals, one entry per element of `files`, in the same order.
datasets = []
for file in files:
    df = pd.read_csv(os.path.join(data_path, file), skiprows=config.COUNT_SKIP, sep=';')
    df = loader.fill_empty(loader.transform_header(df))
    # Compress signal (presumably a mean over 10-minute bins — confirm in preprocesser.compress)
    compressed = preprocesser.compress(df, floor='10min', method='mean')
    datasets.append(compressed)

# Align every compressed signal on the index labels that all of them share.
if not datasets:
    raise ValueError("No datasets were loaded, so there is nothing to align")

# Intersect the indices of *all* frames. The previous version seeded the set
# from the shortest frame and then intersected with datasets[1:] only, so
# datasets[0] was never taken into account unless it happened to be the
# shortest one, and the `.loc` lookup below could hit labels missing from it.
min_index = set(datasets[0].index)
for frame in datasets[1:]:
    min_index.intersection_update(frame.index)

# Checked after intersecting, so frames with disjoint indices are reported
# here instead of silently producing empty frames.
if len(min_index) == 0:
    raise ValueError("Datasets have no index values in common")
min_index = list(min_index)

# Restrict each frame to the shared labels and order it by index. A new frame
# is stored rather than sorting in place.
for i, frame in enumerate(datasets):
    datasets[i] = frame.loc[min_index].sort_index()

# Check that the dates match. Index.equals compares both directions (and the
# order), unlike a one-sided set difference, and comparing everything with the
# first frame is enough because equality is transitive.
reference_index = datasets[0].index
for i, frame in enumerate(datasets[1:], start=1):
    assert frame.index.equals(reference_index), (
        f"Index of dataset {i} differs from dataset 0: "
        f"shapes {frame.index.shape} vs {reference_index.shape}"
    )

# Split each frame's column names, group the frame accordingly and store the
# result under the name of the file it came from.
for name, frame in zip(files, datasets):
    column_parts = loader.split(frame.columns)
    dta[name] = loader.group(column_parts, frame)

In [None]:
# Collect the grouped signals.
# NOTE(review): the return type of loader.get_components is not visible here;
# later cells index `res` by position and read `.shape[0]` of its items.
res = loader.get_components(dta)

In [None]:
# Distinct first-axis sizes found among the items of `res`.
# Only shape[0] is recorded because we know the indices do not differ.
lens = {component.shape[0] for component in res}

In [None]:
pca = PCA(n_components=2)

# Slots that get a panel; slot 2 is deliberately left out.
plotted_slots = [slot for slot in range(5) if slot != 2]

# Size the grid from the data instead of hard-coding 5 x 4: one column per
# item of `res`, one row per plotted slot (so no permanently empty row for the
# skipped slot). squeeze=False keeps `ax` two-dimensional even for one column.
fig, ax = plt.subplots(len(plotted_slots), len(res), figsize=(14, 14), squeeze=False)

for col, signals in enumerate(res):
    # Items with 10 rows hold two series per slot (rows 2*slot and 2*slot + 1);
    # any other item holds one series per slot. Decided once per item.
    paired = signals.shape[0] == 10
    for row, slot in enumerate(plotted_slots):
        axis = ax[row][col]
        picks = (slot * 2, slot * 2 + 1) if paired else (slot,)
        for k in picks:
            sns.lineplot(preprocesser.exponential_moving_average(signals[k], window=50), ax=axis)
        # Label the panel with the exact rows of `res` drawn in it.
        axis.set_title(f"res[{col}][{', '.join(str(k) for k in picks)}]")

fig.tight_layout()


Рассмотрим, что получится, если не брать виброперемещение (и ускорение).

In [None]:
# Normalise the third item of `res` on its own.
# NOTE(review): what `normilize` does exactly is defined in preprocesser and is not visible here.
squash_combined = preprocesser.normilize(res[2])

In [None]:
# Apply preprocesser.normilize to every item of `res`, keeping the order.
norm_signals = [preprocesser.normilize(item) for item in res]

In [None]:
# For each of the first four normalised items keep rows 0, 3 and 4 (rows 1 and
# 2 are dropped), giving a list of four groups with three series each.
r = [
    [norm_signals[item][row] for row in (0, 3, 4)]
    for item in range(4)
]

In [None]:
# Display the fourth group: rows 0, 3 and 4 of norm_signals[3].
r[3]

In [None]:
# Earlier single-plot variant, kept for reference:
# sns.lineplot(preprocesser.exponential_moving_average(abs(pca.fit_transform(norm_signals[1:3].T)[:,1]), window=100))

# One panel per group in `r`: fit the PCA on the group's series (transposed so
# that each series becomes a column), take the absolute value of the second
# output column, smooth it and draw it.
fig, ax = plt.subplots(4, figsize=(12, 12))
for panel in range(4):
    stacked = np.array(r[panel]).T
    second_component = pca.fit_transform(stacked)[:, 1]
    smoothed = preprocesser.exponential_moving_average(abs(second_component), window=100)
    sns.lineplot(smoothed, ax=ax[panel])

In [None]:
# sns.scatterplot(x=preprocesser.exponential_moving_average(abs(pca.fit_transform(preprocesser.normilize(res[0]).T))[:, 1], window=100)[:100],
#                 y=preprocesser.exponential_moving_average(abs(pca.fit_transform(preprocesser.normilize(res[0]).T))[:, 0], window=100)[:100],
#                 hue=np.arange(100))
# ----------
# sns.scatterplot(x=abs(pca.fit_transform(preprocesser.normilize(res[0]).T))[:, 0][:2000],
#                 y=abs(pca.fit_transform(preprocesser.normilize(res[0]).T))[:, 1][:2000],
#                 hue=np.arange(2000))