# Utils

In [29]:
from typing import List, Optional

import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [30]:
def principal_component_analysis(
        data: pd.DataFrame,
        n_components: Optional[int] = 3,
        drop_columns: Optional[List[str]] = None,
):
    """Run PCA on the standardised, complete rows of ``data``.

    Python counterpart of the R snippets quoted in the inline comments
    (``na.omit`` -> ``scale`` -> ``princomp``).

    Args:
        data: Input table. It is not modified.
        n_components: Number of components to keep; passed unchanged to
            ``sklearn.decomposition.PCA``.
        drop_columns: Columns removed before the analysis. ``None`` (the
            default) means ``['combined1.1', 'combined2.1']``.

    Returns:
        A tuple ``(variance_contribution, components, scores)``:

        * ``variance_contribution`` - percentage of variance explained by
          each kept component;
        * ``components`` - DataFrame of loadings, one row per remaining
          input column and one column per component;
        * ``scores`` - array with the projection of every complete row
          onto the kept components.

    Raises:
        KeyError: If a name in ``drop_columns`` is not a column of ``data``.
        ValueError: If nothing is left to analyse after dropping the columns
            and the rows with missing values.
    """
    # A None sentinel avoids sharing one mutable list object between calls.
    if drop_columns is None:
        drop_columns = ['combined1.1', 'combined2.1']

    # X <- na.omit(data[,c(3:20,23:31)])
    # drop()/dropna() return new frames, so the caller's table stays intact.
    prepared = data.drop(columns=drop_columns).dropna()
    if prepared.empty:
        # Fail early with a message that names the actual cause.
        raise ValueError(
            'Nothing left for PCA: the data has no rows or no columns after '
            'dropping the requested columns and the rows with missing values.'
        )

    # pc <- princomp(scale(X))
    scaled_data = StandardScaler().fit_transform(prepared)

    model = PCA(n_components=n_components)
    model.fit(scaled_data)

    # (pc$sdev/sum(pc$sdev)*100)[seq(10)]
    # The R line normalises standard deviations, whereas scikit-learn exposes
    # a ratio of variances (sigma^2 = D), so the percentages are analogous
    # but not numerically identical.
    variance_contribution = model.explained_variance_ratio_ * 100

    # pc$loadings[,seq(3)]
    components = pd.DataFrame(model.components_.T, index=prepared.columns)

    # pc$scores
    scores = model.transform(scaled_data)

    return variance_contribution, components, scores

In [31]:
def compare_component_scores(scores: np.ndarray, first_component_index: int, second_component_index: int) -> None:
    """Display a scatter plot of one column of ``scores`` against another.

    Args:
        scores: Two-dimensional array; each row becomes one point.
        first_component_index: Column of ``scores`` plotted on the x axis.
        second_component_index: Column of ``scores`` plotted on the y axis.
    """
    # Row positions are attached to the hover tooltip under the key 'index'.
    row_positions = list(range(scores.shape[0]))

    # Axis captions (Russian for "Component No. <index>").
    x_caption = f'Компонента №{first_component_index}'
    y_caption = f'Компонента №{second_component_index}'

    figure = px.scatter(
        scores,
        x=first_component_index,
        y=second_component_index,
        hover_data={'index': row_positions},
    )
    figure.update_xaxes(title=x_caption)
    figure.update_yaxes(title=y_caption)
    figure.show()

# First day

In [32]:
# Load the raw table. Later cells slice it by column position (iloc), so the
# column order of the CSV matters.
data = pd.read_csv('data_big.csv')
data

Unnamed: 0.1,Unnamed: 0,X.1,depressed.mood.1,anxiety.1,suspiciousness.1,irritability.1,craving.to.alcohol.1,weakness.1,insomia.1,headache.1,...,combined2.9,HR.9,SBP.9,DBP.9,MBP.9,SV.9,CO.9,SI.9,CI.9,TPR.9
0,1,1,1,1,0,1,1,1,1,0,...,0,68.0,108.0,70.0,83.0,74.0,5.0,40.0,2.7,1321.0
1,2,2,1,1,0,0,1,1,2,1,...,9,63.0,114.0,70.0,85.0,123.0,7.7,60.0,3.8,879.0
2,3,3,1,1,0,0,0,2,1,0,...,0,64.0,120.0,80.0,93.0,106.0,6.8,55.0,3.5,1098.0
3,4,4,2,2,0,0,0,2,0,0,...,0,56.0,124.0,90.0,101.0,90.0,5.1,43.0,2.4,1600.0
4,5,5,1,1,0,0,2,2,1,0,...,0,66.0,116.0,78.0,90.0,90.0,5.9,45.0,2.2,1228.0
5,6,6,1,1,0,1,1,2,0,0,...,0,62.0,142.0,90.0,107.0,113.0,7.0,56.0,3.4,1225.0
6,7,7,1,1,0,1,1,2,2,1,...,0,85.0,110.0,84.0,93.0,46.0,3.9,21.0,1.8,1899.0
7,8,8,1,1,0,1,0,1,2,1,...,0,68.0,134.0,88.0,103.0,74.0,6.4,33.0,2.9,1290.0
8,9,9,1,1,0,0,2,1,0,0,...,0,76.0,134.0,76.0,96.0,116.0,8.6,60.0,4.4,886.0
9,10,10,1,1,0,1,1,1,2,1,...,0,64.0,110.0,70.0,83.0,43.0,2.7,27.0,1.7,2467.0


In [33]:
# Day-1 block: columns at positions 2..30 of the raw table.
first_day = data.iloc[:, 2:31]
(
    first_day_variance_contribution,
    first_day_components,
    first_day_scores,
) = principal_component_analysis(first_day, drop_columns=['combined1.1', 'combined2.1'])

In [34]:
# Percentage of variance explained by each kept day-1 component.
first_day_variance_contribution

In [35]:
# Day-1 loadings: one row per input variable, one column per component.
first_day_components

Unnamed: 0,0,1,2
depressed.mood.1,-0.185961,0.109399,0.208067
anxiety.1,-0.062101,0.062217,0.098085
suspiciousness.1,-0.064675,0.271265,0.293979
irritability.1,-0.049026,0.146249,-0.017966
craving.to.alcohol.1,-0.16042,0.278009,-0.150759
weakness.1,-0.156399,0.13665,-0.139019
insomia.1,0.00148,0.065864,0.404339
headache.1,0.021112,0.272655,0.31464
tremor.1,-0.069909,0.29213,-0.180044
polyuria.1,0.034275,0.074385,0.261947


In [36]:
# Variables whose loading on component 0 exceeds 0.2 in absolute value.
first_day_components.loc[first_day_components[0].abs() > 0.2, 0]

In [37]:
# Variables whose loading on component 1 exceeds 0.25 in absolute value.
first_day_components.loc[first_day_components[1].abs() > 0.25, 1]

In [38]:
# Variables whose loading on component 2 exceeds 0.22 in absolute value.
first_day_components.loc[first_day_components[2].abs() > 0.22, 2]

In [39]:
# Day-1 rows without missing values, projected onto the kept components.
first_day_scores

In [40]:
# plot(pc$scores[,1], pc$scores[,2])
# The picture is mirrored about x=0 because the zeroth (first) component points in the opposite direction

compare_component_scores(first_day_scores, 0, 1)

Unsupported

In [41]:
# plot(pc$scores[,1], pc$scores[,3])
# The picture is mirrored about x=0 because the zeroth (first) component points in the opposite direction

compare_component_scores(first_day_scores, 0, 2)

Unsupported

# Second day

In [42]:
# Day-2 block: columns at positions 32..60 of the raw table.
second_day = data.iloc[:, 32:61]
(
    second_day_variance_contribution,
    second_day_components,
    second_day_scores,
) = principal_component_analysis(second_day, drop_columns=['combined1.2', 'combined2.2'])

In [43]:
# Percentage of variance explained by each kept day-2 component.
second_day_variance_contribution

In [44]:
# Day-2 loadings: one row per input variable, one column per component.
second_day_components

Unnamed: 0,0,1,2
depressed.mood.2,0.09950478,0.24417,0.126563
anxiety.2,0.04883331,0.214816,-0.041591
suspiciousness.2,1.110223e-16,-0.0,0.0
irritability.2,0.124415,0.083354,0.081061
craving.to.alcohol.2,0.09850811,0.285698,0.254724
weakness.2,0.2143747,0.148435,0.205371
insomia.2,0.1526433,0.066297,0.22883
headache.2,0.1603263,0.1442,-0.03272
tremor.2,0.2272636,0.059338,-0.106806
polyuria.2,0.1610519,0.119969,-0.111032


In [45]:
# Variables whose loading on component 0 exceeds 0.22 in absolute value.
second_day_components.loc[second_day_components[0].abs() > 0.22, 0]

In [46]:
# Variables whose loading on component 1 exceeds 0.2 in absolute value.
second_day_components.loc[second_day_components[1].abs() > 0.2, 1]

In [47]:
# Variables whose loading on component 2 exceeds 0.2 in absolute value.
second_day_components.loc[second_day_components[2].abs() > 0.2, 2]

In [48]:
# Day-2 rows without missing values, projected onto the kept components.
second_day_scores

In [49]:
# Day-2 scatter plots: component 0 against component 1, then against component 2.
compare_component_scores(second_day_scores, 0, 1)
compare_component_scores(second_day_scores, 0, 2)

Unsupported