In [None]:
import pandas as pd
import numpy as np
import scipy
from sklearn.linear_model import ElasticNet, ElasticNetCV
from sklearn.model_selection import RepeatedKFold, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scripts.python.routines.betas import betas_drop_na
from plotly.subplots import make_subplots
from numpy.ma import masked_array
from scipy import stats
from mpl_toolkits.axes_grid1 import make_axes_locatable
import pickle
import random
import plotly.express as px
import copy
import statsmodels.formula.api as smf
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scripts.python.pheno.datasets.filter import filter_pheno
from scripts.python.pheno.datasets.features import get_column_name, get_status_dict, get_sex_dict
from scripts.python.routines.plot.scatter import add_scatter_trace
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import pathlib
from scripts.python.routines.manifest import get_manifest
from scripts.python.routines.plot.save import save_figure
from scripts.python.routines.plot.layout import add_layout, get_axis
from scripts.python.routines.plot.p_value import add_p_value_annotation
from statsmodels.stats.multitest import multipletests
from sklearn.metrics import mean_absolute_error
import plotly.io as pio
# Clear the MathJax setting on the kaleido export scope.
# NOTE(review): presumably a workaround to keep MathJax artefacts out of
# exported static images — confirm against the installed kaleido version.
pio.kaleido.scope.mathjax = None
from plotly.offline import init_notebook_mode, iplot
# Set up plotly's offline notebook mode; connected=False requests the
# non-CDN ("offline") variant of the initialisation.
init_notebook_mode(connected=False)
from pathlib import Path
from functools import reduce
from scipy.stats import chi2_contingency
from scipy.stats import kruskal, mannwhitneyu
from impyute.imputation.cs import fast_knn, mean, median, random, mice, mode, em
from sklearn.manifold import LocallyLinearEmbedding
from sklearn.decomposition import PCA
from sklearn.decomposition import IncrementalPCA
from sklearn.decomposition import KernelPCA
from sklearn.decomposition import SparsePCA
from sklearn.manifold import MDS
from sklearn.manifold import Isomap
from sklearn.manifold import TSNE
from sklearn.decomposition import TruncatedSVD
from sklearn.random_projection import GaussianRandomProjection
from sklearn.decomposition import FastICA
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.random_projection import SparseRandomProjection

# Prepare data

In [None]:
# Root folder that holds every experiment.
# NOTE(review): absolute, machine-specific path — adjust before running on
# another machine.
path = "E:/YandexDisk/EEG/experiments"

# Which experiment folder to read and which class-list file to apply.
exp_type = '1st_day'
exp_sub_type = 'quasi'

# Input folder of the chosen experiment and output folder for this notebook.
path_load = f"{path}/{exp_type}"
path_save = f"{path}/special/002_dimensionality_reduction"
pathlib.Path(path_save).mkdir(parents=True, exist_ok=True)

# Samples (indexed by the 'index' column), the feature list (indexed by the
# 'features' column) and the table of classes selected for this run.
df_data = pd.read_excel(f"{path_load}/data.xlsx", index_col='index')
df_features = pd.read_excel(f"{path_load}/features_freq.xlsx", index_col='features')
classes_df = pd.read_excel(f"{path_load}/classes/{exp_sub_type}.xlsx")

# Keep only the rows whose class appears in the selected class table.
# The explicit .copy() makes the result an independent frame, so adding
# columns to it later does not raise SettingWithCopyWarning.
df_data = df_data.loc[df_data['class_simp'].isin(classes_df['class_simp']), :].copy()

# Plot scatters

In [None]:


# Order subjects by the number that follows the first character of their id;
# ids with an equal number fall back to plain string order.
# NOTE(review): assumes every id is one prefix character followed by a number.
subjects = sorted(df_data['subject'].unique(), key=lambda x: (float(x[1:]), x))
n_components = 2

# Project the feature columns onto the first principal components and store
# each component as a "PCA <k>" column next to the original data.
pca = PCA(n_components=n_components)
data_pca = pca.fit_transform(df_data.loc[:, df_features.index.values])
for comp_id in range(n_components):
    df_data[f"PCA {comp_id + 1}"] = data_pca[:, comp_id]

targets = ["PCA 1", "PCA 2"]

# One colour per subject; the index wraps around so that more subjects than
# palette entries re-use colours instead of raising IndexError.
palette = px.colors.qualitative.Dark24

# Marker symbol / outline width for the first and the second listed class.
# Only as many classes as there are styles here are drawn.
class_styles = [
    dict(symbol='circle', line_width=0.1),
    dict(symbol='cross', line_width=0.3),
]
plotted_classes = classes_df['class_simp'].iloc[:len(class_styles)]

fig = go.Figure()
for subj_id, subj in enumerate(subjects):
    subj_color = palette[subj_id % len(palette)]
    for class_value, style in zip(plotted_classes, class_styles):
        # Rows of this subject that belong to this class.
        mask = (df_data['subject'] == subj) & (df_data['class_simp'] == class_value)
        xs = df_data.loc[mask, targets[0]].values
        ys = df_data.loc[mask, targets[1]].values
        fig.add_trace(
            go.Scatter(
                x=xs,
                y=ys,
                showlegend=True,
                # Legend entry is the class name up to the first underscore.
                name=f"{class_value.split('_')[0]}",
                mode='markers',
                marker=dict(
                    color=subj_color,
                    symbol=style['symbol'],
                    size=15,
                    opacity=0.7,
                    line=dict(
                        color='black',
                        width=style['line_width']
                    )
                ),
                # Both class traces of a subject share one legend group.
                legendgroup=f"{subj}",
                legendgrouptitle_text=f"{subj}",
            )
        )

add_layout(fig, targets[0], targets[1], "PCA")
fig.update_layout(
    template="none",
    legend=dict(
        itemsizing='constant',
        orientation="h",
        font_size=20,
    ),
    margin=go.layout.Margin(
        l=120,
        r=20,
        b=120,
        t=230,
        pad=0
    ),
    title=dict(
        text="PCA",
        xref="paper",
        font=dict(
            size=45
        ),
        y=0.99
    ),
    showlegend=True,
    xaxis=get_axis(targets[0], 30, 30),
    yaxis=get_axis(targets[1], 30, 30),
    autosize=False,
    width=900,
    height=900,
)
save_figure(fig, f"{path_save}/PCA")