In [None]:
# Setup cell: every import used by the notebook plus plotly's notebook/export configuration.
# NOTE(review): several imports below are repeated verbatim (betas_drop_na, the sklearn
# metrics, plotly.io + init_notebook_mode, pathlib). Repeats are harmless at runtime but
# could be collapsed once it is confirmed no other cell relies on the ordering.
import pandas as pd
import numpy as np
import scipy
from scripts.python.routines.betas import betas_drop_na
from sklearn.linear_model import ElasticNet, ElasticNetCV
from sklearn.model_selection import RepeatedKFold, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# Duplicate of the betas_drop_na import a few lines above.
from scripts.python.routines.betas import betas_drop_na
from plotly.subplots import make_subplots
from scipy import stats
import pickle
# Standard-library `random`; this name is rebound by the impyute import further down.
import random
import plotly.express as px
import copy
import statsmodels.formula.api as smf
# Duplicate: both metrics are already imported from sklearn.metrics above.
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scripts.python.pheno.datasets.filter import filter_pheno
from scripts.python.pheno.datasets.features import get_column_name, get_status_dict, get_sex_dict
from scripts.python.routines.plot.scatter import add_scatter_trace
import plotly.graph_objects as go
import pathlib
from scripts.python.routines.manifest import get_manifest
from scripts.python.routines.plot.save import save_figure
from scripts.python.routines.plot.layout import add_layout, get_axis
from scripts.python.routines.plot.p_value import add_p_value_annotation
from statsmodels.stats.multitest import multipletests
# Duplicate: mean_absolute_error is already imported above.
from sklearn.metrics import mean_absolute_error
# CAUTION: this binds the generic names `mean`, `median`, `mode` and `random` to impyute
# functions. In particular `random` no longer refers to the standard-library module
# imported earlier in this cell, so `random.seed(...)`-style calls made after this line
# would not reach the stdlib module.
from impyute.imputation.cs import fast_knn, mean, median, random, mice, mode, em
import plotly.io as pio
# Clears the MathJax setting on the kaleido export scope — presumably so static image
# export does not load MathJax; TODO confirm the motivation.
pio.kaleido.scope.mathjax = None
from plotly.offline import init_notebook_mode, iplot
# Initialises plotly's offline notebook mode with connected=False (presumably embedding
# plotly.js in the notebook instead of loading it from a CDN — confirm).
init_notebook_mode(connected=False)
from functools import reduce
from scipy.stats import kruskal, mannwhitneyu
from pycombat import Combat
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import upsetplot as upset
import seaborn as sns
import missingno as msno
# The next four lines repeat the plotly.io / kaleido / init_notebook_mode setup above.
import plotly.io as pio
pio.kaleido.scope.mathjax = None
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=False)
import matplotlib.pyplot as plt
# Duplicate: pathlib is already imported above.
import pathlib

# Read the data

In [None]:
# Dataset to analyse and the root folder that holds all datasets.
# NOTE(review): `path` is an absolute, machine-specific location; it must exist for this cell to run.
dataset = "GSEUNN"
path = "E:/YandexDisk/Work/pydnameth/datasets"

# The dataset registry workbook supplies the platform, which is then passed to get_manifest.
datasets_info = pd.read_excel(f"{path}/datasets.xlsx", index_col='dataset')
platform = datasets_info.loc[dataset, 'platform']
manifest = get_manifest(platform, path=path)

# Shared path prefixes for everything read or written below.
dataset_dir = f"{path}/{platform}/{dataset}"
immuno_dir = f"{dataset_dir}/special/021_ml_data/immuno"

# Feature names come from the "gene" column of the markers workbook; each feature gets
# a "_raw" and a "_harm" column alias so both versions can live in one merged table.
feats = pd.read_excel(f"{dataset_dir}/data/immuno/immuno_markers_genes.xlsx")["gene"].values.tolist()
feats_raw_dict = {name: f"{name}_raw" for name in feats}
feats_harm_dict = {name: f"{name}_harm" for name in feats}

# First table: feature columns only, renamed with the "_raw" suffix.
df_raw = (
    pd.read_excel(
        f"{immuno_dir}/260_imp(fast_knn)_replace(quarter).xlsx",
        index_col="index",
    )
    .loc[:, feats]
    .rename(columns=feats_raw_dict)
)

# Second table: feature columns renamed with the "_harm" suffix, plus the four
# phenotype columns, which keep their names because they are not keys of the rename map.
df_harm = (
    pd.read_excel(
        f"{immuno_dir}/global_data/all_1052_121222/df_samples(ctrl_550_from_all_1052_121222)_proc(# )_imp(fast_knn)_replace(quarter).xlsx",
        index_col="index",
    )
    .loc[:, feats + ["Age", "Sex", "Status", "Region"]]
    .rename(columns=feats_harm_dict)
)

# Index-on-index merge using pandas' default join type (inner), so only index values
# present in both tables survive.
df = df_raw.merge(df_harm, left_index=True, right_index=True)
print(df.shape)

# Output folder for this notebook's results; created on demand.
path_save = f"{dataset_dir}/special/039_harmonized_vs_raw"
pathlib.Path(path_save).mkdir(parents=True, exist_ok=True)

# Violin and scatter plots (raw vs. harmonized)

In [None]:
# For every feature, save two figures under {path_save}/plots:
#   1. violin_<feat>Acc - distributions of the "_raw" and "_harm" columns side by side;
#   2. scatter_<feat>   - "_raw" (x) against "_harm" (y) with a y = x reference line.
path_local = "plots"
plots_dir = f"{path_save}/{path_local}"
pathlib.Path(plots_dir).mkdir(parents=True, exist_ok=True)

# One entry per violin: (trace name, feature -> column-name map, colour).
violin_specs = (
    ("Raw", feats_raw_dict, "red"),
    ("Harmonized", feats_harm_dict, "blue"),
)

for feat in feats:
    feat_raw = feats_raw_dict[feat]
    feat_harm = feats_harm_dict[feat]

    # Overall min/max across both versions of the feature; used as the axis limits and
    # as the end points of the diagonal in the scatter figure.
    pair_values = df[[feat_raw, feat_harm]]
    min_val = pair_values.min().min()
    max_val = pair_values.max().max()

    # ---- Figure 1: violins -------------------------------------------------------
    fig = go.Figure()
    for trace_name, name_map, trace_color in violin_specs:
        violin_values = df[name_map[feat]]
        fig.add_trace(
            go.Violin(
                y=violin_values,
                name=trace_name,
                points='all',
                box_visible=True,
                meanline_visible=True,
                showlegend=False,
                opacity=0.8,
                line_color='black',
                fillcolor=trace_color,
                marker=dict(
                    color=trace_color,
                    opacity=0.8,
                    line=dict(color='black', width=0.3),
                ),
                # Kernel bandwidth is 1/25 of the column's value range.
                bandwidth=np.ptp(violin_values) / 25,
            )
        )
    # add_layout (project helper) runs first so the explicit settings below are applied on top of it.
    add_layout(fig, "", f"{feat}", "")
    fig.update_layout(
        title_xref='paper',
        legend=dict(font=dict(size=20), itemsizing='constant', y=1.01),
        margin=dict(l=110, r=20, b=50, t=40, pad=0),
    )
    # NOTE(review): the "Acc" suffix in the file name looks like a leftover from another
    # analysis (the scatter file below has no such suffix) — confirm before renaming,
    # since other code may read these files.
    save_figure(fig, f"{plots_dir}/violin_{feat}Acc")

    # ---- Figure 2: raw vs harmonized scatter -------------------------------------
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=df[feat_raw].values,
            y=df[feat_harm].values,
            mode="markers",
            name="",
            showlegend=False,
            marker=dict(
                color="red",
                size=8,
                opacity=0.85,
                line=dict(color="black", width=0.5),
            ),
        )
    )
    # Diagonal from (min, min) to (max, max). Only marker attributes are set on this
    # lines-only trace; no explicit line colour is given.
    fig.add_trace(
        go.Scatter(
            x=[min_val, max_val],
            y=[min_val, max_val],
            mode="lines",
            name="",
            showlegend=False,
            marker=dict(
                color="black",
                size=8,
                opacity=0.75,
                line=dict(color="black", width=0.5),
            ),
        )
    )
    add_layout(fig, "Raw", "Harmonized", f"{feat}")
    # Fix both axes to the same explicit range and lock the aspect ratio to 1:1.
    fig.update_xaxes(autorange=False)
    fig.update_yaxes(autorange=False)
    fig.update_layout(
        xaxis_range=[min_val, max_val],
        yaxis_range=[min_val, max_val],
        legend=dict(font=dict(size=20), itemsizing='constant'),
        margin=dict(l=110, r=20, b=80, t=40, pad=0),
    )
    fig.update_yaxes(scaleanchor="x", scaleratio=1)
    save_figure(fig, f"{plots_dir}/scatter_{feat}")