# Control experiment

In [None]:
# Imports

import numpy as np
import plotly.graph_objects as go

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [None]:
%cd /Users/alexandrine/Library/CloudStorage/OneDrive-TechnischeUniversitätBerlin/causal_detection_of_CSD/
import functions.data_analysis as analysis

/Users/alexandrine/Library/CloudStorage/OneDrive-TechnischeUniversitätBerlin/causal_detection_of_CSD


## Retrieve simulated data

In [7]:
# Directory that the result files are loaded from (passed to
# analysis.load_and_rename_files together with each file name).
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere without editing it.
path = '/Users/alexandrine/Library/CloudStorage/OneDrive-TechnischeUniversitätBerlin/causal_detection_of_CSD/results_causalCSD/saddle-node/methods/controlXP/'

In [10]:
# Load the four result files from `path`: base and control runs, each for the
# LinearRegression and the KNeighborsRegressor variant. The files are loaded in
# the order listed and unpacked into the matching variable.
(
    base_data_LinReg,
    base_data_KNN,
    control_data_LinReg,
    control_data_KNN,
) = (
    analysis.load_and_rename_files(path, file_name)
    for file_name in (
        'base-linear-0.01-LinearRegression-rmvDet-1000.dat',
        'base-linear-0.01-KNeighborsRegressor-rmvDet-1000.dat',
        'control_confounderIncreasAC-linear-0.01-LinearRegression-rmvDet-1000.dat',
        'control_confounderIncreasAC-linear-0.01-KNeighborsRegressor-rmvDet-1000.dat',
    )
)

In [12]:
# Check that the data in base_data matches control_data: for each of the first
# 100 realisations, column 1 of the base array must equal column 2 of the
# control array.
#
# The comparison has to be element-wise. Comparing `a.all() == b.all()` only
# compares two booleans ("are all entries non-zero?") and therefore passes for
# almost any pair of arrays, whether or not they match.
# np.array_equal also requires identical shapes. If the two files are expected
# to agree only up to rounding, use np.allclose here instead.
base_runs = base_data_KNN['base_linear_001_KNeighborsRegressor_rmvDet_1000']['data']
control_runs = control_data_KNN['control_confounderIncreasAC_linear_001_KNeighborsRegressor_rmvDet_1000']['data']
for i in range(100):
    base_series = base_runs[i]['data'][:, 1]
    control_series = control_runs[i]['data'][:, 2]
    assert np.array_equal(base_series, control_series), (
        f'Realisation {i}: base column 1 differs from control column 2'
    )

In [None]:
# Choose which regressor's results the following cells analyse.
# Set REGRESSOR to 'LinearRegression' or 'KNeighborsRegressor'; this replaces
# switching by commenting lines in and out, so the choice is a single edit.
REGRESSOR = 'LinearRegression'

# (control, base) containers as returned by the loading cell, per regressor.
_loaded_by_regressor = {
    'LinearRegression': (control_data_LinReg, base_data_LinReg),
    'KNeighborsRegressor': (control_data_KNN, base_data_KNN),
}
if REGRESSOR not in _loaded_by_regressor:
    raise ValueError(
        f'Unknown REGRESSOR {REGRESSOR!r}; expected one of {sorted(_loaded_by_regressor)}'
    )
_control_files, _base_files = _loaded_by_regressor[REGRESSOR]

# Each container is keyed by the file name with '-' replaced by '_' and the
# '.' dropped (see the literal keys used for both regressors).
control_data = _control_files[f'control_confounderIncreasAC_linear_001_{REGRESSOR}_rmvDet_1000']
base_data = _base_files[f'base_linear_001_{REGRESSOR}_rmvDet_1000']

## Retrieve slope values and p-values for each realisation

In [16]:
# Slope of a straight-line fit to each control realisation's indicator series.
# The series is fitted against its sample index (0, 1, 2, ...) with a degree-1
# polynomial; np.polyfit returns the highest-order coefficient first, so
# element 0 is the slope.
len_control = len(control_data['indicator'])  # number of control realisations, reused later
slopes_control = [
    np.polyfit(np.arange(len(series)), series, 1)[0]
    for series in (control_data['indicator'][k] for k in range(len_control))
]

In [17]:
# Slope of a straight-line fit to each base realisation's indicator series.
# Only the first `len_control` base realisations are fitted, so the result has
# one entry per control realisation.
slopes_base = [
    np.polyfit(
        np.arange(len(base_data['indicator'][k])),
        base_data['indicator'][k],
        1,
    )[0]  # degree-1 fit: coefficient 0 is the slope
    for k in range(len_control)
]

In [None]:
# p-values as numpy arrays; the base ones are cut to the first `len_control`
# entries so both arrays cover the same number of realisations.
pvalues_control, pvalues_base = (
    np.array(control_data['pvalue']),
    np.array(base_data['pvalue'][:len_control]),
)

## Boxplot of slope difference

In [18]:
# Relative change of the control slope with respect to the base slope, in percent:
# (control - base) / base * 100, computed element-wise per realisation.
_slopes_base_arr = np.array(slopes_base)
_slopes_control_arr = np.array(slopes_control)
slope_differences = (_slopes_control_arr - _slopes_base_arr) / _slopes_base_arr * 100

In [None]:
# Box plot of the slope differences (%), with every realisation drawn as a
# jittered point on top of the box.
slope_box = go.Box(
    y=slope_differences,
    name='',
    marker_color='rgb(115,175,72)',
    boxpoints='all',
    jitter=0.3,
    pointpos=0,
    showlegend=False,
)

fig = go.Figure()
fig.add_trace(slope_box)

# Single layout update: titles, size, theme and the x-axis settings.
fig.update_layout(
    title='Slope Differences between Control and Base Data',
    yaxis_title='Slope difference (%)',
    height=500,
    width=400,
    template='simple_white',
    xaxis=dict(
        range=[-0.35, 0.35],
        tickvals=[],  # optional: removes tick labels
    ),
)

fig.show()

## Plot of slope for each realisation

In [None]:
# Line plot of the fitted slope per realisation, base and control overlaid.
fig = go.Figure()

# (legend label, slope values, marker colour); traces are added in this order.
slope_series = (
    ('Base', slopes_base, 'black'),
    ('Control', slopes_control, 'rgb(115,175,72)'),
)
for label, slopes, colour in slope_series:
    fig.add_trace(
        go.Scatter(
            x=np.arange(len(slopes)),
            y=slopes,
            mode='lines+markers',
            name=label,
            marker=dict(color=colour, size=8),
        )
    )

slope_layout = dict(
    title='Slopes of Indicators for Control and Base Data',
    xaxis_title='Realisation',
    yaxis_title='Slope',
    height=500,
    width=800,
    template='simple_white',
    legend=dict(yanchor='bottom', y=0.86, x=0.05),
)
fig.update_layout(**slope_layout)
fig.show()

## Plot of p-value for each realisation

In [None]:
# Plot of the p-value for each realisation, base and control overlaid, with
# dashed reference lines at p = 0.05 and p = 0.01.
fig = go.Figure()

# (legend label, p-values, marker colour); traces are added in this order.
# The x-axis of each trace is built from the length of the p-value array being
# plotted (not from the slope lists), so x and y always have the same length.
for label, pvalues, colour in (
    ('Base', pvalues_base, 'black'),
    ('Control', pvalues_control, 'rgb(115,175,72)'),
):
    fig.add_trace(
        go.Scatter(
            x=np.arange(len(pvalues)),
            y=pvalues,
            mode='lines+markers',
            name=label,
            marker=dict(color=colour, size=8),
        )
    )

# The threshold lines span the number of realisations actually plotted instead
# of a hard-coded 100 (the two are identical when there are 100 realisations).
n_realisations = max(len(pvalues_base), len(pvalues_control))
for level, label, colour in (
    (0.05, 'p = 0.05', 'red'),
    (0.01, 'p = 0.01', 'orange'),
):
    fig.add_trace(
        go.Scatter(
            x=[0, n_realisations],
            y=[level, level],
            mode='lines',
            line=dict(color=colour, width=2, dash='dash'),
            name=label,
            showlegend=True,
        )
    )

fig.update_layout(
    title='pvalues of Indicators for Control and Base Data',
    xaxis_title='Realisation',
    yaxis_title='pvalue',
    height=500,
    width=800,
    template='simple_white',
    legend=dict(yanchor='bottom', y=0.75, x=0.05),
)

fig.show()