# Import Libraries

In [10]:
# Kept first so the explicit imports below take precedence over any
# identically named objects pulled in by the wildcard.
from GenerateSyntheticData import *

from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import statsmodels.discrete.discrete_model as DiscreteModel
import statsmodels.tools.tools as Tools


# Generate Synthetic Test Data

In [11]:
# Settings for the synthetic sample: 40 features in total, of which 5 are
# requested as informative and 30 as redundant, over 10,000 observations.
# NOTE(review): sigma_std is presumably a noise scale — confirm against
# get_test_dataset in GenerateSyntheticData.
dataset_kwargs = {
    "n_samples": 10000,
    "n_features": 40,
    "n_informative": 5,
    "n_redundant": 30,
    "sigma_std": 0.1,
}
X, y = get_test_dataset(**dataset_kwargs)

# Display the dimensions of the feature matrix and of the target vector.
(X.shape, y.shape)

((10000, 40), (10000,))

# Logistic Regression Fit

In [12]:
# Fit a logistic regression of y on all columns of X.
# NOTE(review): X is passed as-is, so no intercept column is added in this
# cell; statsmodels does not add one on its own — confirm this is intended.
logit_model = DiscreteModel.Logit(endog=y, exog=X)
result = logit_model.fit()

# One row per explanatory variable, holding the p-value reported by the fit.
p_values = pd.DataFrame(
    {"FeatureName": X.columns, "PValue": result.pvalues}
)

Optimization terminated successfully.
         Current function value: 0.380271
         Iterations 7


In [13]:
# Reorder the table from the largest p-value down to the smallest, then
# display it as the cell output.
p_values = p_values.sort_values(by="PValue", ascending=False)
p_values

Unnamed: 0,FeatureName,PValue
R_5,R_5,0.990333
R_7,R_7,0.982238
R_0,R_0,0.924717
R_12,R_12,0.923498
R_21,R_21,0.87023
R_1,R_1,0.828937
R_4,R_4,0.759068
R_19,R_19,0.741432
R_25,R_25,0.733219
R_15,R_15,0.661101


# Plot Results

In [14]:
# Horizontal bar chart with one bar per feature; the bar length is the
# feature's p-value. Bars follow the current row order of `p_values`.
fig = px.bar(
    p_values,
    x="PValue",
    y="FeatureName",
    orientation='h',
)

# Title, axis labels, dark template and an 800x1200 canvas.
fig.update_layout(
    title="p-Values computed on set of explanatory variables",
    xaxis_title="p-value",
    yaxis_title="Feature Name",
    template="plotly_dark",
    width=800,
    height=1200,
)

fig.show()


# Save Results

In [15]:
# Export the bar chart as a static PNG using the kaleido engine.
figure_path = Path("./Figs/p_values_on_set_of_explanatory_variables.png")

# write_image fails when the target folder is missing, so make sure ./Figs
# exists before exporting (a no-op when the folder is already there).
figure_path.parent.mkdir(parents=True, exist_ok=True)

fig.write_image(str(figure_path), format='png', engine='kaleido')