In [1]:
import os
import sys

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
import plotly.subplots as subplots

from typing import Union, Literal

sys.path.append(os.path.abspath('..'))
from graphmodex import plotlymodex

In [2]:
# Build a synthetic, mixed-type DataFrame (dates, categoricals, ints, floats,
# sparse columns, +/-inf values and a partially missing binary target).
n_rows = 10000
n_cols = 20  # not referenced in this cell (the frame built below has 25 columns)

# Local seeded generator: Restart & Run All reproduces the same data and the
# global NumPy random state is left untouched.
SEED = 42
rng = np.random.default_rng(SEED)

dates = pd.date_range(start="2025-01-01", end="2025-05-30", periods=n_rows)

data = {"date": dates}

# Categorical columns: 3 with 30 string levels ("0".."29"), 2 with 5 levels ("a".."e")
for i in range(3):
    data[f"cat_30_{i}"] = rng.choice([f"{j}" for j in range(30)], size=n_rows)
for i in range(2):
    data[f"cat_5_{i}"] = rng.choice(['a', 'b', 'c', 'd', 'e'], size=n_rows)

# Generate 5 integer columns (ordered, like credit history length, loan duration)
for i in range(5):
    data[f"int_{i}"] = rng.integers(0, 100, size=n_rows)  # high bound is exclusive

# Generate 10 float columns: float_0..4 ~ N(5000, 2000), float_5..9 ~ N(0, 1)
for i in range(5):
    data[f"float_{i}"] = rng.normal(loc=5000, scale=2000, size=n_rows)
    # Offset by 5 so the two key families never overlap; an offset of 4 would make
    # i=0 and i=4 both write "float_4" and silently drop one column.
    data[f"float_{i+5}"] = rng.normal(loc=0, scale=1, size=n_rows)

# Generate 2 columns with mostly missing values (~95% and ~90% NaN)
data["mostly_nan_1"] = np.where(rng.random(n_rows) < 0.95, np.nan, rng.random(n_rows))
data["mostly_nan_2"] = np.where(rng.random(n_rows) < 0.9, np.nan, rng.random(n_rows))

# Generate 1 column with some special missing values (inf, -inf, NaN).
# The three index draws are independent, so a later assignment may overwrite
# an earlier one at the same position.
special_col = rng.normal(loc=100, scale=50, size=n_rows)
special_col[rng.choice(n_rows, size=10, replace=False)] = np.inf
special_col[rng.choice(n_rows, size=10, replace=False)] = -np.inf
special_col[rng.choice(n_rows, size=100, replace=False)] = np.nan
data["special_missing"] = special_col

# Binary target stored as float so that ~20% of the rows can be set to NaN
target = rng.choice([0, 1], size=n_rows).astype(float)
mask = rng.random(n_rows) < 0.2
target[mask] = np.nan
data["target"] = target

# Insert 50 NaNs into each categorical column except the 2nd and 4th
# (positions 1 and 3 in insertion order), which stay complete.
for i, col in enumerate([col for col in data if col.startswith("cat_")]):
    if (i == 3) or (i == 1): continue
    nan_indices = rng.choice(n_rows, size=50, replace=False)
    temp_col = np.array(data[col], dtype=object)  # object dtype so strings and NaN can coexist
    temp_col[nan_indices] = np.nan
    data[col] = temp_col

# Insert 30 NaNs into the first three float columns (in dict insertion order)
for col in [col for col in data if col.startswith("float_")][:3]:
    nan_indices = rng.choice(n_rows, size=30, replace=False)
    data[col][nan_indices] = np.nan  # arrays are float already; edit in place

# Convert to DataFrame
df = pd.DataFrame(data)

In [None]:
# Three-stage signal-transduction cascade. Every stage applies the same
# saturating transform, out = (maximum * x) / (half_point + x), to the output
# of the previous stage; the curves are then plotted on a log concentration axis.
def _saturate(signal, maximum, half_point):
    """Return (maximum * signal) / (half_point + signal)."""
    return (maximum * signal) / (half_point + signal)


C = np.logspace(-2, 2, 500)  # ligand concentration, from 0.01 to 100

# Cascade I (receptor binding)
B_max = 1.0   # maximum binding capacity
K_d = 1.0     # dissociation constant (affinity)
# Cascade II
S_max1 = 1.0  # maximum gain
K_e1 = 0.3    # half-response point
# Cascade III (final response)
S_max2 = 1.0  # maximum gain
K_e2 = 0.1    # half-response point

stimulus_I = _saturate(C, B_max, K_d)
stimulus_II = _saturate(stimulus_I, S_max1, K_e1)
response = _saturate(stimulus_II, S_max2, K_e2)

# Flat dotted guide at y = 0.5
ref_line = go.Scatter(
    x=C,
    y=[0.5 for _ in range(len(C))],
    mode='lines',
    name='50% máx (reference)',
    line={'color': 'rgb(217, 217, 217)', 'dash': 'dot'},
)
trace1 = go.Scatter(
    x=C,
    y=stimulus_I,
    mode='lines',
    name='Stimulus I (receptor)',
    line={'color': '#000000'},
)
trace2 = go.Scatter(
    x=C,
    y=stimulus_II,
    mode='lines',
    name='Stimulus II (transductor)',
    line={'color': '#7a7a7a'},
)
trace3 = go.Scatter(
    x=C,
    y=response,
    mode='lines',
    name='Final Response (physiological)',
    line={'width': 3, 'color': '#50dd40'},
)

fig = go.Figure(data=[ref_line, trace1, trace2, trace3])

# Kept as the cell's last expression so whatever main_layout returns is displayed.
plotlymodex.main_layout(
    fig,
    title='Simulação da Cascata de Transdução de Sinal',
    x='Ligand Concentration [C]',
    y='Magnitude',
    x_type='log',
    legend_x=0.55,
    legend_y=0.04,
    legend_border_color='black',
    y_range=[0, 1.1],
)