In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import NearestNeighbors
import plotly.graph_objects as go

# Build dummy data (the global NumPy seed is fixed so every run draws the same sample)
np.random.seed(0)
n = 200
# Columns are drawn one after another in frame order: treatment, age, income, education.
df = pd.DataFrame({'treatment': np.random.binomial(1, 0.5, size=n)})
df['age'] = np.random.normal(50, 10, size=n)
df['income'] = np.random.normal(60000, 15000, size=n)
df['education'] = np.random.normal(16, 2, size=n)

# Estimate propensity scores with a logistic regression of treatment on the covariates
X = df[['age', 'income', 'education']]
y = df['treatment']
model = LogisticRegression()
model.fit(X, y)
# Column 1 of predict_proba is stored as the propensity score.
df['pscore'] = model.predict_proba(X)[:, 1]

# Matching: for every treated row, look up the single nearest control row by pscore.
# Each treated row is queried independently, so a control row can be selected repeatedly.
treated = df.loc[df['treatment'].eq(1)]
control = df.loc[df['treatment'].eq(0)]
nn = NearestNeighbors(n_neighbors=1).fit(control[['pscore']])
_, indices = nn.kneighbors(treated[['pscore']])
matched_control = control.iloc[indices.ravel()].copy()

# Standardized mean difference (SMD) helper
def compute_smd(df1, df2, covariates):
    """Return the standardized mean difference of each covariate between two frames.

    For every name in ``covariates`` the value is
    ``(mean(df1[var]) - mean(df2[var])) / sqrt((var(df1[var]) + var(df2[var])) / 2)``,
    using the ``.mean()`` and ``.var()`` methods of the selected columns.

    Parameters
    ----------
    df1, df2 : objects indexable by covariate name whose columns expose
        ``.mean()`` and ``.var()`` (e.g. pandas DataFrames).
    covariates : iterable of column names present in both frames.

    Returns
    -------
    numpy.ndarray
        One signed SMD per covariate, in the order given.

    Notes
    -----
    A covariate that is constant and identical in both groups has a zero mean
    difference and a zero pooled standard deviation; it is reported as ``0.0``
    (perfectly balanced) instead of the NaN that 0/0 would produce. If the
    pooled standard deviation is zero but the means differ, the result is a
    signed infinity; if a variance is NaN the result is NaN. Neither case
    emits a NumPy floating-point warning.
    """
    smd = []
    for var in covariates:
        mean_diff = df1[var].mean() - df2[var].mean()
        pooled_sd = np.sqrt((df1[var].var() + df2[var].var()) / 2)
        if mean_diff == 0 and pooled_sd == 0:
            # Constant covariate with equal means: balanced by definition.
            smd.append(0.0)
            continue
        # Division by a zero or NaN spread is intentional here; silence the warning only.
        with np.errstate(divide='ignore', invalid='ignore'):
            smd.append(mean_diff / pooled_sd)
    return np.array(smd)

covariates = ['age', 'income', 'education']
smd_before = compute_smd(treated, control, covariates)
smd_after = compute_smd(treated, matched_control, covariates)

# Love plot with Plotly: one marker per covariate, before and after matching
fig = go.Figure()

for smd_values, trace_name, marker_color, marker_symbol in (
    (smd_before, 'Before Matching', 'blue', 'circle'),
    (smd_after, 'After Matching', 'green', 'square'),
):
    fig.add_trace(go.Scatter(
        x=smd_values,
        y=covariates,
        mode='markers',
        name=trace_name,
        marker={'color': marker_color, 'symbol': marker_symbol, 'size': 10},
    ))

# Dashed red reference lines at the SMD thresholds +0.1 and -0.1
for threshold in (0.1, -0.1):
    fig.add_shape(
        type="line",
        x0=threshold,
        y0=-1,
        x1=threshold,
        y1=len(covariates),
        line={'color': "red", 'dash': "dash"},
    )

fig.update_layout(
    title="Love Plot (Plotly)",
    xaxis_title="Standardized Mean Difference",
    yaxis_title="Covariates",
    xaxis={'zeroline': True},
    template="plotly_white",
    legend={'x': 0.8, 'y': 1.2},
)

fig.show()


In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import NearestNeighbors
import plotly.graph_objects as go

# Build dummy data (the global NumPy seed is fixed so every run draws the same sample)
np.random.seed(0)
n = 200
# Columns are drawn one after another in frame order: treatment, age, income, education.
df = pd.DataFrame({'treatment': np.random.binomial(1, 0.5, size=n)})
df['age'] = np.random.normal(50, 10, size=n)
df['income'] = np.random.normal(60000, 15000, size=n)
df['education'] = np.random.normal(16, 2, size=n)

# Estimate propensity scores with a logistic regression of treatment on the covariates
X = df[['age', 'income', 'education']]
y = df['treatment']
model = LogisticRegression()
model.fit(X, y)
# Column 1 of predict_proba is stored as the propensity score.
df['pscore'] = model.predict_proba(X)[:, 1]

# Nearest-neighbour matching: pair every treated row with the control row
# whose pscore is closest. Rows are queried independently, so a control row
# can be selected repeatedly.
treated = df.loc[df['treatment'].eq(1)]
control = df.loc[df['treatment'].eq(0)]
nn = NearestNeighbors(n_neighbors=1).fit(control[['pscore']])
_, indices = nn.kneighbors(treated[['pscore']])
matched_control = control.iloc[indices.ravel()].copy()

# AMD (Absolute Mean Difference) helper
def compute_amd(df1, df2, covariates):
    """Return the absolute difference in means of each covariate between two frames.

    The difference is left on the covariate's own scale (it is not divided by
    any standard deviation).

    Parameters
    ----------
    df1, df2 : objects indexable by covariate name whose columns expose ``.mean()``.
    covariates : iterable of column names present in both frames.

    Returns
    -------
    numpy.ndarray
        ``abs(mean(df1[var]) - mean(df2[var]))`` per covariate, in the order given.
    """
    return np.array([
        abs(df1[var].mean() - df2[var].mean())  # raw, unstandardized gap
        for var in covariates
    ])

covariates = ['age', 'income', 'education']
amd_before = compute_amd(treated, control, covariates)
amd_after = compute_amd(treated, matched_control, covariates)

# AMD Love plot (Plotly): one marker per covariate, before and after matching
fig = go.Figure()

for amd_values, trace_name, marker_color, marker_symbol in (
    (amd_before, 'Before Matching', 'blue', 'circle'),
    (amd_after, 'After Matching', 'green', 'square'),
):
    fig.add_trace(go.Scatter(
        x=amd_values,
        y=covariates,
        mode='markers',
        name=trace_name,
        marker={'color': marker_color, 'symbol': marker_symbol, 'size': 10},
    ))

fig.update_layout(
    title="Love Plot with Absolute Mean Difference (AMD)",
    xaxis_title="Absolute Mean Difference",
    yaxis_title="Covariates",
    template="plotly_white",
    legend={'x': 0.8, 'y': 1.2},
)

fig.show()

# Last expression of the cell: shows the first rows of the frame as the cell output.
df.head()

Unnamed: 0,treatment,age,income,education,pscore
0,1,61.266359,32353.956748,11.271652,0.515666
1,1,39.200685,52830.389939,16.040668,0.525567
2,1,38.525313,52805.16279,13.304149,0.525555
3,1,45.6218,69305.374475,14.476853,0.533519
4,0,45.019675,70476.857237,20.022513,0.534084


In [4]:
from graphviz import Digraph

dot = Digraph()

dot.attr(fontname='MS Gothic')  # or 'MS Mincho', 'Meiryo', etc.

# Nodes: I = intervention, Y = outcome, C = income (confounder); labels are Japanese.
for node_id, node_label in (
    ('I', '介入'),
    ('Y', 'アウトカム'),
    ('C', '収入（交絡因子）'),
):
    dot.node(node_id, node_label, fontname='MS Gothic')

# Edges: C -> I, C -> Y, I -> Y
for tail, head in (('C', 'I'), ('C', 'Y'), ('I', 'Y')):
    dot.edge(tail, head)

# Write the graph under the base name 'dag_japanese' as PNG, keeping the source file.
dot.render('dag_japanese', format='png', cleanup=False)

dot.view()


'dag_japanese.pdf'