In [None]:
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Re-generate the S-curve data: a logistic curve in x plus Gaussian noise
# whose standard deviation grows linearly with x (heteroscedastic).
np.random.seed(0)
x = np.linspace(0, 10, 1000)
y = 20 / (1 + np.exp(-x + 5))
y += np.random.normal(0, 1, size=len(x)) * (1 + 0.5 * x)

# Prepare the data for LightGBM: polynomial features x, x^2, x^3.
data = np.column_stack((x, x**2, x**3))
dataset = lgb.Dataset(data, label=y)

# Quantiles to calculate (lower and upper band).
quantiles = [0.05, 0.95]

# Settings shared by every quantile model; only 'alpha' changes per model.
base_params = {
    'objective': 'quantile',
    'learning_rate': 0.05,
    'num_leaves': 10,
    'metric': 'quantile',
    'verbose': -1,
}

# Plot the original data
plt.scatter(x, y, label='Data points', alpha=0.5)

# Fit one LightGBM model per quantile and overlay its in-sample predictions.
for quantile in quantiles:
    params = {**base_params, 'alpha': quantile}

    # Train the model
    gbm = lgb.train(params, dataset, num_boost_round=100)

    # Predict and plot. The percent format rounds instead of truncating, so
    # values such as 0.29 are labelled 29% rather than 28%.
    y_pred = gbm.predict(data)
    plt.plot(x, y_pred, label=f'Q({quantile:.0%})')

# x is the independent variable and y the noisy S-shaped curve.
# NOTE(review): labels assume x is the dosage and y the response - confirm.
plt.xlabel('Drug Dosage')
plt.ylabel('Biological Response')
plt.title('Non-linear Quantile Regression with S-curve Data using LightGBM')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# Synthetic single-feature regression problem, relabelled as
# distance -> time_to_buyer.
X, y = make_regression(
    n_samples=10000,
    n_features=1,
    n_informative=1,
    n_targets=1,
    random_state=42,
)

Xs = pd.DataFrame(X, columns=['distance'])
ys = pd.DataFrame(y, columns=['time_to_buyer'])

# Add unit-variance Gaussian noise (mean 1) to each value, then rescale and
# shift. Noise is drawn for all distances first, then for all times.
distance_noise = np.random.normal(loc=1, size=len(Xs))
Xs['distance'] = 10 + 2 * (Xs['distance'] + distance_noise)

time_noise = np.random.normal(loc=1, size=len(ys))
ys['time_to_buyer'] = 60 + 0.3 * (ys['time_to_buyer'] + time_noise)

# Both frames share the same default index, so an index join pairs rows 1:1.
df = Xs.join(ys)

In [None]:
# Hold out 10% of the shuffled rows; the held-out part is used below as the
# calibration set (X_cal / y_cal).
train_df, test_df = train_test_split(df, shuffle=True, test_size=0.10)

X_train = train_df[['distance']]
y_train = train_df[['time_to_buyer']]

X_cal = test_df[['distance']]
y_cal = test_df[['time_to_buyer']]

In [None]:
# Scatter of the training split: distance (x) against time_to_buyer (y).
plt.scatter(X_train, y_train, s=0.5)
plt.xlabel('Distance')
# The y-axis shows the 'time_to_buyer' column, so it is labelled as a time.
plt.ylabel('Time to Buyer')

In [None]:
from sklearn.linear_model import QuantileRegressor
from sklearn.utils.fixes import parse_version, sp_version
# Use the "highs" solver when the installed SciPy is at least 1.6.0,
# otherwise fall back to "interior-point".
if sp_version >= parse_version("1.6.0"):
    solver = "highs"
else:
    solver = "interior-point"

In [None]:
# Fit one unregularised (alpha=0) linear quantile regressor per quantile on
# the training split and predict on the calibration split.
# Despite the name, `classifiers` holds regressors; the name and the 'clf'
# key are kept because later cells look them up.
classifiers = {}

# scikit-learn expects a 1-D target; select the column instead of passing
# the single-column DataFrame.
y_train_1d = y_train['time_to_buyer']

for q in (0.05, 0.95):
    reg = QuantileRegressor(quantile=q, alpha=0, solver=solver)
    preds = reg.fit(X_train, y_train_1d).predict(X_cal)
    classifiers[str(q)] = {'clf': reg, 'predictions': preds}

# Keep the individual names used by later cells.
qr_05 = classifiers['0.05']['clf']
y_pred_05 = classifiers['0.05']['predictions']
qr_95 = classifiers['0.95']['clf']
y_pred_95 = classifiers['0.95']['predictions']

In [None]:
# Collect calibration inputs, both quantile predictions and the true labels
# in one frame with a fresh 0..n-1 index, then order the rows by distance.
quantile_columns = {
    '0.05': classifiers['0.05']['predictions'],
    '0.95': classifiers['0.95']['predictions'],
    'time_to_buyer': y_cal['time_to_buyer'].to_numpy(),
}

data = (
    X_cal[['distance']]
    .reset_index(drop=True)
    .assign(**quantile_columns)
    .sort_values(by='distance')
)

In [None]:
# Calibration points with the fitted 5% (red) and 95% (blue) quantile lines.
plt.scatter(data['distance'], data['time_to_buyer'], s=0.5, color='black')
plt.xlabel('Distance')
# The y-axis shows the 'time_to_buyer' column, so it is labelled as a time.
plt.ylabel('Time to Buyer')
# `data` is already sorted by distance, so x and y come from the same rows
# instead of relying on a separately sorted copy of X_cal lining up.
plt.plot(data['distance'], data['0.05'], color='red')
plt.plot(data['distance'], data['0.95'], color='blue')

In [None]:
# Conformalise the quantile-regression band: compute a score per calibration
# point, take a finite-sample-corrected quantile of the scores (qhat), and
# widen both bounds by qhat.
cal_labels = y_cal
n = len(cal_labels)  # calibration-set size, derived from the data
model_lower = qr_05.predict(X_cal)
model_upper = qr_95.predict(X_cal)

cal_labels_np = cal_labels['time_to_buyer'].to_numpy()

# Score is positive when the label lies outside [lower, upper] (distance to
# the nearest bound) and negative when it lies inside.
cal_scores = np.maximum(cal_labels_np - model_upper, model_lower - cal_labels_np)

alpha = 0.05  # miscoverage level used in the quantile-level formula below
# ceil((n+1)(1-alpha))/n can exceed 1 for small n; cap it so np.quantile
# receives a valid level.
q_level = min(1.0, np.ceil((n + 1) * (1 - alpha)) / n)
# `method=` replaces the deprecated `interpolation=` keyword (NumPy >= 1.22).
qhat = np.quantile(cal_scores, q_level, method='higher')

prediction_sets = [model_lower - qhat, model_upper + qhat]

# Order the bounds by distance so they line up with distance-sorted x values
# whatever the sign of the fitted slope (sorting the bounds by their own
# value only matches when predictions increase with distance).
order = np.argsort(X_cal['distance'].to_numpy())
conformalized_05 = prediction_sets[0][order]
conformalized_95 = prediction_sets[1][order]

In [None]:
# Calibration points with the conformalised lower (red) and upper (blue)
# bounds drawn against distance in ascending order.
distance_sorted = np.sort(X_cal['distance'].to_numpy())

plt.scatter(X_cal, y_cal, s=0.5, color='black')
plt.xlabel('Distance')
# The y-axis shows the 'time_to_buyer' column, so it is labelled as a time.
plt.ylabel('Time to Buyer')
plt.plot(distance_sorted, conformalized_05, color='red')
plt.plot(distance_sorted, conformalized_95, color='blue')