In [36]:
import numpy as np
import pandas as pd


In [37]:
# Seed NumPy's global generator so every draw in this notebook is reproducible
np.random.seed(42)

# Number of rows in the training set and in each test set
n_train, n_test = 300, 100

# Training set: two features, each drawn uniformly between 0 and 10
X_train = np.random.uniform(low=0, high=10, size=(n_train, 2))

# Label is 1 when the two features sum to more than 10, otherwise 0
y_train = (X_train.sum(axis=1) > 10).astype(int)

# Features and label gathered into one frame with columns X1, X2, y
df_train = pd.DataFrame(X_train, columns=['X1', 'X2']).assign(y=y_train)

In [38]:
# Test Set A: both features drawn uniformly between 0 and 10, like the training set
X_test_random = np.random.uniform(low=0, high=10, size=(n_test, 2))

# Test Set B: near-threshold only -- both features drawn between 4.9 and 5.1,
# so X1 + X2 always lands within 0.2 of the decision threshold of 10
X_test_near = np.random.uniform(low=4.9, high=5.1, size=(n_test, 2))

# Same labelling rule as the training set: 1 when X1 + X2 > 10, otherwise 0
y_test_random = (X_test_random.sum(axis=1) > 10).astype(int)
y_test_near = (X_test_near.sum(axis=1) > 10).astype(int)

In [39]:
# Apply monotonic transformation (independent rescaling of each feature)
def transform(X, x1_scale=2, x2_scale=0.5):
    """Return a rescaled copy of ``X``; the input itself is never modified.

    Column 0 is multiplied by ``x1_scale`` and column 1 by ``x2_scale``.
    Any additional columns are copied through unchanged. With positive
    factors (the defaults) the ordering of values within each feature is
    preserved, which is what makes the transformation monotonic.

    Parameters
    ----------
    X : array-like with at least two columns
        Feature matrix. Lists and integer/bool arrays are accepted and are
        promoted to float; floating-point arrays keep their dtype.
    x1_scale : float, default 2
        Multiplier applied to column 0.
    x2_scale : float, default 0.5
        Multiplier applied to column 1.

    Returns
    -------
    numpy.ndarray
        A new array with the same shape as ``X``.
    """
    X_t = np.array(X, copy=True)
    # In-place `*=` by a fractional factor raises a casting error on
    # integer/bool arrays, so promote those to float before scaling.
    if not np.issubdtype(X_t.dtype, np.inexact):
        X_t = X_t.astype(float)
    X_t[:, 0] *= x1_scale   # scale X1
    X_t[:, 1] *= x2_scale   # scale X2
    return X_t

# Rescale the training set and both test sets with the same transformation
X_train_scaled, X_test_random_scaled, X_test_near_scaled = (
    transform(features)
    for features in (X_train, X_test_random, X_test_near)
)

In [40]:
# Save datasets: each (features, labels) pair is written to its own CSV
# with columns X1, X2, y and no index column.
csv_exports = [
    ("train_original.csv", X_train, y_train),
    ("train_scaled.csv", X_train_scaled, y_train),
    ("test_random.csv", X_test_random, y_test_random),
    ("test_random_scaled.csv", X_test_random_scaled, y_test_random),
    ("test_near.csv", X_test_near, y_test_near),
    ("test_near_scaled.csv", X_test_near_scaled, y_test_near),
]

for csv_path, X_part, y_part in csv_exports:
    export_frame = pd.DataFrame(X_part, columns=["X1", "X2"])
    export_frame["y"] = y_part
    export_frame.to_csv(csv_path, index=False)