In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np

# Sample data (replace this with your dataset)
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([2, 4, 6, 8, 10])

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train a linear regression model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Make a prediction on a test instance
test_instance = np.array([[6]])
prediction = regressor.predict(test_instance)[0]

# Calculate the prediction interval with a user-defined confidence level (e.g., 95%)
confidence_level = 0.95
prediction_interval = (prediction - np.std(y_train) * 1.96, prediction + np.std(y_train) * 1.96)

print(f"Prediction: {prediction}")
print(f"Prediction Interval (at {confidence_level*100}% confidence): {prediction_interval}")


Prediction: 11.999999999999998
Prediction Interval (at 95.0% confidence): (6.202241812562375, 17.797758187437623)


In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np

# Sample data (replace this with your dataset)
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([2, 4, 6, 8, 10])

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train a linear regression model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Define a nonconformity measure function (simple absolute difference in this case)
def nonconformity_measure(y_true, y_pred):
    return np.abs(y_true - y_pred)

# Initialize the list to store prediction intervals
prediction_intervals = []

# Perform ICP for each test instance
for test_instance, true_value in zip(X_test, y_test):
    # Make a prediction on the test instance
    prediction = regressor.predict(test_instance.reshape(1, -1))[0]
    
    # Calculate nonconformity score for the test instance
    nc_score = nonconformity_measure(true_value, prediction)
    
    # Find the k-th smallest nonconformity score from the training set (k = 1 for ICP)
    k = 1
    k_smallest_nc = np.partition(nonconformity_measure(y_train, prediction), k-1)[k-1]
    
    # Calculate the p-value (proportion of training instances with nonconformity score >= k_smallest_nc)
    p_value = np.sum(nonconformity_measure(y_train, prediction) >= k_smallest_nc) / len(y_train)
    
    # Define the significance level (1 - confidence level)
    confidence_level = 0.95
    significance = 1 - confidence_level
    
    # Calculate the prediction interval based on the p-value and significance level
    lower_bound = prediction - k_smallest_nc if p_value > significance else prediction
    upper_bound = prediction + k_smallest_nc if p_value > significance else prediction
    
    prediction_intervals.append((lower_bound, upper_bound))

# Print the prediction intervals for each test instance
for idx, test_instance in enumerate(X_test):
    lower_bound, upper_bound = prediction_intervals[idx]
    print(f"Test Instance: {test_instance[0]}, Prediction Interval: [{lower_bound}, {upper_bound}]")


Test Instance: 2, Prediction Interval: [2.0000000000000018, 6.0]
