In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np

# Sample data (replace this with your dataset)
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([2, 4, 6, 8, 10])

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train a linear regression model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Make a prediction on a test instance
test_instance = np.array([[6]])
prediction = regressor.predict(test_instance)[0]

# Calculate the prediction interval with a user-defined confidence level (e.g., 95%)
confidence_level = 0.95
prediction_interval = (prediction - np.std(y_train) * 1.96, prediction + np.std(y_train) * 1.96)

print(f"Prediction: {prediction}")
print(f"Prediction Interval (at {confidence_level*100}% confidence): {prediction_interval}")


Prediction: 11.999999999999998
Prediction Interval (at 95.0% confidence): (6.202241812562375, 17.797758187437623)


In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np

# Sample data (replace this with your dataset)
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([2, 4, 6, 8, 10])

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train a linear regression model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Define a nonconformity measure function (simple absolute difference in this case)
def nonconformity_measure(y_true, y_pred):
    return np.abs(y_true - y_pred)

# Initialize the list to store prediction intervals
prediction_intervals = []

# Perform ICP for each test instance
for test_instance, true_value in zip(X_test, y_test):
    # Make a prediction on the test instance
    prediction = regressor.predict(test_instance.reshape(1, -1))[0]
    
    # Calculate nonconformity score for the test instance
    nc_score = nonconformity_measure(true_value, prediction)
    
    # Find the k-th smallest nonconformity score from the training set (k = 1 for ICP)
    k = 1
    k_smallest_nc = np.partition(nonconformity_measure(y_train, prediction), k-1)[k-1]
    
    # Calculate the p-value (proportion of training instances with nonconformity score >= k_smallest_nc)
    p_value = np.sum(nonconformity_measure(y_train, prediction) >= k_smallest_nc) / len(y_train)
    
    # Define the significance level (1 - confidence level)
    confidence_level = 0.95
    significance = 1 - confidence_level
    
    # Calculate the prediction interval based on the p-value and significance level
    lower_bound = prediction - k_smallest_nc if p_value > significance else prediction
    upper_bound = prediction + k_smallest_nc if p_value > significance else prediction
    
    prediction_intervals.append((lower_bound, upper_bound))

# Print the prediction intervals for each test instance
for idx, test_instance in enumerate(X_test):
    lower_bound, upper_bound = prediction_intervals[idx]
    print(f"Test Instance: {test_instance[0]}, Prediction Interval: [{lower_bound}, {upper_bound}]")


Test Instance: 2, Prediction Interval: [2.0000000000000018, 6.0]


In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np

def inductive_conformal_prediction(X_train, y_train, X_test, confidence_level=0.95):
    """
    Perform Inductive Conformal Prediction for regression tasks.
    
    Parameters:
        X_train (numpy.ndarray): Training feature data.
        y_train (numpy.ndarray): Training target data.
        X_test (numpy.ndarray): Test feature data.
        confidence_level (float): Desired confidence level (default is 0.95).
        
    Returns:
        list: A list of tuples, each containing the lower and upper bounds of the prediction intervals
              for each test instance.
    """
    # Create and train a linear regression model
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    
    # Define a nonconformity measure function (simple absolute difference in this case)
    def nonconformity_measure(y_true, y_pred):
        return np.abs(y_true - y_pred)

    # Initialize the list to store prediction intervals
    prediction_intervals = []

    # Perform ICP for each test instance
    for test_instance in X_test:
        # Make a prediction on the test instance
        prediction = regressor.predict(test_instance.reshape(1, -1))[0]

        # Calculate nonconformity score for the test instance
        nc_score = nonconformity_measure(y_train, prediction)

        # Find the k-th smallest nonconformity score from the training set (k = 1 for ICP)
        k = 1
        k_smallest_nc = np.partition(nonconformity_measure(y_train, prediction), k-1)[k-1]

        # Calculate the p-value (proportion of training instances with nonconformity score >= k_smallest_nc)
        p_value = np.sum(nonconformity_measure(y_train, prediction) >= k_smallest_nc) / len(y_train)

        # Define the significance level (1 - confidence level)
        significance = 1 - confidence_level

        # Calculate the prediction interval based on the p-value and significance level
        lower_bound = prediction - k_smallest_nc if p_value > significance else prediction
        upper_bound = prediction + k_smallest_nc if p_value > significance else prediction

        prediction_intervals.append((lower_bound, upper_bound))
    
    return prediction_intervals

# Sample data (replace this with your dataset)
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([2, 4, 6, 8, 10])

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Perform Inductive Conformal Prediction
prediction_intervals = inductive_conformal_prediction(X_train, y_train, X_test)

# Print the prediction intervals for each test instance
for idx, test_instance in enumerate(X_test):
    lower_bound, upper_bound = prediction_intervals[idx]
    print(f"Test Instance: {test_instance[0]}, Prediction Interval: [{lower_bound}, {upper_bound}]")


Test Instance: 2, Prediction Interval: [2.0000000000000018, 6.0]


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np

def absolute_difference_nonconformity_measure(y_true, y_pred):
    """
    Simple nonconformity measure based on the absolute difference between true and predicted target values.
    
    Parameters:
        y_true (numpy.ndarray): True target values.
        y_pred (numpy.ndarray): Predicted target values.

    Returns:
        numpy.ndarray: Array of nonconformity scores for each instance.
    """
    return np.abs(y_true - y_pred)

def inductive_conformal_prediction(X_train, y_train, X_test, confidence_level=0.95):
    # ... (same as the previous implementation)
    # Rest of the code remains the same...
    # ... (same as the previous implementation)


In [4]:
import numpy as np

def generate_time_series(data, sequence_length):
    """
    Convert time series data into overlapping sequences with the specified sequence length.
    
    Parameters:
        data (numpy.ndarray): 1-D array of the time series data.
        sequence_length (int): Length of the sequences to generate.

    Returns:
        numpy.ndarray: 2-D array of overlapping sequences.
    """
    n_sequences = len(data) - sequence_length + 1
    sequences = [data[i:i+sequence_length] for i in range(n_sequences)]
    return np.array(sequences)

def absolute_difference_nonconformity_measure(y_true, y_pred):
    """
    Simple nonconformity measure based on the absolute difference between true and predicted target values.
    
    Parameters:
        y_true (numpy.ndarray): True target values.
        y_pred (numpy.ndarray): Predicted target values.

    Returns:
        numpy.ndarray: Array of nonconformity scores for each instance.
    """
    return np.abs(y_true - y_pred)

def markov_conformal_predictor(time_series, sequence_length, confidence_level=0.95):
    """
    Markov Conformal Predictor for univariate time series forecasting using autoregressive models.
    
    Parameters:
        time_series (numpy.ndarray): 1-D array of the time series data.
        sequence_length (int): Length of the sequences for autoregressive modeling.
        confidence_level (float): Desired confidence level (default is 0.95).
        
    Returns:
        numpy.ndarray: Array of tuples containing the lower and upper bounds of the prediction intervals
                       for each test instance (sequence).
    """
    # Split the time series into overlapping sequences
    sequences = generate_time_series(time_series, sequence_length)
    n_sequences = len(sequences)

    # Initialize arrays to store the predicted values and nonconformity scores
    predictions = np.zeros(n_sequences)
    nonconformity_scores = np.zeros(n_sequences)

    # Generate predictions and calculate nonconformity scores for each sequence
    for i in range(n_sequences):
        train_sequence = sequences[:i]
        test_sequence = sequences[i:i+1]

        if len(train_sequence) > 0:
            # Train an autoregressive model (e.g., ARIMA) on the train_sequence
            # Here, you may use any appropriate autoregressive model of your choice
            
            # For simplicity, we'll just use the last value of the train_sequence as the prediction
            predictions[i] = train_sequence[-1][-1]

            # Calculate the nonconformity score for the test_sequence
            nonconformity_scores[i] = absolute_difference_nonconformity_measure(test_sequence[-1][-1], predictions[i])

    # Find the k-th smallest nonconformity score from the training set (k = 1 for MCP)
    k = 1
    k_smallest_nc = np.partition(nonconformity_scores, k-1)[k-1]

    # Calculate the p-value (proportion of training instances with nonconformity score >= k_smallest_nc)
    p_values = (nonconformity_scores >= k_smallest_nc).astype(int) / n_sequences

    # Define the significance level (1 - confidence level)
    significance = 1 - confidence_level

    # Calculate the prediction intervals based on the p-values and significance level
    lower_bounds = predictions - k_smallest_nc * (p_values > significance)
    upper_bounds = predictions + k_smallest_nc * (p_values > significance)

    # Combine lower and upper bounds into prediction intervals
    prediction_intervals = np.column_stack((lower_bounds, upper_bounds))
    
    return prediction_intervals

# Sample time series data (replace this with your own time series)
time_series = np.array([10, 15, 20, 25, 30, 35, 40, 45, 50])

# Set the sequence length for autoregressive modeling
sequence_length = 3

# Perform Markov Conformal Prediction
confidence_level = 0.95
prediction_intervals = markov_conformal_predictor(time_series, sequence_length, confidence_level)

# Print the prediction intervals for each test instance (sequence)
for idx, interval in enumerate(prediction_intervals):
    print(f"Test Sequence: {time_series[idx:idx+sequence_length]}, Prediction Interval: {interval}")


Test Sequence: [10 15 20], Prediction Interval: [0. 0.]
Test Sequence: [15 20 25], Prediction Interval: [20. 20.]
Test Sequence: [20 25 30], Prediction Interval: [25. 25.]
Test Sequence: [25 30 35], Prediction Interval: [30. 30.]
Test Sequence: [30 35 40], Prediction Interval: [35. 35.]
Test Sequence: [35 40 45], Prediction Interval: [40. 40.]
Test Sequence: [40 45 50], Prediction Interval: [45. 45.]
