#PREDICTION OF INDIVIDUAL SEQUENCES

#Autoregressive Integrated Moving Average (ARIMA)

In [None]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Example sequence data: five observations with a constant step of 10
sequence_data = np.array([10, 20, 30, 40, 50])

# Fit an ARIMA model with order (p=1, d=0, q=0): one autoregressive lag,
# no differencing and no moving-average term.
# NOTE(review): the series trends upward while d=0 applies no differencing;
# confirm this order is what the example is meant to demonstrate.
model = ARIMA(sequence_data, order=(1, 0, 0))
model_fit = model.fit()

# One-step-ahead forecast; take the single element of the returned array
forecast_value = model_fit.forecast(steps=1)[0]

# Evaluate the forecast against the known next value
true_value = 60  # Actual next value
mae = mean_absolute_error([true_value], [forecast_value])
# RMSE is computed as sqrt(MSE) explicitly because the `squared=False`
# keyword of mean_squared_error is deprecated (scikit-learn 1.4) and absent
# from later releases.
rmse = np.sqrt(mean_squared_error([true_value], [forecast_value]))

print("Forecasted Value:", forecast_value)
print("MAE:", mae)
print("RMSE:", rmse)

Forecasted Value: 46.35089344762744
MAE: 13.649106552372558
RMSE: 13.649106552372558


#Long Short-Term Memory (LSTM) Network

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Example sequence data (time series): each row is a window of three
# consecutive values and the matching target is the value after the window
sequence_data = np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]])
target_values = np.array([4, 5, 6])

# Reshape data for LSTM input (samples, timesteps, features)
X_train = sequence_data.reshape((sequence_data.shape[0], sequence_data.shape[1], 1))

# Build LSTM model: one LSTM layer followed by a single-unit regression head
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(sequence_data.shape[1], 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# Train LSTM model
model.fit(X_train, target_values, epochs=100, verbose=0)

# Predict the value following the next window. The window is reshaped to
# (samples, timesteps, features) so it matches the declared input shape
# instead of relying on implicit dimension expansion inside Keras.
X_test = np.array([[4, 5, 6]]).reshape((1, sequence_data.shape[1], 1))
predicted_values = model.predict(X_test)

# Evaluate model
true_value = 7  # Actual next value
# predict() returns shape (1, 1); flatten so both metric arguments are 1-D
flat_predictions = predicted_values.ravel()
mae = mean_absolute_error([true_value], flat_predictions)
# sqrt(MSE) replaces the `squared=False` keyword, which is deprecated
# (scikit-learn 1.4) and absent from later releases.
rmse = np.sqrt(mean_squared_error([true_value], flat_predictions))

print("Predicted Value:", predicted_values[0][0])
print("MAE:", mae)
print("RMSE:", rmse)

Predicted Value: 9.3935995
MAE: 2.393599510192871
RMSE: 2.393599510192871


#Random Forest Regressor

In [None]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Example sequence data: targets follow y = 2 * x for x = 1, 2, 3
X_train = np.array([[1], [2], [3]])
y_train = np.array([2, 4, 6])

# Fit Random Forest model (no random_state is set, so results vary per run)
model = RandomForestRegressor(n_estimators=100)
model.fit(X_train, y_train)

# Predict for x = 4, which lies outside the range seen during training
X_test = np.array([[4]])  # Next value to predict
predicted_value = model.predict(X_test)

# Evaluate model
true_value = 8  # Actual next value
# predict() already returns a 1-D array, so it is passed to the metrics as-is
mae = mean_absolute_error([true_value], predicted_value)
# sqrt(MSE) replaces the `squared=False` keyword, which is deprecated
# (scikit-learn 1.4) and absent from later releases.
rmse = np.sqrt(mean_squared_error([true_value], predicted_value))
# R^2 is not defined for fewer than two samples; with this single test point
# scikit-learn emits a warning and returns nan. Kept so the printed output
# keeps its four lines.
r2 = r2_score([true_value], predicted_value)

print("Predicted Value:", predicted_value[0])
print("MAE:", mae)
print("RMSE:", rmse)
print("R^2:", r2)

Predicted Value: 5.24
MAE: 2.76
RMSE: 2.76
R^2: nan




#Support Vector Machine (SVM)

In [None]:
import numpy as np
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Example sequence data: targets follow y = 2 * x for x = 1, 2, 3
X_train = np.array([[1], [2], [3]])
y_train = np.array([2, 4, 6])

# Fit support vector regressor with an RBF kernel
model = SVR(kernel='rbf')
model.fit(X_train, y_train)

# Predict for x = 4
X_test = np.array([[4]])  # Next value to predict
predicted_value = model.predict(X_test)

# Evaluate model
true_value = 8  # Actual next value
# predict() already returns a 1-D array, so it is passed to the metrics as-is
mae = mean_absolute_error([true_value], predicted_value)
# sqrt(MSE) replaces the `squared=False` keyword, which is deprecated
# (scikit-learn 1.4) and absent from later releases.
rmse = np.sqrt(mean_squared_error([true_value], predicted_value))
# The former r2_score call was dropped: r2_score is not imported in this cell
# (it only resolved through leftover notebook state) and its result was never
# used.

print("Predicted Value:", predicted_value[0])
print("MAE:", mae)
print("RMSE:", rmse)

Predicted Value: 4.223128789189343
MAE: 3.776871210810657
RMSE: 3.776871210810657




#Gradient Boosting Regressor

In [None]:
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Example sequence data: targets follow y = 2 * x for x = 1, 2, 3
X_train = np.array([[1], [2], [3]])
y_train = np.array([2, 4, 6])

# Fit Gradient Boosting model
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)
model.fit(X_train, y_train)

# Predict for x = 4
X_test = np.array([[4]])  # Next value to predict
predicted_value = model.predict(X_test)

# Evaluate model
true_value = 8  # Actual next value
# predict() already returns a 1-D array, so it is passed to the metrics as-is
mae = mean_absolute_error([true_value], predicted_value)
# sqrt(MSE) replaces the `squared=False` keyword, which is deprecated
# (scikit-learn 1.4) and absent from later releases.
rmse = np.sqrt(mean_squared_error([true_value], predicted_value))
# The former r2_score call was dropped: r2_score is not imported in this cell
# (it only resolved through leftover notebook state) and its result was never
# used.

print("Predicted Value:", predicted_value[0])
print("MAE:", mae)
print("RMSE:", rmse)

Predicted Value: 5.999946877202226
MAE: 2.000053122797774
RMSE: 2.000053122797774




#Hidden Markov Model (HMM)

In [None]:
!pip install hmmlearn

Collecting hmmlearn
  Downloading hmmlearn-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/161.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.7/161.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.1/161.1 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: hmmlearn
Successfully installed hmmlearn-0.3.2


In [None]:
import numpy as np
from hmmlearn import hmm

# Example sequence data: three one-dimensional observations, one per row
X = np.array([[1], [2], [3]])

# Fit a Gaussian-emission HMM with three hidden states and a full covariance
# matrix per state
model = hmm.GaussianHMM(n_components=3, covariance_type="full")
model.fit(X)

# Decode the hidden state for a new observation.
# NOTE: predict() returns hidden-state indices for the observations passed
# in; it is not a forecast of the next observed value, so the number printed
# below is a state label.
next_value = model.predict(np.array([[4]]))

print("Next Predicted State:", next_value[0])



Next Predicted State: 1


#K-Nearest Neighbors (KNN)

In [None]:
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Example sequence data: targets follow y = 2 * x for x = 1, 2, 3
X_train = np.array([[1], [2], [3]])
y_train = np.array([2, 4, 6])

# Fit KNN model; with n_neighbors=3 and three training points every
# prediction uses all of the training targets
model = KNeighborsRegressor(n_neighbors=3)
model.fit(X_train, y_train)

# Predict for x = 4
X_test = np.array([[4]])  # Next value to predict
predicted_value = model.predict(X_test)

# Evaluate model
true_value = 8  # Actual next value
# predict() already returns a 1-D array, so it is passed to the metrics as-is
mae = mean_absolute_error([true_value], predicted_value)
# sqrt(MSE) replaces the `squared=False` keyword, which is deprecated
# (scikit-learn 1.4) and absent from later releases.
rmse = np.sqrt(mean_squared_error([true_value], predicted_value))
# The former r2_score call was dropped: r2_score is not imported in this cell
# (it only resolved through leftover notebook state) and its result was never
# used.

print("Predicted Value:", predicted_value[0])
print("MAE:", mae)
print("RMSE:", rmse)

Predicted Value: 4.0
MAE: 4.0
RMSE: 4.0




#Gaussian Process Regression

In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Example sequence data: targets follow y = 2 * x for x = 1, 2, 3
X_train = np.array([[1], [2], [3]])
y_train = np.array([2, 4, 6])

# Define Gaussian Process kernel: constant factor times an RBF kernel, each
# given an initial value and the bounds used during hyperparameter search
kernel = C(1.0, (1e-4, 1e1)) * RBF(1, (1e-4, 1e1))

# Fit Gaussian Process model
model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, alpha=0.1)
model.fit(X_train, y_train)

# Predict for x = 4; sigma is the predictive standard deviation
X_test = np.array([[4]])  # Next value to predict
predicted_value, sigma = model.predict(X_test, return_std=True)

# Evaluate model
true_value = 8  # Actual next value
# predict() already returns a 1-D array, so it is passed to the metrics as-is
mae = mean_absolute_error([true_value], predicted_value)
# sqrt(MSE) replaces the `squared=False` keyword, which is deprecated
# (scikit-learn 1.4) and absent from later releases.
rmse = np.sqrt(mean_squared_error([true_value], predicted_value))
# The former r2_score call was dropped: r2_score is not imported in this cell
# (it only resolved through leftover notebook state) and its result was never
# used.

print("Predicted Value:", predicted_value[0])
print("MAE:", mae)
print("RMSE:", rmse)

Predicted Value: 6.4669350339689595
MAE: 1.5330649660310405
RMSE: 1.5330649660310405




#Extensions

#Supervised Learning

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVC, SVR

# Iris data: feature matrix and class labels for the classification task
iris = load_iris()
X_iris = iris.data
y_iris = iris.target

# Hold out 20% of the samples for testing; the fixed seed makes the split
# repeatable
X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(
    X_iris,
    y_iris,
    test_size=0.2,
    random_state=42,
)

# Learn scaling statistics on the training split only, then apply them to
# both splits
scaler_iris = StandardScaler()
scaler_iris.fit(X_iris_train)
X_iris_train_scaled = scaler_iris.transform(X_iris_train)
X_iris_test_scaled = scaler_iris.transform(X_iris_test)

# Classifiers to compare, keyed by the label used when reporting results
classification_models = {}
classification_models["Logistic Regression"] = LogisticRegression()
classification_models["K-Nearest Neighbors"] = KNeighborsClassifier()
classification_models["Decision Tree"] = DecisionTreeClassifier()
classification_models["Random Forest"] = RandomForestClassifier()
classification_models["Support Vector Machine"] = SVC()

# Function to evaluate classification models
def evaluate_classification_models(models, X_train, y_train, X_test, y_test):
    """Fit every classifier and score it on the test split.

    Parameters:
    models (dict): Maps a display name to an unfitted estimator.
    X_train, y_train: Training features and labels passed to ``fit``.
    X_test, y_test: Test features passed to ``predict`` and the labels the
        predictions are scored against.

    Returns:
    dict: Maps each model name to a dict with the keys "Accuracy",
    "Precision", "Recall" and "F1-score"; the last three are macro-averaged.
    """
    # Metrics that share the macro-averaging call signature
    macro_scorers = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1-score", f1_score),
    )

    results = {}
    for name in models:
        estimator = models[name]
        estimator.fit(X_train, y_train)
        predictions = estimator.predict(X_test)

        scores = {"Accuracy": accuracy_score(y_test, predictions)}
        for label, scorer in macro_scorers:
            scores[label] = scorer(y_test, predictions, average='macro')
        results[name] = scores
    return results

# Score every classifier on the scaled Iris split
classification_results = evaluate_classification_models(
    classification_models,
    X_iris_train_scaled,
    y_iris_train,
    X_iris_test_scaled,
    y_iris_test,
)

# Breast Cancer data, used here as the regression example.
# NOTE(review): this dataset's target is a binary class label, so the
# regressors below are fitted to 0/1 values; confirm that is intended.
cancer = load_breast_cancer()
X_cancer = cancer.data
y_cancer = cancer.target

# Hold out 20% of the samples for testing; the fixed seed makes the split
# repeatable
X_cancer_train, X_cancer_test, y_cancer_train, y_cancer_test = train_test_split(
    X_cancer,
    y_cancer,
    test_size=0.2,
    random_state=42,
)

# Learn scaling statistics on the training split only, then apply them to
# both splits
scaler_cancer = StandardScaler()
scaler_cancer.fit(X_cancer_train)
X_cancer_train_scaled = scaler_cancer.transform(X_cancer_train)
X_cancer_test_scaled = scaler_cancer.transform(X_cancer_test)

# Regressors to compare, keyed by the label used when reporting results
regression_models = {}
regression_models["Linear Regression"] = LinearRegression()
regression_models["K-Nearest Neighbors"] = KNeighborsRegressor()
regression_models["Decision Tree"] = DecisionTreeRegressor()
regression_models["Random Forest"] = RandomForestRegressor()
regression_models["Support Vector Machine"] = SVR()

# Function to evaluate regression models
def evaluate_regression_models(models, X_train, y_train, X_test, y_test):
    """Fit every regressor and report its test-set MAE and RMSE.

    Parameters:
    models (dict): Maps a display name to an unfitted estimator.
    X_train, y_train: Training features and targets passed to ``fit``.
    X_test, y_test: Test features passed to ``predict`` and the targets the
        predictions are scored against.

    Returns:
    dict: Maps each model name to ``{"MAE": ..., "RMSE": ...}``.
    """
    results = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        mae = mean_absolute_error(y_test, y_pred)
        # RMSE is taken as sqrt(MSE) rather than via `squared=False`: that
        # keyword is deprecated (scikit-learn 1.4) and absent from later
        # releases. For single-output targets the two are identical.
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        results[name] = {"MAE": mae, "RMSE": rmse}
    return results

# Score every regressor on the scaled Breast Cancer split
regression_results = evaluate_regression_models(
    regression_models,
    X_cancer_train_scaled,
    y_cancer_train,
    X_cancer_test_scaled,
    y_cancer_test,
)

# Print both result tables with the same layout: a section header, then one
# line per model followed by its tab-indented metrics
report_sections = (
    ("Classification Results (Iris Dataset):", classification_results),
    ("\nRegression Results (Breast Cancer Dataset):", regression_results),
)
for header, section_results in report_sections:
    print(header)
    for model, metrics in section_results.items():
        print(f"{model}:")
        for metric, value in metrics.items():
            print(f"\t{metric}: {value}")


Classification Results (Iris Dataset):
Logistic Regression:
	Accuracy: 1.0
	Precision: 1.0
	Recall: 1.0
	F1-score: 1.0
K-Nearest Neighbors:
	Accuracy: 1.0
	Precision: 1.0
	Recall: 1.0
	F1-score: 1.0
Decision Tree:
	Accuracy: 1.0
	Precision: 1.0
	Recall: 1.0
	F1-score: 1.0
Random Forest:
	Accuracy: 1.0
	Precision: 1.0
	Recall: 1.0
	F1-score: 1.0
Support Vector Machine:
	Accuracy: 1.0
	Precision: 1.0
	Recall: 1.0
	F1-score: 1.0

Regression Results (Breast Cancer Dataset):
Linear Regression:
	MAE: 0.1969037446564639
	RMSE: 0.2531972797450529
K-Nearest Neighbors:
	MAE: 0.06140350877192982
	RMSE: 0.19010615686293011
Decision Tree:
	MAE: 0.07017543859649122
	RMSE: 0.26490647141300877
Random Forest:
	MAE: 0.06807017543859649
	RMSE: 0.18428619999547424
Support Vector Machine:
	MAE: 0.1276381828290622
	RMSE: 0.18798704463224564


#Equivalence to Tail Bounds

Chernoff Bound Implementation

In [None]:
import numpy as np

def chernoff_bound(X, t):
    """
    Evaluate an exponential tail expression from the mean of X.

    The value returned is exp(-t * (1 + t) * mean(X)).

    NOTE(review): this expression does not match the textbook multiplicative
    Chernoff bound, which is usually written exp(-t**2 * mu / (2 + t)) with
    mu the expected value of the sum. The formula is left as it was; confirm
    which bound is intended before relying on the result.

    Parameters:
    X (list or np.array): Values whose arithmetic mean enters the exponent.
    t (float): Tail parameter.

    Returns:
    float: The value of the expression above.
    """
    # Only the mean is used; the previously computed sum of X was never read
    # and has been removed.
    mean_X = np.mean(X)

    exponent = -t * (1 + t) * mean_X
    upper_bound = np.exp(exponent)

    return upper_bound

# Example usage:
# X holds the values whose mean is fed into the bound; t is the tail parameter
X = [0.3, 0.5, 0.8, 0.2]
t = 0.5
bound = chernoff_bound(X, t)
print("Chernoff Bound:", bound)

Chernoff Bound: 0.7135519747065024


Hoeffding Inequality Implementation


In [None]:
import numpy as np

def hoeffding_inequality(X, t, a=None, b=None):
    """
    Compute the Hoeffding bound for the sample mean of bounded random variables.

    The bound is exp(-2 * n * t**2 / (b - a)**2), an upper bound on the
    probability that the mean of n independent variables confined to [a, b]
    exceeds its expectation by at least t.

    Parameters:
    X (list or np.array): Observed values; its length is used as n.
    t (float): Deviation of the sample mean from its expectation.
    a (float, optional): Known lower bound of the variables. Defaults to
        min(X), which matches the previous behaviour.
    b (float, optional): Known upper bound of the variables. Defaults to
        max(X), which matches the previous behaviour.

    Returns:
    float: Upper bound on the tail probability.

    Raises:
    ValueError: If X is empty or if b is smaller than a.
    """
    values = np.asarray(X, dtype=float)
    if values.size == 0:
        raise ValueError("X must contain at least one value")

    # Prefer the caller's known bounds: the empirical range can be narrower
    # than the true one, which makes the result tighter than is justified.
    lower = np.min(values) if a is None else a
    upper = np.max(values) if b is None else b
    width = upper - lower
    if width < 0:
        raise ValueError("b must not be smaller than a")

    # Number of random variables
    n = len(values)

    if width == 0:
        # Degenerate range: the variables are constant, so any positive
        # deviation has probability 0 and a zero deviation gives the trivial
        # bound 1. This avoids dividing by zero.
        return 1.0 if t == 0 else 0.0

    bound = np.exp(-2 * n * t**2 / (width**2))

    return bound

# Example usage:
# X is a list/array of observed values of bounded random variables.
# NOTE: the call below does not pass the [0, 1] bounds; hoeffding_inequality
# derives the range from min(X) and max(X), i.e. 0.2 and 0.8 here.
X = [0.3, 0.5, 0.8, 0.2]  # Assuming each variable is in [0, 1]
t = 0.5
bound = hoeffding_inequality(X, t)
print("Hoeffding Inequality Bound:", bound)

Hoeffding Inequality Bound: 0.003865920139472811


#Bayesian Classifiers

#Gaussian Naive-Bayes Classifier

In [None]:
!pip install pgmpy

Collecting pgmpy
  Downloading pgmpy-0.1.25-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pgmpy
Successfully installed pgmpy-0.1.25


In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from pgmpy.models import NaiveBayes
from scipy.stats import norm

# Iris data: feature matrix X and class labels y
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing; the fixed seed makes the split
# repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn scaling statistics on the training split only, then apply them to
# both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Class labels that occur in the training split
classes = np.unique(y_train)

# Per-class Gaussian parameters: one mean and one standard deviation per
# feature, estimated from the scaled training rows of that class
parameters = {}
for c in classes:
    X_class = X_train_scaled[y_train == c]
    class_params = {}
    class_params['mean'] = X_class.mean(axis=0)
    class_params['std'] = X_class.std(axis=0)
    parameters[c] = class_params

# Make predictions using Gaussian Naive Bayes
def predict_gaussian_naive_bayes(X_test, parameters):
    """Classify each row of X_test with a Gaussian Naive Bayes rule.

    Each class is scored by the sum of per-feature Gaussian log-densities
    plus the log of a uniform prior, and the highest-scoring class wins.
    Working in log space avoids the underflow of multiplying many small
    densities, which previously made every class score 0 for distant points
    so that the first class was returned.

    Parameters:
    X_test (iterable of array-like): Samples to classify, one per row.
    parameters (dict): Maps each class label to a dict with the keys 'mean'
        and 'std', each holding one value per feature.

    Returns:
    np.ndarray: Predicted class label for every row of X_test. An entry is
    None when no class yields a comparable score (e.g. a zero 'std' makes
    the density undefined).

    Raises:
    ValueError: If parameters is empty.
    """
    if not parameters:
        raise ValueError("parameters must describe at least one class")

    # Uniform prior over the classes in `parameters`. Deriving it here keeps
    # the function independent of any module-level `classes` variable.
    log_prior = -np.log(len(parameters))

    y_pred = []
    for x in X_test:
        best_score = -np.inf
        best_class = None

        for c, params in parameters.items():
            # Naive Bayes assumption: features are independent given the
            # class, so the joint log-likelihood is a sum over features
            log_likelihood = np.sum(
                norm.logpdf(x, loc=params['mean'], scale=params['std'])
            )

            # Unnormalised log-posterior via Bayes' rule
            log_posterior = log_likelihood + log_prior

            # Strict comparison keeps the first class on ties
            if log_posterior > best_score:
                best_score = log_posterior
                best_class = c

        y_pred.append(best_class)

    return np.array(y_pred)

# Classify the scaled test samples with the per-class Gaussian parameters
y_pred = predict_gaussian_naive_bayes(X_test_scaled, parameters)

# Score the predictions; precision, recall and F1 are macro-averaged
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

# Display evaluation metrics, rounded to four decimals
# NOTE(review): the header mentions pgmpy, but the predictions come from
# predict_gaussian_naive_bayes (NumPy/SciPy); confirm the label is intended.
print("Gaussian Naive Bayes Classification Results (Iris Dataset - pgmpy):\n")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

Gaussian Naive Bayes Classification Results (Iris Dataset - pgmpy):

Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1-score: 1.0000


#Tabular CPD (Conditional Probability Distribution) Classifier

In [None]:
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Define a two-node network with the single edge X -> Y.
# BayesianNetwork replaces the deprecated BayesianModel name and matches the
# class used by the Bayesian Network Classifier cell in this notebook.
model = BayesianNetwork([('X', 'Y')])

# Define Conditional Probability Distributions (CPDs)
# P(X): one row per state of X
cpd_X = TabularCPD(variable='X', variable_card=2, values=[[0.6], [0.4]])
# P(Y | X): rows are the states of Y, columns are the states of X
cpd_Y_given_X = TabularCPD(variable='Y', variable_card=2,
                           values=[[0.2, 0.7], [0.8, 0.3]],
                           evidence=['X'], evidence_card=[2])

# Add CPDs to the model
model.add_cpds(cpd_X, cpd_Y_given_X)

# Check model validity (structure and CPDs must be consistent)
model.check_model()

# Query the distribution of Y given the observation X = 0
inference = VariableElimination(model)
result = inference.query(variables=['Y'], evidence={'X': 0})
print(result)




+------+----------+
| Y    |   phi(Y) |
| Y(0) |   0.2000 |
+------+----------+
| Y(1) |   0.8000 |
+------+----------+


#Bayesian Network Classifier

In [None]:
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Define a Bayesian Network model in which A and B are both parents of C
model = BayesianNetwork([('A', 'C'), ('B', 'C')])

# Define sample data as a pandas DataFrame.
# Each (A, B) combination appears exactly once, so the CPD of C learned
# below is estimated from a single row per parent configuration.
data = pd.DataFrame({
    'A': [0, 1, 0, 1],
    'B': [0, 0, 1, 1],
    'C': [0, 1, 1, 0]
})

# Learn parameters (CPDs) from data using Maximum Likelihood Estimation
model.fit(data, estimator=MaximumLikelihoodEstimator)

# Query the distribution of C given A = 0 and B = 1 using Variable Elimination
inference = VariableElimination(model)
result = inference.query(variables=['C'], evidence={'A': 0, 'B': 1})
print(result)

+------+----------+
| C    |   phi(C) |
| C(0) |   0.0000 |
+------+----------+
| C(1) |   1.0000 |
+------+----------+


#Bayesian Belief Network (BBN) Classifier

In [None]:
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Define a Bayesian Belief Network in which X and Z are both parents of Y.
# BayesianNetwork replaces the deprecated BayesianModel name and matches the
# class used by the Bayesian Network Classifier cell in this notebook.
model = BayesianNetwork([('X', 'Y'), ('Z', 'Y')])

# Define Conditional Probability Distributions (CPDs) using TabularCPD
# P(X) and P(Z): one row per state
cpd_X = TabularCPD(variable='X', variable_card=2, values=[[0.6], [0.4]])
cpd_Z = TabularCPD(variable='Z', variable_card=2, values=[[0.7], [0.3]])
# P(Y | X, Z): rows are the states of Y; columns run over the (X, Z)
# combinations with Z varying fastest: (0,0), (0,1), (1,0), (1,1)
cpd_Y_given_XZ = TabularCPD(variable='Y', variable_card=2,
                            values=[[0.1, 0.9, 0.8, 0.7], [0.9, 0.1, 0.2, 0.3]],
                            evidence=['X', 'Z'], evidence_card=[2, 2])

# Add CPDs to the model
model.add_cpds(cpd_X, cpd_Z, cpd_Y_given_XZ)

# Check model validity (structure and CPDs must be consistent)
model.check_model()

# Query the distribution of Y given the observations X = 0 and Z = 1
inference = VariableElimination(model)
result = inference.query(variables=['Y'], evidence={'X': 0, 'Z': 1})
print(result)



+------+----------+
| Y    |   phi(Y) |
| Y(0) |   0.9000 |
+------+----------+
| Y(1) |   0.1000 |
+------+----------+
