#Chaining

#Stacking Models

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import StackingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Fetch the California Housing data and separate the features from the target
data = fetch_california_housing()
X = data.data
y = data.target

# Hold out 20% of the rows for testing (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Base learners whose predictions are fed to the final estimator
base_models = [
    ('rf', RandomForestRegressor(n_estimators=10, random_state=42)),
    ('gb', GradientBoostingRegressor(n_estimators=10, random_state=42)),
]

# Stack the base learners underneath a linear-regression meta-model
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=LinearRegression(),
)

# Fit on the training split, then predict the held-out split
stacking_regressor.fit(X_train, y_train)
y_pred_stacking = stacking_regressor.predict(X_test)

# Report the mean squared error on the test split
mse_stacking = mean_squared_error(y_test, y_pred_stacking)
print(f"Stacking Regressor MSE: {mse_stacking}")


Stacking Regressor MSE: 0.28453653268650003


#Ensembling models

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import VotingRegressor
from sklearn.metrics import mean_squared_error

# Fetch the California Housing data and separate the features from the target
data = fetch_california_housing()
X = data.data
y = data.target

# Hold out 20% of the rows for testing (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# The two ensemble members, each seeded for repeatable fits
model_rf = RandomForestRegressor(n_estimators=10, random_state=42)
model_gb = GradientBoostingRegressor(n_estimators=10, random_state=42)

# Combine both members in a single voting ensemble
voting_regressor = VotingRegressor(
    estimators=[('rf', model_rf), ('gb', model_gb)]
)

# Fit on the training split, then predict the held-out split
voting_regressor.fit(X_train, y_train)
y_pred_voting = voting_regressor.predict(X_test)

# Report the mean squared error on the test split
mse_voting = mean_squared_error(y_test, y_pred_voting)
print(f"Voting Regressor MSE: {mse_voting}")

Voting Regressor MSE: 0.376084845012778


#Combining models in a Pipeline

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

# Fetch the California Housing data and separate the features from the target
data = fetch_california_housing()
X = data.data
y = data.target

# Hold out 20% of the rows for testing (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Two-step pipeline: standardize the features, then fit a random forest
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('rf', RandomForestRegressor(n_estimators=10, random_state=42)),
    ]
)

# Fitting the pipeline fits the scaler and the forest on the training split
pipeline.fit(X_train, y_train)

# Predict the held-out split through the same fitted steps
y_pred_pipeline = pipeline.predict(X_test)

# Report the mean squared error on the test split
mse_pipeline = mean_squared_error(y_test, y_pred_pipeline)
print(f"Pipeline MSE: {mse_pipeline}")

Pipeline MSE: 0.28364877522100695


#Oracle Inequalities in Empirical Risk Minimization (ERM)

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Synthetic regression data: 100 points with y = 3*x + standard normal noise
np.random.seed(0)
X = np.random.rand(100, 1)
y = 3 * X[:, 0] + np.random.randn(100)

# Empirical risk minimization: least-squares fit of a linear model
model = LinearRegression().fit(X, y)

# The empirical risk is the mean squared error on the very data used to fit
y_pred = model.predict(X)
empirical_risk = mean_squared_error(y, y_pred)
print("Empirical Risk (MSE):", empirical_risk)

Empirical Risk (MSE): 0.9924386487246483


#Sparse Recovery

In [None]:
import cvxpy as cp
import numpy as np  # imported here so this cell does not rely on an earlier cell's import

# Generate synthetic data for sparse recovery
np.random.seed(1)
n = 100  # Signal dimension
m = 50   # Number of measurements (m < n, so the linear system is underdetermined)
A = np.random.randn(m, n)  # Measurement matrix
x_true = np.zeros(n)
x_true[:5] = np.random.randn(5)  # True sparse signal: only the first 5 entries are non-zero
y = A @ x_true + 0.1 * np.random.randn(m)  # Noisy measurements

# Solve an L1-regularized fitting problem.
# NOTE: the data-fit term is the *unsquared* Euclidean norm ||A x - y||_2, so
# this is a square-root-Lasso-style objective rather than the classical Lasso
# (which penalizes the squared residual ||A x - y||_2^2). The L1 penalty weight
# is implicitly 1.
x = cp.Variable(n)
objective = cp.Minimize(cp.norm(A @ x - y, 2) + cp.norm(x, 1))
problem = cp.Problem(objective)

# Solve the problem
problem.solve()

# Fail loudly if the solver did not reach a solution; otherwise x.value may be
# None and the print below would silently report nothing useful.
if problem.status not in ("optimal", "optimal_inaccurate"):
    raise RuntimeError(f"Sparse recovery solve failed with status: {problem.status}")

# Recovered sparse solution
x_hat = x.value

# Print the recovered sparse solution
print("Recovered Sparse Solution:", x_hat)

Recovered Sparse Solution: [-8.86152430e-01  1.12920447e+00 -1.13031954e+00 -7.05358138e-01
  6.44321781e-01 -4.83534711e-11 -4.10539223e-03  1.14299837e-11
 -3.81802557e-03 -1.05808494e-02 -1.81442205e-12  3.90384103e-11
  3.22985132e-11 -1.11496500e-10 -2.51754896e-03 -1.22286683e-02
 -6.99323642e-03 -4.10945945e-12 -6.56299391e-04  2.64728074e-03
  5.50285023e-11  1.60930172e-10 -1.79439661e-02  1.96893880e-10
  1.18735866e-11  4.15756075e-10  7.58312822e-11 -1.01571788e-03
  7.80468680e-11  1.49830239e-02 -1.37726113e-03  3.41513813e-11
  8.70734940e-03 -1.09815788e-11  1.49596490e-11 -4.87341801e-11
 -1.22345860e-02 -2.29064836e-11 -1.02404709e-10 -1.93530087e-11
 -2.88515571e-02  3.53550822e-11 -1.07062049e-10  2.72508521e-11
 -1.83158188e-10  1.42064235e-11 -5.46078246e-03  1.61485052e-02
 -5.22370840e-11  5.57514736e-11 -1.99931300e-03 -2.44167564e-11
  7.64980182e-03  6.09056084e-03 -1.40021301e-10 -1.80660045e-10
 -3.24209694e-03  7.06173670e-12 -3.56439167e-10  1.09318753e-1

#Probability in a Banach Space (Finite-Dimensional Case)

In [None]:
import numpy as np
from scipy.stats import multivariate_normal

# Distribution parameters: a standard Gaussian on R^n (zero mean, identity covariance)
n = 3
mean = np.zeros(n)
covariance_matrix = np.identity(n)

# Draw the sample; each row of `random_vectors` is one n-dimensional draw
num_samples = 1000
random_vectors = multivariate_normal.rvs(
    mean=mean,
    cov=covariance_matrix,
    size=num_samples,
)

# Empirical moments, treating rows as observations and columns as coordinates
sample_mean = random_vectors.mean(axis=0)
sample_covariance = np.cov(random_vectors.T)

# Show both estimates, with a blank line between the two sections
print(
    "Sample Mean:",
    sample_mean,
    "\nSample Covariance Matrix:",
    sample_covariance,
    sep="\n",
)

Sample Mean:
[ 0.02092496 -0.05026219 -0.05075703]

Sample Covariance Matrix:
[[ 1.02566656 -0.06444956 -0.01811995]
 [-0.06444956  1.02612566 -0.00559662]
 [-0.01811995 -0.00559662  0.92482936]]
