In [10]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Demo: compare a chronological (unshuffled) hold-out against a shuffled one
# on a tiny dataset whose feature values are strictly increasing.


def _fit_and_score(X_fit, y_fit, X_eval, y_eval):
    """Fit a LinearRegression on one split and score it on the other.

    Returns a tuple of (fitted model, predictions for X_eval, mean squared
    error of those predictions against y_eval).
    """
    regressor = LinearRegression().fit(X_fit, y_fit)
    predictions = regressor.predict(X_eval)
    return regressor, predictions, mean_squared_error(y_eval, predictions)


# X is the column vector 0..9; y is the line 3*x plus Gaussian noise (std 10).
# Seeding NumPy's global RNG fixes the noise. NOTE: the shuffled split below
# is given no random_state, so it also draws from this global RNG -- its
# shuffle order depends on this seed and on the randn() call preceding it.
np.random.seed(42)
X = np.arange(10).reshape(-1, 1)
print("X dataset:", X)
y = 3 * X.ravel() + np.random.randn(10) * 10


# Ordered split: the last 20% of rows (the largest X values) are held out.
(
    X_train_no_shuffle,
    X_test_no_shuffle,
    y_train_no_shuffle,
    y_test_no_shuffle,
) = train_test_split(X, y, test_size=0.2, shuffle=False)
print("X without shuffling:", X_train_no_shuffle)

# Shuffled split: rows are permuted before 20% is held out.
(
    X_train_shuffle,
    X_test_shuffle,
    y_train_shuffle,
    y_test_shuffle,
) = train_test_split(X, y, test_size=0.2, shuffle=True)
print("X with shuffling:", X_train_shuffle)


# Fit and evaluate one model per split. Each MSE is computed on a different
# two-sample test set, so the two numbers are illustrative rather than a
# like-for-like comparison.
model_no_shuffle, y_pred_no_shuffle, mse_no_shuffle = _fit_and_score(
    X_train_no_shuffle, y_train_no_shuffle, X_test_no_shuffle, y_test_no_shuffle
)
model_shuffle, y_pred_shuffle, mse_shuffle = _fit_and_score(
    X_train_shuffle, y_train_shuffle, X_test_shuffle, y_test_shuffle
)

print(f"MSE without shuffling: {mse_no_shuffle:.2f}")
print(f"MSE with shuffling: {mse_shuffle:.2f}")


X dataset: [[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]
 [9]]
X without shuffling: [[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]]
X with shuffling: [[8]
 [7]
 [9]
 [5]
 [1]
 [3]
 [0]
 [4]]
MSE without shuffling: 98.86
MSE with shuffling: 92.86
