In [2]:
import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# Synthetic regression dataset with strongly correlated predictors.
# Pipeline: seed -> covariance -> sample X -> build y -> DataFrame -> report -> CSV
# ---------------------------------------------------------------------------

# Reproducibility: seed NumPy's legacy global RNG. Both random draws below
# consume this stream, so their order must not change.
np.random.seed(42)

# Dataset dimensions: rows (observations) and columns (predictors).
n_samples, n_features = 500, 7

# Covariance structure: unit variance on the diagonal and `base_corr` for
# every pair of distinct features. With unit variances the covariance
# between two features equals their correlation.
base_corr = 0.9
cov = np.where(np.eye(n_features, dtype=bool), 1.0, base_corr)

# Predictors: zero-mean multivariate normal draws, one row per sample.
mean = np.zeros(n_features)
X = np.random.multivariate_normal(mean=mean, cov=cov, size=n_samples)

# Ground-truth linear model: one weight per feature plus an intercept.
true_weights = np.array([2.5, -1.8, 1.2, 0.8, 0.5, 1.5, -0.7])
bias = 3.0

# Target: linear combination of the features, shifted by the intercept and
# perturbed by independent Gaussian noise (mean 0, standard deviation 1.5).
noise = np.random.normal(loc=0, scale=1.5, size=n_samples)
y = np.dot(X, true_weights) + bias + noise

# Tabular view: Feature_1 .. Feature_<n_features>, then the Target column.
columns = [f'Feature_{idx}' for idx in range(1, n_features + 1)]
df = pd.DataFrame(data=X, columns=columns)
df['Target'] = y

# Report: pairwise correlations across all columns (features and Target).
print("Feature Correlation Matrix:", df.corr(), sep="\n")

# Persist: write the frame to CSV without the row index.
df.to_csv("highly_correlated_dataset.csv", index=False)
print("\nDataset saved as 'highly_correlated_dataset.csv'")


Feature Correlation Matrix:
           Feature_1  Feature_2  Feature_3  Feature_4  Feature_5  Feature_6  \
Feature_1   1.000000   0.897950   0.889118   0.903796   0.896104   0.902124   
Feature_2   0.897950   1.000000   0.891180   0.906176   0.906239   0.898228   
Feature_3   0.889118   0.891180   1.000000   0.904157   0.905161   0.898464   
Feature_4   0.903796   0.906176   0.904157   1.000000   0.892401   0.911539   
Feature_5   0.896104   0.906239   0.905161   0.892401   1.000000   0.901148   
Feature_6   0.902124   0.898228   0.898464   0.911539   0.901148   1.000000   
Feature_7   0.895656   0.906879   0.899301   0.911495   0.894585   0.904232   
Target      0.897466   0.788642   0.864259   0.863332   0.846861   0.877392   

           Feature_7    Target  
Feature_1   0.895656  0.897466  
Feature_2   0.906879  0.788642  
Feature_3   0.899301  0.864259  
Feature_4   0.911495  0.863332  
Feature_5   0.894585  0.846861  
Feature_6   0.904232  0.877392  
Feature_7   1.000000  0.81478