<a href="https://colab.research.google.com/github/Maha2646/maha2646.github.io/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Configuration for the synthetic dataset and the splits. Keeping these in one
# place means the values that have to agree (feature count vs. column names,
# the seed shared by every random step) are written down exactly once.
N_SAMPLES = 1000
N_FEATURES = 15
TEST_SIZE = 0.2
SEED = 42

# Step 1: Generate a random binary-classification dataset
X, y = make_classification(
    n_samples=N_SAMPLES,
    n_features=N_FEATURES,
    n_classes=2,
    random_state=SEED,
)

# Step 2: Load and visualize data
# Column names are derived from the actual width of X, so changing
# N_FEATURES cannot leave the name list out of step with the data.
feature_names = [f'Feature_{i}' for i in range(1, X.shape[1] + 1)]
df = pd.DataFrame(X, columns=feature_names)
df['Target'] = y
print("First few rows of the dataset:")
print(df.head())

# Step 3: Check for missing values.
# This only reports the per-column null count; nothing is replaced here
# (in this case, there are no missing values to replace).
print("Checking for missing values:")
print(df.isnull().sum())

# Step 4: Encode categorical data (in this case, there are no categorical features)
# If categorical features exist, you would use LabelEncoder or OneHotEncoder to encode them.

# Step 5: Splitting the dataset into Training and Test set
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='Target'),
    df['Target'],
    test_size=TEST_SIZE,
    random_state=SEED,
)

# Step 6: Splitting the dataset into k-folds
# fold_indices is a list of (train_indices, test_indices) pairs, one per fold.
# NOTE(review): the folds are drawn over the full `df`, which includes the
# rows held out in X_test above; if the folds are meant for model selection,
# kf.split(X_train) is probably what is wanted - confirm intent.
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=SEED)
fold_indices = list(kf.split(df))

# Step 7: Feature scaling
# The scaler is fitted on the training split only and then applied unchanged
# to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Let's print out some information to verify everything
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Number of folds:", k)
print("Number of samples in each fold:")
# Each entry of fold_indices is a (train_indices, test_indices) pair; the size
# of the held-out (second) part is what gets reported. The loop variable has
# its own name so that iterating does not rebind `fold_indices` to a single
# index array and leave it unusable for anything that runs afterwards.
for fold_number, (_, test_idx) in enumerate(fold_indices, start=1):
    print(f"Fold {fold_number}: {len(test_idx)} samples")



First few rows of the dataset:
   Feature_1  Feature_2  Feature_3  Feature_4  Feature_5  Feature_6  \
0   0.572471   1.685432  -1.460302   0.650966   0.418515  -0.076760   
1  -0.231060  -0.338478   0.864070  -1.191875   1.419456  -0.078133   
2   1.089905  -0.173771  -0.802199  -0.571184  -0.816155   0.433768   
3   0.238171   0.653312  -0.072858  -0.840772  -0.987866   0.237337   
4   0.243657  -1.091077  -3.007632  -1.090208   1.284096   0.694764   

   Feature_7  Feature_8  Feature_9  Feature_10  Feature_11  Feature_12  \
0   0.923045  -0.117981  -0.496651    0.187942    0.422046   -0.814153   
1   0.126459   1.943529  -0.915477    0.306135   -0.199075    1.178866   
2   0.301961   0.043829  -1.455055   -1.147160    0.476887    1.868603   
3   0.085513   0.220631   0.723038    1.889279    0.489983   -0.928926   
4  -2.386930  -0.985540  -2.426392    0.571179    0.515074    1.097300   

   Feature_13  Feature_14  Feature_15  Target  
0    0.406094   -1.400229   -1.165202       0  
1