## Q 04 Generate a simulated dataset for a classification application with the following properties:
### a. Number of samples = 100
### b. Number of features = 4
### c. Number of classes = 2
### d. Zero noise

In [9]:
import pandas as pd 
import numpy as np

In [11]:
def dataset_generation(n_samples=100, n_features=4, n_classes=2):
    """Build a noise-free, perfectly separable classification dataset.

    Rows are grouped by class in ascending label order. Every row of class
    ``c`` is the same vector ``c + np.linspace(0, 1, n_features) * c``, so
    class 0 is all zeros and there is no within-class variance.

    Parameters
    ----------
    n_samples : int
        Total number of rows. When it is not divisible by ``n_classes`` the
        first ``n_samples % n_classes`` classes each receive one extra row,
        so class sizes differ by at most one.
    n_features : int
        Number of feature columns (named ``feature_1`` .. ``feature_N``).
    n_classes : int
        Number of distinct target labels (``0`` .. ``n_classes - 1``).

    Returns
    -------
    pandas.DataFrame
        ``n_samples`` rows with the feature columns followed by an integer
        ``target`` column.

    Raises
    ------
    ValueError
        If ``n_classes`` is less than 1, or ``n_samples`` / ``n_features``
        is negative.
    """
    if n_classes < 1:
        raise ValueError("n_classes must be at least 1")
    if n_samples < 0 or n_features < 0:
        raise ValueError("n_samples and n_features must be non-negative")

    # Spread the remainder over the leading classes so that no row is left
    # unassigned (previously leftovers silently defaulted to class 0).
    base, extra = divmod(n_samples, n_classes)
    class_sizes = np.full(n_classes, base, dtype=int)
    class_sizes[:extra] += 1

    # Labels in contiguous, ascending blocks: 0,0,...,1,1,...
    y = np.repeat(np.arange(n_classes, dtype=int), class_sizes)

    # Per-feature scaling in [0, 1]; broadcasting a label column against it
    # yields the class-specific row  label + scale * label  for every sample.
    scale = np.linspace(0, 1, n_features)
    label_col = y[:, np.newaxis]
    X = label_col + scale * label_col

    df = pd.DataFrame(X, columns=[f'feature_{i+1}' for i in range(n_features)])
    df['target'] = y

    return df

In [13]:
# Build the dataset with the default settings of dataset_generation().
df = dataset_generation()

# Display dataset information
print("Dataset Shape:", df.shape)
print("\nFeature Columns:")
print(df.columns)
print("\nTarget Value Counts:")
print(df['target'].value_counts())
print("\nFirst few rows:")
print(df.head())

# Verify class separation: per-class mean and standard deviation of each
# feature. Every non-target column is inspected (rather than a fixed first
# four) so the report stays correct if the number of features changes.
feature_columns = df.columns.drop('target')
print("\nClass Separation:")
for feature in feature_columns:
    print(f"\n{feature} statistics by class:")
    print(df.groupby('target')[feature].agg(['mean', 'std']))

Dataset Shape: (100, 5)

Feature Columns:
Index(['feature_1', 'feature_2', 'feature_3', 'feature_4', 'target'], dtype='object')

Target Value Counts:
target
0    50
1    50
Name: count, dtype: int64

First few rows:
   feature_1  feature_2  feature_3  feature_4  target
0        0.0        0.0        0.0        0.0       0
1        0.0        0.0        0.0        0.0       0
2        0.0        0.0        0.0        0.0       0
3        0.0        0.0        0.0        0.0       0
4        0.0        0.0        0.0        0.0       0

Class Separation:

feature_1 statistics by class:
        mean  std
target           
0        0.0  0.0
1        1.0  0.0

feature_2 statistics by class:
            mean  std
target               
0       0.000000  0.0
1       1.333333  0.0

feature_3 statistics by class:
            mean  std
target               
0       0.000000  0.0
1       1.666667  0.0

feature_4 statistics by class:
        mean  std
target           
0        0.0  0.0
1        2.