In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Build a synthetic 1-D, two-class dataset: each class draws its single
# feature from its own Gaussian, then the samples are shuffled together.

# Seed NumPy's global RNG so every run regenerates the same dataset.
np.random.seed(42)

# Per-class sample counts. The total is derived from them (rather than
# hard-coded) so the shuffle below always covers exactly the generated points.
n_class_0 = 500
n_class_1 = 500
n_samples = n_class_0 + n_class_1

# Class 0: smaller values, wider spread (mean 1.0, std 1.5).
X_class_0 = np.random.normal(loc=1.0, scale=1.5, size=n_class_0)

# Class 1: larger values, slightly narrower spread (mean 5.0, std 1.2).
X_class_1 = np.random.normal(loc=5.0, scale=1.2, size=n_class_1)

# Combine features and build the matching labels (0.0 / 1.0, float dtype).
X = np.concatenate([X_class_0, X_class_1])
y = np.concatenate([np.zeros(n_class_0), np.ones(n_class_1)])

# Shuffle features and labels with one shared permutation so that every
# feature value stays paired with its own label.
shuffle_idx = np.random.permutation(n_samples)
X = X[shuffle_idx]
y = y[shuffle_idx]

# Tabular view of the dataset: one row per sample.
df = pd.DataFrame({'X': X, 'y': y})

# Quick sanity summary of what was generated.
print(f"Generated {n_samples} data points!")
print(f"Class 0: {np.sum(y == 0)} samples")
print(f"Class 1: {np.sum(y == 1)} samples")
print(f"Feature range: [{X.min():.3f}, {X.max():.3f}]")

# Visualize the data: a per-class strip plot (left) next to overlaid
# per-class histograms (right).
fig, (ax_points, ax_hist) = plt.subplots(1, 2, figsize=(12, 4))

colors = ['red', 'blue']
labels = ['Class 0', 'Class 1']

# Vertical position of each class's strip; the small gap keeps the two
# classes from overdrawing each other on what is otherwise a 1-D plot.
strip_offsets = [i * 0.1 for i in range(len(labels))]

# Shared bin edges spanning the whole feature range. Passing an integer bin
# count to each hist() call separately would give the two classes different
# bin widths, making their bar heights incomparable.
bin_edges = np.histogram_bin_edges(X, bins=50)

for i, (color, label) in enumerate(zip(colors, labels)):
    mask = y == i
    ax_points.scatter(X[mask], np.full_like(X[mask], strip_offsets[i]),
                      c=color, label=label, alpha=0.6, s=20)
    ax_hist.hist(X[mask], bins=bin_edges, alpha=0.7, color=color, label=label)

ax_points.set_xlabel('Feature X')
ax_points.set_ylabel('Class')
# Label the two strips with their class names instead of the raw offsets.
ax_points.set_yticks(strip_offsets)
ax_points.set_yticklabels(labels)
# Report the actual number of plotted points rather than a fixed figure.
ax_points.set_title(f'{len(X)} Data Points Distribution')
ax_points.legend()
ax_points.grid(True, alpha=0.3)

ax_hist.set_xlabel('Feature X')
ax_hist.set_ylabel('Frequency')
ax_hist.set_title('Distribution by Class')
ax_hist.legend()
ax_hist.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

# Preview both ends of the shuffled table. Each section is a heading line
# followed by the rendered rows, separated by a newline.
print("\nFirst 10 rows:", df.head(10), sep="\n")
print("\nLast 10 rows:", df.tail(10), sep="\n")