# Neural Network Fundamentals


## Linearly Separable Data Generation

Generate two clusters of points that can be separated by a linear boundary.


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def generate_linearly_separable_data(n_samples=100, separation=2.0, seed=42):
    """Generate two well-separated 2D Gaussian clusters.

    Class 0 is drawn from a unit-variance normal centred at
    ``(separation, separation)`` and class 1 from one centred at
    ``(-separation, -separation)``.

    Args:
        n_samples: Number of points drawn for each cluster.
        separation: Offset of each cluster centre from the origin along
            both axes.
        seed: Seed for the function's private random generator. The default
            (42) yields the same points as seeding the global generator
            with 42 would.

    Returns:
        Tuple ``(X, y)``: ``X`` has shape ``(2 * n_samples, 2)`` and ``y``
        holds ``n_samples`` zeros followed by ``n_samples`` ones.
    """
    # A private RandomState produces the same stream as np.random.seed(seed)
    # but does not reset the global generator that other code may be using.
    rng = np.random.RandomState(seed)
    cluster1 = rng.randn(n_samples, 2) + [separation, separation]
    cluster2 = rng.randn(n_samples, 2) + [-separation, -separation]
    X = np.vstack([cluster1, cluster2])
    y = np.repeat([0, 1], n_samples)
    return X, y


## XOR Dataset Generation

Generate the XOR dataset - a classic non-linearly separable problem.


In [None]:
def generate_xor_data():
    """Return the four XOR input points and their labels.

    Returns:
        Tuple ``(X, y)``: ``X`` is the 4x2 array of all 0/1 input pairs in
        the order (0,0), (0,1), (1,0), (1,1); ``y`` is the XOR of each pair.
    """
    corners = [(a, b) for a in (0, 1) for b in (0, 1)]
    X = np.array([[a, b] for a, b in corners])
    # Label is 1 exactly when the two inputs differ.
    y = np.array([a ^ b for a, b in corners])
    return X, y


## Data Visualization

Visualize the clusters and their labels.


In [None]:
import matplotlib.pyplot as plt

def visualize_data(X, y, title='Data Clusters'):
    """Scatter-plot 2D points coloured by binary class label.

    Rows of ``X`` whose label in ``y`` is 0 are drawn in blue and rows whose
    label is 1 in red. The figure is shown with ``plt.show()``.

    Args:
        X: 2D array; column 0 is plotted on the x-axis, column 1 on the y-axis.
        y: Array of labels used to build boolean masks over the rows of ``X``.
        title: Title placed above the plot.
    """
    plt.figure(figsize=(8, 6))
    class_styles = ((0, 'blue', 'Class 0'), (1, 'red', 'Class 1'))
    for class_id, colour, legend_name in class_styles:
        members = X[y == class_id]
        plt.scatter(members[:, 0], members[:, 1], c=colour, label=legend_name, alpha=0.6)
    plt.title(title)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()


## Linear SVM Classification

Use scikit-learn's `LinearSVC` to find a linear decision boundary for binary classification.


In [None]:
from sklearn.svm import LinearSVC

# Build the two-cluster dataset and plot it before fitting anything.
X_linear, y_linear = generate_linearly_separable_data()
visualize_data(X_linear, y_linear, title='Linearly Separable Data')

# Fit a linear SVM (fit() returns the estimator itself) and report its
# accuracy on the same points it was trained on.
svm_linear = LinearSVC(max_iter=10000, random_state=42).fit(X_linear, y_linear)
print(f'Linear SVM accuracy: {svm_linear.score(X_linear, y_linear):.4f}')


## Decision Boundary Visualization

Visualize the decision boundary learned by the classifier.


In [None]:
def plot_decision_boundary(X, y, model, title='Decision Boundary', step=0.02):
    """Plot a fitted classifier's decision regions over 2D data.

    The model is evaluated on a regular grid covering the data range plus a
    margin of 1 on every side; the predicted classes are drawn as filled
    contours with the data points scattered on top.

    Args:
        X: 2D array of points; column 0 is the x-axis, column 1 the y-axis.
        y: Array of labels; points labelled 0 are drawn blue, 1 red.
        model: Object with a ``predict`` method that accepts an
            ``(n_points, 2)`` array and returns one value per row.
        title: Title placed above the plot.
        step: Grid spacing. Smaller values give a smoother boundary at the
            cost of more grid points to predict.
    """
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
    # Predict on every grid node, then fold the flat result back to grid shape.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.figure(figsize=(10, 8))
    # RdYlBu maps low values to red and high values to blue, which would shade
    # the class-0 region red while class-0 points are blue. The reversed map
    # keeps region and point colours consistent.
    plt.contourf(xx, yy, Z, alpha=0.3, cmap='RdYlBu_r')
    plt.scatter(X[y==0][:, 0], X[y==0][:, 1], c='blue', label='Class 0', edgecolors='k')
    plt.scatter(X[y==1][:, 0], X[y==1][:, 1], c='red', label='Class 1', edgecolors='k')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title(title)
    plt.legend()
    plt.show()


In [None]:
# Show the regions the linear SVM assigns to each class on the two-cluster data.
plot_decision_boundary(
    X_linear,
    y_linear,
    model=svm_linear,
    title='Linear SVM on Linearly Separable Data',
)


In [None]:
# Repeat the linear-SVM experiment on the four XOR points.
X_xor, y_xor = generate_xor_data()
visualize_data(X_xor, y_xor, title='XOR Dataset')

# Same estimator settings as for the two-cluster data; fit() returns the estimator.
svm_xor_linear = LinearSVC(max_iter=10000, random_state=42).fit(X_xor, y_xor)
print(f'Linear SVM on XOR accuracy: {svm_xor_linear.score(X_xor, y_xor):.4f}')
plot_decision_boundary(
    X_xor,
    y_xor,
    model=svm_xor_linear,
    title='Linear SVM on XOR (Poor Performance)',
)


## Model Optimization

Improve the model by replacing the linear SVM with a non-linear (RBF) kernel SVM, which can fit the XOR data.


In [None]:
from sklearn.svm import SVC

# Fit an RBF-kernel SVM on the XOR points (gamma='scale', C=1.0) and plot
# the regions it predicts.
svm_xor_rbf = SVC(C=1.0, kernel='rbf', gamma='scale', random_state=42).fit(X_xor, y_xor)
print(f'RBF SVM on XOR accuracy: {svm_xor_rbf.score(X_xor, y_xor):.4f}')
plot_decision_boundary(
    X_xor,
    y_xor,
    model=svm_xor_rbf,
    title='RBF SVM on XOR (Perfect Separation)',
)


## Observations: Linear vs Non-Linear Classification

Linear models work well for linearly separable data but fail on non-linear problems like XOR.
Non-linear kernels (like RBF) can handle complex decision boundaries.


## Impact of Outliers on Decision Boundary

Analyze how outliers affect the decision boundary. Outliers are simulated here by flipping the labels of a few randomly chosen points.


In [None]:
# Create outliers by flipping labels
def add_outliers(X, y, n_outliers=5):
    """Return copies of ``X`` and ``y`` with some labels flipped.

    ``n_outliers`` distinct positions are drawn with the global NumPy random
    generator and the label at each one is replaced by ``1 - label``. The
    inputs are not modified and the returned features equal ``X``.

    Args:
        X: Feature array; copied and returned unchanged.
        y: Label array; flipping is computed as ``1 - label``.
        n_outliers: Number of labels to flip.

    Returns:
        Tuple ``(X_outlier, y_outlier)`` of the copied features and the
        labels after flipping.
    """
    features, labels = X.copy(), y.copy()
    # Sampling without replacement so no label is flipped twice.
    flip_at = np.random.choice(len(y), size=n_outliers, replace=False)
    labels[flip_at] = 1 - labels[flip_at]
    return features, labels


In [None]:
# Add outliers and retrain
np.random.seed(42)  # fixed seed so the same labels are flipped on every run
X_outlier, y_outlier = add_outliers(X=X_linear, y=y_linear, n_outliers=10)
visualize_data(X_outlier, y_outlier, title='Data with Outliers')
