In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification, fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score
import warnings
# NOTE(review): this silences *every* warning category for the rest of the
# kernel session, including deprecation and numerical (overflow / divide)
# warnings raised by the cells below. Consider narrowing the filter.
warnings.filterwarnings('ignore')

# Set random seed for reproducibility
# This seeds NumPy's legacy global generator, which every np.random.* call in
# this notebook draws from.
np.random.seed(42)

print("Libraries imported successfully!")


In [None]:
class SimpleDataAugmentation:
    """Random perturbations (noise, scaling, 2D rotation) for NumPy arrays.

    All randomness is drawn from NumPy's global generator (``np.random``).

    Attributes:
        noise_factor: Standard deviation of the additive Gaussian noise.
        scale_factor: Standard deviation of the multiplicative factor, which
            is drawn around 1.
    """

    def __init__(self, noise_factor=0.1, scale_factor=0.1):
        self.noise_factor, self.scale_factor = noise_factor, scale_factor

    def add_noise(self, x):
        """Return ``x`` plus element-wise zero-mean Gaussian noise."""
        return x + np.random.normal(0, self.noise_factor, x.shape)

    def scale(self, x):
        """Return ``x`` multiplied by one random scalar drawn around 1."""
        factor = np.random.normal(1, self.scale_factor)
        return x * factor

    def rotate_2d(self, x):
        """Rotate ``x`` by one random angle in [-30°, +30°).

        Arrays whose last axis is not of length 2 are returned untouched
        (the very same object) and no random number is consumed.
        """
        if x.shape[-1] == 2:
            theta = np.random.uniform(-np.pi / 6, np.pi / 6)
            c, s = np.cos(theta), np.sin(theta)
            rotation = np.array([[c, -s],
                                 [s, c]])
            # Points are rows, so multiply by the transposed rotation matrix
            return np.dot(x, rotation.T)
        return x

    def augment(self, x):
        """Return a copy of ``x`` with a random subset of the transforms applied.

        Each transform is applied with probability 0.5, in the fixed order
        noise -> scale -> rotation. The rotation coin is only flipped when the
        last axis of the data has length 2.
        """
        out = x.copy()

        # (required last-axis length or None, transform)
        pipeline = (
            (None, self.add_noise),
            (None, self.scale),
            (2, self.rotate_2d),
        )
        for required_width, transform in pipeline:
            if required_width is not None and out.shape[-1] != required_width:
                continue
            if np.random.random() > 0.5:
                out = transform(out)

        return out

# Build a synthetic two-feature classification problem
print("Creating sample dataset...")
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=2,
    random_state=42,
)

# Standardize: fit the scaler on the raw features, then replace X with the
# transformed copy
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

print(f"Dataset shape: {X.shape}")
print(f"Number of classes: {len(np.unique(y))}")

# Augmenter used by the demonstrations below
augmenter = SimpleDataAugmentation(scale_factor=0.05, noise_factor=0.1)

# Demonstrate augmentations on a 2x3 grid: panel 0 is the original data,
# panels 1..5 each show one augmentation strategy.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
n_cols = axes.shape[1]

# Original data
axes[0, 0].scatter(X[:, 0], X[:, 1], c=y, alpha=0.6, cmap='viridis')
axes[0, 0].set_title('Original Data')
axes[0, 0].set_xlabel('Feature 1')
axes[0, 0].set_ylabel('Feature 2')

# Different augmentations
augmentation_names = ['Noise', 'Scale', 'Rotation', 'Combined', 'Multiple Augmented']

for i, name in enumerate(augmentation_names):
    # Panel 0 is taken by the original data, so augmentation i goes to panel
    # i + 1 in row-major order. (Deriving the row from i and the column from
    # i + 1 would send 'Rotation' to axes[0, 0], on top of the original data,
    # and leave axes[1, 0] empty.)
    row, col = divmod(i + 1, n_cols)
    ax = axes[row, col]

    colors = y
    title = f'{name} Augmentation'

    if name == 'Noise':
        X_aug = augmenter.add_noise(X)
    elif name == 'Scale':
        X_aug = augmenter.scale(X)
    elif name == 'Rotation':
        X_aug = augmenter.rotate_2d(X)
    elif name == 'Combined':
        X_aug = augmenter.augment(X)
    else:  # Multiple augmented
        # Stack three independently augmented copies; labels repeat per copy
        X_aug = np.vstack([augmenter.augment(X) for _ in range(3)])
        y_aug = np.tile(y, 3)
        colors = y_aug
        title = f'{name} (3x data)'

    ax.scatter(X_aug[:, 0], X_aug[:, 1], c=colors, alpha=0.6, cmap='viridis')
    ax.set_title(title)
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')

plt.tight_layout()
plt.show()

print("Data augmentation strategies demonstrated!")


In [None]:
class SimpleContrastiveLearner:
    """Two-layer NumPy MLP encoder trained with a contrastive objective.

    The encoder maps inputs to L2-normalized embeddings
    (``x -> ReLU(x W1 + b1) W2 + b2 -> normalize``). ``train_step`` evaluates
    ``simplified_contrastive_loss`` on two views of a batch and applies one
    plain SGD update using analytically derived gradients.

    Attributes:
        input_dim, hidden_dim, output_dim: Layer sizes.
        temperature: Divisor applied to similarities inside the losses.
        learning_rate: SGD step size used by ``train_step``.
        W1, b1, W2, b2: Encoder parameters.
        training_losses: Loss value recorded by every ``train_step`` call.
    """

    # Added to the norm in l2_normalize to avoid division by zero
    _EPS = 1e-8
    # Rows whose pre-normalization norm is below this have no usable direction
    # and are skipped in the backward pass (their gradient would blow up).
    _MIN_NORM_FOR_GRAD = 1e-6

    def __init__(self, input_dim, hidden_dim=64, output_dim=32, temperature=0.1, learning_rate=0.01):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.temperature = temperature
        self.learning_rate = learning_rate

        # Initialize encoder network (simple 2-layer MLP)
        self.W1 = np.random.randn(input_dim, hidden_dim) * 0.1
        self.b1 = np.zeros((1, hidden_dim))
        self.W2 = np.random.randn(hidden_dim, output_dim) * 0.1
        self.b2 = np.zeros((1, output_dim))

        self.training_losses = []

    def relu(self, x):
        """Element-wise ReLU."""
        return np.maximum(0, x)

    def l2_normalize(self, x, axis=-1):
        """L2 normalize along specified axis"""
        norm = np.linalg.norm(x, axis=axis, keepdims=True)
        return x / (norm + self._EPS)

    @staticmethod
    def _row_logsumexp(a):
        """Row-wise ``log(sum(exp(a)))`` of a 2D array, shifted by the row max.

        Subtracting the row maximum before exponentiating keeps the result
        finite when entries are large (e.g. similarities divided by a small
        temperature). Rows of width 0 yield ``-inf``, i.e. ``log(0)``.
        """
        if a.shape[1] == 0:
            return np.full(a.shape[0], -np.inf)
        peak = np.max(a, axis=1, keepdims=True)
        return peak[:, 0] + np.log(np.sum(np.exp(a - peak), axis=1))

    def _forward(self, x):
        """Run the encoder and keep the intermediates needed for backprop.

        Returns:
            ``(z, cache)`` where ``z`` is the normalized embedding and
            ``cache`` is ``(x, pre_act, hidden, norm)``.
        """
        pre_act = np.dot(x, self.W1) + self.b1
        hidden = self.relu(pre_act)
        projected = np.dot(hidden, self.W2) + self.b2
        norm = np.linalg.norm(projected, axis=-1, keepdims=True)
        z = projected / (norm + self._EPS)
        return z, (x, pre_act, hidden, norm)

    def encode(self, x):
        """Encode input to representation space"""
        return self._forward(x)[0]

    def cosine_similarity(self, a, b):
        """Compute cosine similarity between normalized vectors"""
        return np.dot(a, b.T)

    def contrastive_loss(self, z_i, z_j, batch_size=None):
        """NT-Xent loss (Normalized Temperature-scaled Cross Entropy).

        Args:
            z_i, z_j: ``(n, d)`` embeddings of the two views; row ``k`` of one
                is the positive of row ``k`` of the other.
            batch_size: Ignored. The batch size is always taken from
                ``z_i.shape[0]``; the parameter is kept so existing calls that
                pass it keep working.

        Returns:
            Mean over all ``2n`` anchors of
            ``-log(exp(pos) / sum_{k != anchor} exp(sim_k))``.

        Raises:
            ValueError: If the batch is empty.
        """
        n = z_i.shape[0]
        if n == 0:
            raise ValueError("contrastive_loss requires a non-empty batch")

        # Stack both views: rows [0, n) are view i, rows [n, 2n) are view j
        z = np.concatenate([z_i, z_j], axis=0)
        sim_matrix = self.cosine_similarity(z, z) / self.temperature

        # Positive of row r is row r + n (mod 2n)
        rows = np.arange(2 * n)
        pos_sim = sim_matrix[rows, (rows + n) % (2 * n)]

        # Exclude self-similarities from the denominator: exp(-inf) == 0
        np.fill_diagonal(sim_matrix, -np.inf)
        log_denominator = self._row_logsumexp(sim_matrix)

        # -log(exp(pos) / denom) == log(denom) - pos, evaluated in log space
        return np.mean(log_denominator - pos_sim)

    def _negative_logits(self, z_i, z_j):
        """Cross-view logits plus the log of each sample's negative mass.

        Returns:
            ``(logits, off_diag, log_neg_mass)`` where ``logits[a, b]`` is
            ``z_i[a] . z_j[b] / temperature``, ``off_diag`` masks the
            non-positive pairs and ``log_neg_mass[a]`` is
            ``log(sum_{b != a} exp(logits[a, b]) + sum_{b != a} exp(logits[b, a]))``.
        """
        batch_size = z_i.shape[0]
        logits = np.dot(z_i, z_j.T) / self.temperature
        off_diag = ~np.eye(batch_size, dtype=bool)
        negatives = np.concatenate(
            [logits[off_diag].reshape(batch_size, -1),
             logits.T[off_diag].reshape(batch_size, -1)],
            axis=1,
        )
        return logits, off_diag, self._row_logsumexp(negatives)

    def simplified_contrastive_loss(self, z_i, z_j):
        """Simplified contrastive loss for easier implementation.

        ``-mean(pos) + mean(log(negative mass))`` where ``pos`` is the scaled
        similarity of each positive pair and the negative mass sums
        ``exp(similarity)`` over every cross-view pair involving the sample
        except its own positive. Evaluated with log-sum-exp so small
        temperatures do not overflow.
        """
        # Positive similarity (between augmented pairs)
        pos_sim = np.sum(z_i * z_j, axis=1) / self.temperature

        _, _, log_neg_mass = self._negative_logits(z_i, z_j)

        return -np.mean(pos_sim) + np.mean(log_neg_mass)

    def _embedding_gradients(self, z_i, z_j):
        """Gradient of ``simplified_contrastive_loss`` w.r.t. both embeddings."""
        batch_size = z_i.shape[0]
        logits, off_diag, log_neg_mass = self._negative_logits(z_i, z_j)

        # logits[a, b] (a != b) contributes to the negative mass of sample a
        # (row term) and of sample b (column term); each term is a softmax
        # weight. Diagonal entries are masked with -inf so they contribute 0.
        neg_logits = np.where(off_diag, logits, -np.inf)
        grad_logits = (
            np.exp(neg_logits - log_neg_mass[:, None])
            + np.exp(neg_logits - log_neg_mass[None, :])
        ) / batch_size
        # Positive pairs only appear in the -mean(pos) term
        np.fill_diagonal(grad_logits, -1.0 / batch_size)

        grad_z_i = np.dot(grad_logits, z_j) / self.temperature
        grad_z_j = np.dot(grad_logits.T, z_i) / self.temperature
        return grad_z_i, grad_z_j

    def _backward(self, grad_z, z, cache):
        """Backpropagate an embedding gradient to ``(W1, b1, W2, b2)``."""
        x, pre_act, hidden, norm = cache

        # Through the L2 normalization: only the component of the gradient
        # tangent to the unit sphere survives, scaled by 1 / norm.
        tangent = grad_z - z * np.sum(grad_z * z, axis=-1, keepdims=True)
        usable = norm > self._MIN_NORM_FOR_GRAD
        grad_projected = np.where(usable, tangent / np.where(usable, norm, 1.0), 0.0)

        grad_W2 = np.dot(hidden.T, grad_projected)
        grad_b2 = np.sum(grad_projected, axis=0, keepdims=True)

        # Through the ReLU: gradient flows only where the unit was active
        grad_hidden = np.dot(grad_projected, self.W2.T) * (pre_act > 0)
        grad_W1 = np.dot(x.T, grad_hidden)
        grad_b1 = np.sum(grad_hidden, axis=0, keepdims=True)

        return grad_W1, grad_b1, grad_W2, grad_b2

    def train_step(self, x1, x2):
        """Single training step: forward pass, loss, one SGD update.

        Args:
            x1, x2: ``(batch, input_dim)`` arrays holding two views of the
                same samples (row ``k`` of each is a positive pair).

        Returns:
            ``(loss, z1, z2)`` — the loss and embeddings computed *before* the
            parameter update. The loss is also appended to
            ``training_losses``.

        The update is skipped when the batch has fewer than two samples (no
        negatives exist) or when the loss is not finite, so parameters are
        never overwritten with NaN/inf.
        """
        batch_size = x1.shape[0]

        # Forward pass
        z1, cache1 = self._forward(x1)
        z2, cache2 = self._forward(x2)

        # Compute loss
        loss = self.simplified_contrastive_loss(z1, z2)
        self.training_losses.append(loss)

        if batch_size >= 2 and np.isfinite(loss):
            grad_z1, grad_z2 = self._embedding_gradients(z1, z2)
            grads_view1 = self._backward(grad_z1, z1, cache1)
            grads_view2 = self._backward(grad_z2, z2, cache2)

            # Both views share the encoder, so their gradients add up.
            # Parameters are rebound (not modified in place) on purpose.
            for name, g1, g2 in zip(("W1", "b1", "W2", "b2"), grads_view1, grads_view2):
                setattr(self, name, getattr(self, name) - self.learning_rate * (g1 + g2))

        return loss, z1, z2

    def get_representations(self, x):
        """Get learned representations"""
        return self.encode(x)

# Demonstrate contrastive learning
print("=== Contrastive Learning Training ===")

# Batch configuration
batch_size = 64
n_batches = 50

learner = SimpleContrastiveLearner(
    input_dim=X.shape[1],
    hidden_dim=32,
    output_dim=16,
    temperature=0.1
)

# Draw each batch without replacement whenever the dataset is large enough:
# a sample that appears twice in one batch would be treated by the loss as a
# negative of itself. Fall back to replacement only if the dataset is smaller
# than a batch.
n_samples = X.shape[0]
sample_with_replacement = n_samples < batch_size

# Training loop
for batch in range(n_batches):
    # Sample random batch
    indices = np.random.choice(n_samples, batch_size, replace=sample_with_replacement)
    x_batch = X[indices]

    # Create positive pairs: two independently augmented views of the same batch
    x1 = augmenter.augment(x_batch)
    x2 = augmenter.augment(x_batch)

    # Training step
    loss, z1, z2 = learner.train_step(x1, x2)

    if batch % 10 == 0:
        print(f"Batch {batch}, Loss: {loss:.4f}")

# Plot training progress (one recorded loss value per train_step call)
plt.figure(figsize=(10, 5))
plt.plot(learner.training_losses)
plt.title('Contrastive Learning Training Loss')
plt.xlabel('Batch')
plt.ylabel('Loss')
plt.grid(True, alpha=0.3)
plt.show()

print("Contrastive learning training completed!")
