# FIT Library Showcase

This notebook demonstrates the features of the FIT library - a machine learning framework built with the help of NumPy.

The FIT library that I've created provides tensor operations with automatic differentiation, neural network components with inclusion of linear layers, activations, and normalization, attention mechanisms and transformers, multiple optimizers such as SGD, Adam, SAM, and Lion, loss functions for regression and classification, data pipeline utilities with built-in datasets, training monitoring and visualization, model persistence for saving and loading, and APIs for quick experimentation.

I tried to make this library lightweight and it only requires NumPy. It offers a PyTorch-like familiar API, and has an extensible clean architecture for adding components. 

# Quick Start

## Installation

In [1]:
# !pip install git+https://github.com/Klus3kk/fit.git

## Tensor Creation

In [2]:
import numpy as np
from fit.core.tensor import Tensor

# Create tensors from various data types
a = Tensor([1.0, 2.0, 3.0], requires_grad=True)
b = Tensor([[1, 2], [3, 4]], requires_grad=True)
c = Tensor(np.random.randn(3, 3), requires_grad=True)

print(f"Vector: {a}")
print(f"Matrix: {b}")
print(f"Random tensor: {c}")

Vector: Tensor([1. 2. 3.], requires_grad=True)
Matrix: Tensor([[1. 2.]
 [3. 4.]], requires_grad=True)
Random tensor: Tensor([[ 0.1874374   2.30901503 -2.09992575]
 [ 0.18774414  0.10507803  1.04678915]
 [-0.17617486  1.20022882  0.42736107]], requires_grad=True)


## Operations

In [3]:
# Basic arithmetic
x = Tensor([1.0, 2.0, 3.0], requires_grad=True)
y = Tensor([4.0, 5.0, 6.0], requires_grad=True)

# Addition and multiplication
z = x + y
w = x * y

print(f"x + y = {z.data}")
print(f"x * y = {w.data}")

# Matrix operations
A = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
B = Tensor([[2.0, 1.0], [4.0, 3.0]], requires_grad=True)
C = A @ B  # Matrix multiplication

print(f"Matrix multiplication result:\n{C.data}")

x + y = [5. 7. 9.]
x * y = [ 4. 10. 18.]
Matrix multiplication result:
[[10.  7.]
 [22. 15.]]


# Tensor Operations & Autograd

## Automatic DIfferentiation

In [4]:
from fit.core.tensor import Tensor

# Function: f(x) = x² + 2x + 1
x = Tensor([2.0], requires_grad=True)

# Forward pass
y = x * x + 2 * x + 1
print(f"f({x.data[0]}) = {y.data[0]}")

# Backward pass - compute gradient
y.backward()
print(f"f'({x.data[0]}) = {x.grad[0]}")
print(f"Expected derivative: {2 * x.data[0] + 2}")

f(2.0) = 9.0
f'(2.0) = 6.0
Expected derivative: 6.0


## Multi-variable functions

In [5]:
# Function: f(a, b) = sum(a * b)
a = Tensor([1.0, 2.0], requires_grad=True)
b = Tensor([3.0, 4.0], requires_grad=True)

# Forward pass
c = a * b
loss = c.sum()

# Backward pass
loss.backward()

print(f"Gradient of a = {a.grad}")
print(f"Gradient of b = {b.grad}")

Gradient of a = [3. 4.]
Gradient of b = [1. 2.]


## Chain Rule

In [6]:
# Complex function composition
x = Tensor([1.0], requires_grad=True)
y = x * 2
z = y + 3
w = z * z

w.backward()
print(f"Chain rule gradient: {x.grad[0]}")

Chain rule gradient: 20.0


# Neural Network Components

## Linear Layers

In [7]:
from fit.nn.modules.linear import Linear
from fit.core.tensor import Tensor

# Create a linear layer
layer = Linear(3, 2)  # 3 inputs -> 2 outputs

# Forward pass
x = Tensor([[1.0, 2.0, 3.0]], requires_grad=True)
output = layer(x)

print(f"Input shape: {x.data.shape}")
print(f"Output shape: {output.data.shape}")
print(f"Layer weights shape: {layer.weight.data.shape}")
print(f"Layer bias shape: {layer.bias.data.shape}")

Input shape: (1, 3)
Output shape: (1, 2)
Layer weights shape: (2, 3)
Layer bias shape: (2,)


## Activation Functions

In [8]:
from fit.nn.modules.activation import ReLU, Softmax, GELU

# Test different activations
x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]], requires_grad=True)

relu = ReLU()
softmax = Softmax()
gelu = GELU()

print(f"Input: {x.data}")
print(f"ReLU: {relu(x).data}")
print(f"Softmax: {softmax(x).data}")
print(f"GELU: {gelu(x).data}")

Input: [[-2. -1.  0.  1.  2.]]
ReLU: [[-0. -0.  0.  1.  2.]]
Softmax: [[0.01165623 0.03168492 0.08612854 0.23412166 0.63640865]]
GELU: [[-0.04540231 -0.15880801  0.          0.84119199  1.95459769]]


## Building Models

In [9]:
from fit.nn.modules.container import Sequential
from fit.nn.modules.linear import Linear
from fit.nn.modules.activation import ReLU, Softmax

# Create a neural network
model = Sequential(
    Linear(4, 8),    # Input layer
    ReLU(),          # Activation
    Linear(8, 6),    # Hidden layer
    ReLU(),          # Activation
    Linear(6, 3),    # Output layer
    Softmax()        # Final activation
)

# Test the model
x = Tensor([[1.0, 2.0, 3.0, 4.0]], requires_grad=True)
output = model(x)

print(f"Model output: {output.data}")
print(f"Output sum (should be ~1.0): {output.data.sum()}")
print(f"Number of parameters: {len(model.parameters())}")

Model output: [[0.01665713 0.93909529 0.04424758]]
Output sum (should be ~1.0): 1.0
Number of parameters: 6


## Normalization Layers

In [10]:
from fit.nn.modules.normalization import BatchNorm, LayerNorm

# Batch normalization
batch_norm = BatchNorm(4)
x = Tensor([[1.0, 2.0, 3.0, 4.0], [2.0, 3.0, 4.0, 5.0]], requires_grad=True)
normalized = batch_norm(x)

print(f"Original: {x.data}")
print(f"Batch normalized: {normalized.data}")

# Layer normalization  
layer_norm = LayerNorm(4)
layer_normalized = layer_norm(x)
print(f"Layer normalized: {layer_normalized.data}")

Original: [[1. 2. 3. 4.]
 [2. 3. 4. 5.]]
Batch normalized: [[-0.99998 -0.99998 -0.99998 -0.99998]
 [ 0.99998  0.99998  0.99998  0.99998]]
Layer normalized: [[-1.34163542 -0.44721181  0.44721181  1.34163542]
 [-1.34163542 -0.44721181  0.44721181  1.34163542]]


# Optimizers & Training

## SGD Optimizer

In [11]:
from fit.optim.sgd import SGD
from fit.nn.modules.linear import Linear

# Create model and optimizer
model = Linear(2, 1)
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)

# Simulate training step
x = Tensor([[1.0, 2.0]], requires_grad=True)
target = Tensor([[3.0]])

# Forward pass
output = model(x)
loss = ((output - target) ** 2).mean()

# Backward pass
loss.backward()

# Optimizer step
optimizer.step()
optimizer.zero_grad()

print(f"Loss: {loss.data}")
print("Training step completed!")

TypeError: SGD.__init__() got an unexpected keyword argument 'momentum'

## Adam Optimizer

In [None]:
from fit.optim.adam import Adam

# Create model and Adam optimizer
model = Sequential(
    Linear(2, 4),
    ReLU(),
    Linear(4, 1)
)

optimizer = Adam(model.parameters(), lr=0.001, beta1=0.9, beta2=0.999)

# Training loop simulation
for step in range(5):
    # Sample data
    x = Tensor([[np.random.randn(), np.random.randn()]], requires_grad=True)
    target = Tensor([[np.random.randn()]])
    
    # Forward pass
    output = model(x)
    loss = ((output - target) ** 2).mean()
    
    # Backward pass
    loss.backward()
    
    # Optimizer step
    optimizer.step()
    optimizer.zero_grad()
    
    print(f"Step {step+1}, Loss: {loss.data[0]:.4f}")

# Data Pipeline

## Custom dataset

In [None]:
from fit.data.dataset import Dataset
from fit.data.dataloader import DataLoader

# Create custom dataset
X = np.random.randn(100, 4)
y = np.random.randint(0, 3, 100)

dataset = Dataset(X, y)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

print(f"Dataset size: {len(dataset)}")
print(f"Number of batches: {len(dataloader)}")

# Iterate through batches
for i, (batch_x, batch_y) in enumerate(dataloader):
    print(f"Batch {i+1}: X shape {batch_x.data.shape}, y shape {batch_y.data.shape}")
    if i >= 2:  # Show first 3 batches
        break

## Dataset Loaders

In [None]:
from fit.simple.data import load_dataset

# Load XOR dataset
xor_data = load_dataset('xor', batch_size=4)
print("XOR Dataset loaded:")
print(f"Train loader batches: {len(xor_data['train'])}")

# Load Iris dataset
iris_data = load_dataset('iris', batch_size=32, validation_split=0.2)
print("Iris Dataset loaded:")
print(f"Train batches: {len(iris_data['train'])}")
print(f"Validation batches: {len(iris_data['val'])}")

# Sample from XOR dataset
for x, y in xor_data['train']:
    print(f"XOR - X: {x.data}, y: {y.data}")
    break

## Feature Selection

In [None]:
from fit.data.feature_selection import SelectKBest, f_classif

# Generate sample data
X = np.random.randn(100, 10)
y = np.random.randint(0, 2, 100)

# Feature selection
selector = SelectKBest(score_func=f_classif, k=5)
X_selected = selector.fit_transform(X, y)

print(f"Original features: {X.shape[1]}")
print(f"Selected features: {X_selected.shape[1]}")
print(f"Selected feature indices: {selector.get_support()}")

# Loss Functions

## Regression Loss

In [None]:
from fit.loss.regression import MSELoss

mse = MSELoss()

# Sample predictions and targets
predictions = Tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
targets = Tensor([[1.2, 1.8], [2.9, 4.1]])

loss = mse(predictions, targets)
print(f"MSE Loss: {loss.data}")

# Compute gradients
loss.backward()
print(f"Gradients: {predictions.grad}")

## Classification Loss

In [None]:
from fit.loss.classification import CrossEntropyLoss

ce_loss = CrossEntropyLoss()

# Logits and class labels
logits = Tensor([[2.0, 1.0, 0.1], [0.5, 2.0, 0.8]], requires_grad=True)
targets = Tensor([0, 1])  # Class indices

loss = ce_loss(logits, targets)
print(f"CrossEntropy Loss: {loss.data}")

# Compute gradients
loss.backward()
print(f"Logits gradients: {logits.grad}")

# Attention & Transformers

## Attention Mechanism

In [None]:
from fit.nn.modules.attention import MultiHeadAttention

# Create multi-head attention
attention = MultiHeadAttention(d_model=64, num_heads=8)

# Sample input (batch_size=2, seq_len=10, d_model=64)
x = Tensor(np.random.randn(2, 10, 64), requires_grad=True)

# Self-attention
output = attention(x, x, x)
print(f"Attention input shape: {x.data.shape}")
print(f"Attention output shape: {output.data.shape}")

## Transformer Block

In [None]:
from fit.nn.modules.transformer import TransformerEncoderBlock

# Create transformer encoder block
transformer_block = TransformerEncoderBlock(
    d_model=64, 
    num_heads=8, 
    d_ff=256, 
    dropout=0.1
)

# Forward pass
x = Tensor(np.random.randn(2, 10, 64), requires_grad=True)
output = transformer_block(x)

print(f"Transformer input shape: {x.data.shape}")
print(f"Transformer output shape: {output.data.shape}")

## Spectral Normalization

In [None]:
from fit.nn.utils.spectral_norm import SpectralNormLinear

# Linear layer with spectral normalization
spec_linear = SpectralNormLinear(10, 5, n_power_iterations=1)

x = Tensor(np.random.randn(3, 10), requires_grad=True)
output = spec_linear(x)

print(f"Output shape: {output.data.shape}")

# Monitoring

## Training Tracker

In [None]:
from fit.monitor.tracker import TrainingTracker

# Create tracker
tracker = TrainingTracker(experiment_name="demo_experiment")

# Simulate training with metrics
for epoch in range(10):
    # Simulate epoch metrics
    train_loss = 1.0 - epoch * 0.1 + np.random.normal(0, 0.05)
    val_loss = 1.2 - epoch * 0.08 + np.random.normal(0, 0.08)
    accuracy = epoch * 0.08 + np.random.normal(0, 0.02)
    
    # Log metrics
    tracker.log_epoch(epoch, {
        'train_loss': train_loss,
        'val_loss': val_loss,
        'accuracy': accuracy
    })
    
    tracker.log_learning_rate(0.001 * (0.9 ** epoch))

print("Metrics logged!")
print(f"Best validation loss: {tracker.best_values.get('val_loss', 'N/A')}")

# Export metrics
tracker.export("training_log.json")

## Model performance monitoring

In [None]:
from fit.utils.engine import evaluate

# Create a simple model for demonstration
model = Sequential(
    Linear(4, 8),
    ReLU(),
    Linear(8, 3),
    Softmax()
)

# Create sample data
X = Tensor(np.random.randn(20, 4))
y = Tensor(np.random.randint(0, 3, 20))
dataset = Dataset(X.data, y.data)
dataloader = DataLoader(dataset, batch_size=5)

# Evaluate model
from fit.loss.classification import CrossEntropyLoss
loss_fn = CrossEntropyLoss()

metrics = evaluate(model, dataloader, loss_fn)
print(f"Evaluation metrics: {metrics}")

# Model save/load

## Save and load models

In [None]:
from fit.nn.utils.model_io import save_model, load_model

# Create and train a simple model
model = Sequential(
    Linear(2, 4),
    ReLU(),
    Linear(4, 1)
)

# Save model
save_model(model, "demo_model.pkl")
print("Model saved!")

# Load model
loaded_model = load_model("demo_model.pkl")
print("Model loaded!")

# Test that loaded model works
test_input = Tensor([[1.0, 2.0]])
original_output = model(test_input)
loaded_output = loaded_model(test_input)

print(f"Original output: {original_output.data}")
print(f"Loaded output: {loaded_output.data}")
print(f"Outputs match: {np.allclose(original_output.data, loaded_output.data)}")

# Examples of usage

## XOR problem

In [None]:
from fit.simple.trainer import Trainer

# Create XOR model
xor_model = Sequential(
    Linear(2, 8),
    ReLU(),
    Linear(8, 4),
    ReLU(),
    Linear(4, 1)
)

# XOR data
X = Tensor([[0, 0], [0, 1], [1, 0], [1, 1]])
y = Tensor([[0], [1], [1], [0]])

# Create trainer
trainer = Trainer(
    model=xor_model,
    loss='mse',
    optimizer='adam',
    lr=0.01
)

# Train the model
print("Training XOR model...")
history = trainer.fit(
    data=(X, y),
    epochs=100,
    batch_size=4,
    validation_split=0.0,  # Use all data for training
    verbose=True
)

# Test the trained model
print("\nXOR Results:")
for i, (input_val, expected) in enumerate(zip(X.data, y.data)):
    prediction = xor_model(Tensor([input_val]))
    print(f"Input: {input_val}, Expected: {expected[0]:.0f}, Predicted: {prediction.data[0]:.3f}")

## Multi-class classification

In [None]:
# Load Iris dataset
iris_data = load_dataset('iris', batch_size=16, validation_split=0.3)

# Create classification model
classifier = Sequential(
    Linear(4, 16),
    ReLU(),
    Linear(16, 8),
    ReLU(),
    Linear(8, 3),
    Softmax()
)

# Create trainer
trainer = Trainer(
    model=classifier,
    loss='crossentropy',
    optimizer='adam',
    lr=0.01
)

# Train
print("Training Iris classifier...")
history = trainer.fit(
    data=iris_data['train'],
    validation_data=iris_data['val'],
    epochs=50,
    verbose=True
)

# Evaluate
final_metrics = evaluate(classifier, iris_data['val'], CrossEntropyLoss())
print(f"Final validation metrics: {final_metrics}")

## Custom training loop

In [None]:
from fit.optim.adam import Adam
from fit.loss.regression import MSELoss

# Generate synthetic regression data
np.random.seed(42)
X_train = np.random.randn(100, 3)
true_weights = np.array([1.5, -2.0, 0.5])
y_train = X_train @ true_weights + 0.1 * np.random.randn(100)

# Create model
regression_model = Sequential(
    Linear(3, 8),
    ReLU(),
    Linear(8, 1)
)

# Setup training
optimizer = Adam(regression_model.parameters(), lr=0.01)
loss_fn = MSELoss()

# Custom training loop
print("Training loop for regression...")
for epoch in range(100):
    # Convert to tensors
    X_tensor = Tensor(X_train, requires_grad=True)
    y_tensor = Tensor(y_train.reshape(-1, 1))
    
    # Forward pass
    predictions = regression_model(X_tensor)
    loss = loss_fn(predictions, y_tensor)
    
    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 20 == 0:
        print(f"Epoch {epoch}: Loss = {loss.data[0]:.4f}")

print("Training completed!")

# Test prediction
test_X = Tensor([[1.0, -1.0, 0.5]])
prediction = regression_model(test_X)
expected = 1.0 * 1.5 + (-1.0) * (-2.0) + 0.5 * 0.5  # Using true weights
print(f"Test prediction: {prediction.data[0]:.3f}, Expected: {expected:.3f}")