# PyTorch Level 101: A Beginner's Complete Guide

Welcome to this comprehensive introduction to PyTorch!

## What You'll Learn
1. What is PyTorch?
2. Tensors and operations
3. Automatic differentiation
4. Building neural networks
5. Working with real data (pytorch_sample_data.csv)
6. Training a regression model

## 1. What is PyTorch?

**PyTorch** is an open-source machine learning framework, originally developed by Meta AI and now governed by the PyTorch Foundation.

| Feature | Benefit |
|---------|--------|
| Dynamic graphs | Flexible, easy debugging |
| Pythonic | Natural Python integration |
| Research-friendly | Preferred in academia |

In [None]:
# Install packages
!pip install torch torchvision numpy pandas matplotlib scikit-learn

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score

# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f"PyTorch: {torch.__version__}")
print(f"Device: {device}")

## 2. Tensors - The Building Blocks

In [None]:
# Three common ways to create a tensor
t1 = torch.tensor([1, 2, 3])   # from an existing Python list
t2 = torch.zeros((3, 4))       # 3x4 tensor filled with zeros
t3 = torch.randn((2, 3))       # 2x3 tensor of random normal samples

print(f"From list: {t1}")
print(f"Zeros:\n{t2}")
print(f"Random:\n{t3}")

In [None]:
# Basic arithmetic on two 2x2 float tensors
a = torch.tensor([[1., 2.], [3., 4.]])
b = torch.tensor([[5., 6.], [7., 8.]])

elementwise_sum = a + b   # adds matching entries
matrix_product = a @ b    # matrix multiplication (rows of a times columns of b)
overall_mean = a.mean()   # mean over every element of a

print(f"Add:\n{elementwise_sum}")
print(f"Matmul:\n{matrix_product}")
print(f"Mean: {overall_mean}")

## 3. Automatic Differentiation

In [None]:
# requires_grad=True asks autograd to record operations applied to x
x = torch.tensor([3.0], requires_grad=True)
y = x * x
y.backward()  # populates x.grad with dy/dx

value = y.item()
slope = x.grad.item()
print(f"y = x^2 at x=3: y={value}, dy/dx={slope}")

## 4. Building Neural Networks

In [None]:
# Custom network class
class SimpleNet(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # input -> hidden, then hidden -> output
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Apply the hidden layer with ReLU, then the output layer."""
        hidden = self.fc1(x)
        activated = F.relu(hidden)
        return self.fc2(activated)


# Demo instance: 10 inputs, 64 hidden units, 1 output
demo_sizes = (10, 64, 1)
model = SimpleNet(*demo_sizes)
print(model)

## 5. Working with Real Data: pytorch_sample_data.csv

Let's build a house price prediction model trained on a dataset of 100,000 records!

In [None]:
# Read the CSV into a DataFrame and take a first look at it
df = pd.read_csv('pytorch_sample_data.csv')

price_summary = df['price'].describe()
print(f"Shape: {df.shape}")
print(f"\nPrice stats:\n{price_summary}")

# Last expression of the cell: the notebook renders the first rows as a table
df.head()

In [None]:
# Columns used as model inputs; 'price' is the regression target
feature_cols = [
    'size_sqft', 'bedrooms', 'bathrooms', 'age_years', 'distance_downtown',
    'lot_size', 'garage_spaces', 'quality_score', 'neighborhood_rating',
    'has_pool', 'has_basement',
]

X = df[feature_cols].values.astype(np.float32)
y = df['price'].values.astype(np.float32)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scalers are fitted on the training split only and then reused on the test split
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

# StandardScaler expects 2-D input, so the targets pass through a column view
scaler_y = StandardScaler()
y_train_col = y_train.reshape(-1, 1)
y_test_col = y_test.reshape(-1, 1)
y_train_scaled = scaler_y.fit_transform(y_train_col).flatten()
y_test_scaled = scaler_y.transform(y_test_col).flatten()

print(f"Train: {X_train.shape}, Test: {X_test.shape}")

In [None]:
# Wrap the scaled arrays as tensors, pair them into datasets, then batch them
BATCH_SIZE = 256

X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train_scaled, dtype=torch.float32)
X_test_t = torch.tensor(X_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test_scaled, dtype=torch.float32)

train_dataset = TensorDataset(X_train_t, y_train_t)
test_dataset = TensorDataset(X_test_t, y_test_t)

# Only the training loader shuffles; the test loader keeps dataset order
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

print(f"Train batches: {len(train_loader)}, Test batches: {len(test_loader)}")

In [None]:
# Define regression model
class HousePriceModel(nn.Module):
    """Feed-forward regressor: input -> 64 -> 32 -> 16 -> 1.

    The first two hidden layers use batch normalization, ReLU and dropout
    (p=0.2); the third uses ReLU only. The output is one value per sample.
    """

    def __init__(self, input_size):
        """Build the layers for ``input_size`` input features."""
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.bn1 = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, 32)
        self.bn2 = nn.BatchNorm1d(32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, 1)
        # One dropout module is reused after both normalized layers
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Return predictions of shape ``(batch,)`` for ``x``.

        ``x`` is assumed to be ``(batch, input_size)`` — TODO confirm if other
        shapes are ever passed.
        """
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        # Squeeze only the trailing feature dim. A bare squeeze() would also
        # drop the batch dim for a single-sample batch, yielding a 0-d tensor
        # that no longer matches a (1,)-shaped target.
        return self.fc4(x).squeeze(-1)

# One input unit per feature column; the model is moved to the selected device
n_features = X_train.shape[1]
model = HousePriceModel(n_features)
model = model.to(device)

# Mean squared error loss, optimized with Adam
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

print(model)

In [None]:
# Training loop
def train_epoch(model, loader, criterion, optimizer):
    """Run one training pass over ``loader`` and return the mean loss per sample.

    Args:
        model: Module to train; it is switched to training mode.
        loader: Iterable yielding ``(X_batch, y_batch)`` tensor pairs.
        criterion: Loss callable. Assumed to return the mean over the batch
            (the default for ``nn.MSELoss``) so batches can be weighted by size.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        The loss averaged over all samples seen in the epoch, as a float.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()
    # Send batches to wherever the model lives. Fall back to the notebook-level
    # ``device`` only for a model that has no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    total_loss = 0.0
    n_samples = 0
    for X_batch, y_batch in loader:
        X_batch, y_batch = X_batch.to(target_device), y_batch.to(target_device)
        optimizer.zero_grad()
        pred = model(X_batch)
        loss = criterion(pred, y_batch)
        loss.backward()
        optimizer.step()
        # Weight each batch mean by its size: the final batch is often smaller,
        # and a plain average over batches would over-count its samples.
        batch_size = X_batch.size(0)
        total_loss += loss.item() * batch_size
        n_samples += batch_size
    return total_loss / n_samples

def evaluate(model, loader, criterion):
    """Compute the mean loss per sample over ``loader`` without updating weights.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(X_batch, y_batch)`` tensor pairs.
        criterion: Loss callable. Assumed to return the mean over the batch
            (the default for ``nn.MSELoss``) so batches can be weighted by size.

    Returns:
        The loss averaged over all samples in ``loader``, as a float.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()
    # Send batches to wherever the model lives. Fall back to the notebook-level
    # ``device`` only for a model that has no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    total_loss = 0.0
    n_samples = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for X_batch, y_batch in loader:
            X_batch, y_batch = X_batch.to(target_device), y_batch.to(target_device)
            pred = model(X_batch)
            # Weight each batch mean by its size so a smaller final batch does
            # not skew the average.
            batch_size = X_batch.size(0)
            total_loss += criterion(pred, y_batch).item() * batch_size
            n_samples += batch_size
    return total_loss / n_samples

# Train for 50 epochs, recording the train and test loss after each one
train_losses = []
test_losses = []
for epoch in range(50):
    train_loss = train_epoch(model, train_loader, criterion, optimizer)
    train_losses.append(train_loss)

    test_loss = evaluate(model, test_loader, criterion)
    test_losses.append(test_loss)

    # Report progress on every 10th epoch only
    if (epoch + 1) % 10 != 0:
        continue
    print(f"Epoch {epoch+1}: Train={train_loss:.4f}, Test={test_loss:.4f}")

In [None]:
# Plot the per-epoch train and test loss curves on shared axes
fig, ax = plt.subplots(figsize=(10, 4))
for series, label in ((train_losses, 'Train'), (test_losses, 'Test')):
    ax.plot(series, label=label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss (MSE)')
ax.set_title('Training Progress')
ax.legend()
plt.show()

In [None]:
# Evaluate on the held-out test set and report metrics in original price units
model.eval()
with torch.no_grad():
    X_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    preds_scaled = model(X_tensor).cpu().numpy()

# The model predicts standardized prices; map them back with the target scaler
preds = scaler_y.inverse_transform(preds_scaled.reshape(-1, 1)).flatten()

mae = mean_absolute_error(y_test, preds)
rmse = np.sqrt(np.mean((y_test - preds) ** 2))
r2 = r2_score(y_test, preds)

print(f"MAE: ${mae:,.2f}")
print(f"RMSE: ${rmse:,.2f}")
# Label fixed: it previously printed the mis-decoded "RÂ²"
print(f"R²: {r2:.4f}")

In [None]:
# Scatter the first 1000 test predictions against their actual prices
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test[:1000], preds[:1000], alpha=0.5)

# Dashed red diagonal marks where predicted == actual
price_range = [y_test.min(), y_test.max()]
ax.plot(price_range, price_range, 'r--', lw=2)

ax.set_xlabel('Actual Price ($)')
ax.set_ylabel('Predicted Price ($)')
ax.set_title('Predicted vs Actual House Prices')
plt.show()

In [None]:
# Persist the model's state_dict (not the whole module object) to disk
checkpoint_path = 'house_price_model.pth'
torch.save(model.state_dict(), checkpoint_path)
print("Model saved!")

## Summary

### What We Covered
- PyTorch basics and tensors
- Automatic differentiation
- Building custom neural networks
- Training on real data (100,000 records)
- Model evaluation

### Resources
- [PyTorch Documentation](https://pytorch.org/docs/)
- [PyTorch Tutorials](https://pytorch.org/tutorials/)