In [1]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader



ModuleNotFoundError: No module named 'torch'

In [None]:
# Fix the NumPy and PyTorch global RNG seeds so repeated runs draw the same numbers
np.random.seed(42)
torch.manual_seed(42)



In [None]:
# 1. Load and Preprocess the Covertype Dataset
# The returned container exposes the feature matrix as `.data` and the
# class labels as `.target`.
covtype = fetch_covtype()
X = covtype.data
y = covtype.target



In [None]:
# Reduce to a two-class problem: keep only rows whose label is <= 2
# (classes 1 and 2)
mask = y <= 2
X_binary, y_binary = X[mask], y[mask]
y_binary = y_binary - 1  # shift labels so that 1 -> 0 and 2 -> 1



In [None]:
# Hold out 20% of the samples for testing; the fixed random_state pins the
# shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_binary,
    y_binary,
    test_size=0.2,
    random_state=42,
)



In [None]:
# Standardize the continuous features (first 10 columns).
# The scaler's statistics are learned from the training split only and then
# reused for the test split, so no test information leaks into the scaling.
scaler = StandardScaler()
scaler.fit(X_train[:, :10])
X_train[:, :10] = scaler.transform(X_train[:, :10])  # overwrite the columns in place
X_test[:, :10] = scaler.transform(X_test[:, :10])



In [None]:
# Convert to PyTorch tensors
# torch.as_tensor accepts both NumPy arrays and existing tensors, and
# reshape(-1, 1) always yields an (N, 1) column, so this cell can be re-run
# safely. The previous torch.tensor(...).unsqueeze(1) form warned when given a
# tensor and stacked an extra axis onto the labels on every re-run, which then
# no longer matched the model's (N, 1) output for BCELoss.
# NOTE(review): assumes the NumPy inputs are not already float32 (the
# conversion then copies, exactly as torch.tensor did) -- confirm if the
# upstream dtype ever changes.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.float32).reshape(-1, 1)  # column vector for BCELoss
X_test = torch.as_tensor(X_test, dtype=torch.float32)
y_test = torch.as_tensor(y_test, dtype=torch.float32).reshape(-1, 1)



In [None]:
# Pair the training features with their labels and iterate over them in
# shuffled mini-batches of 64 samples
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [None]:
# 2. Define the Logistic Regression Model
class LogisticRegression(nn.Module):
    """Binary logistic regression: one linear layer followed by a sigmoid."""

    def __init__(self, input_dim):
        """Create the model.

        Args:
            input_dim: Number of input features per sample.
        """
        super().__init__()
        # A single output unit is enough for a two-class problem
        self.linear = nn.Linear(input_dim, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``.

        The result has one value per input row, each in the range (0, 1).
        """
        logits = self.linear(x)
        return torch.sigmoid(logits)



In [None]:
# Build the model with one input per feature column of the training matrix
input_dim = X_train.shape[1]  # 54 features
model = LogisticRegression(input_dim=input_dim)



In [None]:
# 3. Set up Loss Function and Optimizer
# Binary cross-entropy, applied to the probabilities the model outputs
criterion = nn.BCELoss()
# Stochastic gradient descent over every model parameter
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)



In [None]:
# 4. Training Loop
# Runs mini-batch gradient descent for a fixed number of epochs and reports
# the per-sample average training loss after each epoch.
num_epochs = 20
print("Training started...")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean, so weight it by the batch size
        # to accumulate a per-sample total (the last batch may be smaller).
        running_loss += loss.item() * X_batch.shape[0]

    # Average over every training sample seen this epoch
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")



In [None]:
# 5. Testing the Model
# Evaluate on the held-out test set with the model in eval mode and gradient
# tracking switched off.
model.eval()
with torch.no_grad():
    y_pred_prob = model(X_test)  # predicted probabilities
    y_pred = (y_pred_prob >= 0.5).float()  # hard labels, thresholded at 0.5

    # Fraction of test samples whose predicted label matches the true label
    n_correct = y_pred.eq(y_test).sum()
    accuracy = (n_correct / y_test.size(0)).item()
    test_loss = criterion(y_pred_prob, y_test).item()

print(f"\nTest Loss: {test_loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")



In [None]:
# Optional: show the first five test predictions next to the true labels
print("\nSample Predictions vs. Actual:")
for i in range(5):
    pred = y_pred[i].item()
    actual = y_test[i].item()
    prob = y_pred_prob[i].item()
    print(f"Pred: {pred:.0f}, Actual: {actual:.0f}, Prob: {prob:.4f}")