In [None]:
!pip install torch torchvision torchaudio





### 1. IMPORTS

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.svm import LinearSVC
from sklearn.utils.class_weight import compute_class_weight

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR

### 2. CONFIG

In [None]:
# --- Reproducibility ---
RANDOM_SEED = 42

# --- Training hyper-parameters ---
BATCH_SIZE = 2048      # smaller than the earlier 4096; usually generalizes better
LR = 1e-3              # peak learning rate reached at the end of warmup
EPOCHS = 120           # upper bound (previously 80); early stopping may end sooner
PATIENCE = 12          # epochs without validation improvement before stopping
WARMUP_EPOCHS = 5      # epochs of linear learning-rate warmup

# Seed both RNGs used by this notebook.
torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

### 3. LOAD CSV FROM /content/

In [None]:
# Location of the dataset inside the Colab runtime.
csv_path = "/content/covtype.csv"

# Fail early with an actionable message. An explicit exception is used instead
# of `assert` because assertions are removed when Python runs with -O and are
# not meant for validating external state such as files.
if not os.path.exists(csv_path):
    raise FileNotFoundError("covtype.csv NOT FOUND in /content. Upload it in Colab files.")

df = pd.read_csv(csv_path)
print("Raw shape:", df.shape)

### 4. BASIC CLEANING

In [None]:
# Cleaning pipeline, applied as one chain:
#   1. discard rows whose target is missing,
#   2. cast the target to integer,
#   3. turn +/-inf into NaN,
#   4. discard every row that still contains a NaN.
df = (
    df.dropna(subset=["Cover_Type"])
    .assign(Cover_Type=lambda frame: frame["Cover_Type"].astype(int))
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)

print("Cleaned shape:", df.shape)
print(df.head())

### 5. FEATURE ENGINEERING (moderately richer feature set)

In [None]:
# Raw numeric measurements that are standardized later on.
cont_cols = [
    "Elevation",
    "Aspect",
    "Slope",
    "Horizontal_Distance_To_Hydrology",
    "Vertical_Distance_To_Hydrology",
    "Horizontal_Distance_To_Roadways",
    "Hillshade_9am",
    "Hillshade_Noon",
    "Hillshade_3pm",
    "Horizontal_Distance_To_Fire_Points",
]

# Every column whose name starts with "Soil_Type".
soil_cols = [name for name in df.columns if name.startswith("Soil_Type")]

# Derived features, appended in the order listed. Later entries may read
# columns created by earlier ones (e.g. `moisture` uses `hydro_mag`).
df = df.assign(
    # Hillshade averaged over the three measurement times.
    mean_hillshade=lambda f: (
        f["Hillshade_9am"] + f["Hillshade_Noon"] + f["Hillshade_3pm"]
    ) / 3.0,
    # Euclidean distance to hydrology from its horizontal/vertical components.
    hydro_mag=lambda f: np.sqrt(
        f["Horizontal_Distance_To_Hydrology"] ** 2
        + f["Vertical_Distance_To_Hydrology"] ** 2
    ),
    # Hydrology distance relative to roadway distance (+1 avoids division by zero).
    hydro_road=lambda f: f["Horizontal_Distance_To_Hydrology"]
    / (f["Horizontal_Distance_To_Roadways"].abs() + 1.0),
    fire_road=lambda f: (
        f["Horizontal_Distance_To_Fire_Points"]
        - f["Horizontal_Distance_To_Roadways"]
    ),
    # Elevation offsets against the three distance measurements.
    elev_hydro_diff=lambda f: f["Elevation"] - f["Vertical_Distance_To_Hydrology"],
    elev_road_diff=lambda f: f["Elevation"] - f["Horizontal_Distance_To_Roadways"],
    elev_fire_diff=lambda f: f["Elevation"] - f["Horizontal_Distance_To_Fire_Points"],
    shade_diff=lambda f: f["Hillshade_Noon"] - f["Hillshade_9am"],
    # Sum of the three hillshade readings.
    sun_intensity=lambda f: (
        f["Hillshade_9am"] + f["Hillshade_Noon"] + f["Hillshade_3pm"]
    ),
    slope_elev_ratio=lambda f: f["Slope"] / (f["Elevation"] + 1.0),
    # Aspect is an angle in degrees; encode it on the unit circle.
    aspect_sin=lambda f: np.sin(np.deg2rad(f["Aspect"])),
    aspect_cos=lambda f: np.cos(np.deg2rad(f["Aspect"])),
    terrain_index=lambda f: f["Elevation"] + f["Vertical_Distance_To_Hydrology"],
    moisture=lambda f: f["hydro_mag"] / (f["Slope"] + 1.0),
    sun_elev_ratio=lambda f: f["sun_intensity"] / (f["Elevation"] + 1.0),
    # Row-wise sum over the Soil_Type* columns.
    soil_type_count=lambda f: f[soil_cols].sum(axis=1),
)

# Names of the derived columns, in creation order.
extra_cols = [
    "mean_hillshade",
    "hydro_mag",
    "hydro_road",
    "fire_road",
    "elev_hydro_diff",
    "elev_road_diff",
    "elev_fire_diff",
    "shade_diff",
    "sun_intensity",
    "slope_elev_ratio",
    "aspect_sin",
    "aspect_cos",
    "terrain_index",
    "moisture",
    "sun_elev_ratio",
    "soil_type_count",
]

# Everything that gets standardized: raw measurements followed by derived ones.
all_cont_cols = [*cont_cols, *extra_cols]
print("Total continuous columns:", len(all_cont_cols))

### 6. SPLIT DATA

In [None]:
# Features are every column except the target; the target is shifted down by
# one so labels start at 0.
X = df.drop(columns=["Cover_Type"])
y = df["Cover_Type"].values - 1   # labels: 0..6

# Stage 1: hold out 30% of the rows, stratified on the label.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    random_state=RANDOM_SEED,
    stratify=y,
    test_size=0.30,
)

# Stage 2: cut the held-out part in half -> validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    random_state=RANDOM_SEED,
    stratify=y_temp,
    test_size=0.50,
)

print("TRAIN:", X_train.shape, "VAL:", X_val.shape, "TEST:", X_test.shape)

### 7. SCALE CONTINUOUS COLUMNS

In [None]:
# Learn the standardization statistics from the training split only, then
# apply the same transform to all three splits.
scaler = StandardScaler()
scaler.fit(X_train[all_cont_cols])

X_train_s, X_val_s, X_test_s = X_train.copy(), X_val.copy(), X_test.copy()
for source, target in ((X_train, X_train_s), (X_val, X_val_s), (X_test, X_test_s)):
    # Only the continuous columns are rescaled; the remaining columns are kept as-is.
    target[all_cont_cols] = scaler.transform(source[all_cont_cols])

# Dense arrays in the dtypes the PyTorch code below uses:
# float32 features and int64 class indices.
X_train_np, X_val_np, X_test_np = (
    frame.values.astype(np.float32) for frame in (X_train_s, X_val_s, X_test_s)
)
y_train_np, y_val_np, y_test_np = (
    labels.astype(np.int64) for labels in (y_train, y_val, y_test)
)

input_dim = X_train_np.shape[1]
num_classes = np.unique(y_train_np).size

print("Input dim =", input_dim, "| Classes =", num_classes)

### 8. DATASET & DATALOADER

In [None]:
class CoverDataset(Dataset):
    """Map-style dataset over a pair of NumPy arrays (features, labels).

    Both arrays are wrapped as tensors once at construction time via
    ``torch.from_numpy``; indexing returns the matching ``(features, label)``
    pair.
    """

    def __init__(self, X, y):
        # from_numpy wraps the existing buffers instead of copying them.
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)

    def __len__(self):
        """Number of samples (length of the feature tensor's first dimension)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        features = self.X[idx]
        label = self.y[idx]
        return features, label

# Only the training loader shuffles; validation and test keep the stored order.
train_loader = DataLoader(
    dataset=CoverDataset(X_train_np, y_train_np),
    batch_size=BATCH_SIZE,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=CoverDataset(X_val_np, y_val_np),
    batch_size=BATCH_SIZE,
)
test_loader = DataLoader(
    dataset=CoverDataset(X_test_np, y_test_np),
    batch_size=BATCH_SIZE,
)

### 9. MODEL — STABLE DEEP MLP (from 95% version, slight tweak)

In [None]:
class DeepMLP(nn.Module):
    """Fully connected classifier with seven hidden layers.

    Hidden widths are 1024-512-512-256-256-128-64, each followed by a
    LeakyReLU (negative slope 0.01). The final linear layer emits one raw
    logit per class; no softmax is applied.

    Args:
        input_dim: number of input features per sample.
        num_classes: number of output logits.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        hidden_widths = (1024, 512, 512, 256, 256, 128, 64)
        # A single activation module instance is reused after every hidden layer.
        activation = nn.LeakyReLU(0.01)

        layers = []
        fan_in = input_dim
        for width in hidden_widths:
            layers.append(nn.Linear(fan_in, width))
            layers.append(activation)
            fan_in = width
        layers.append(nn.Linear(fan_in, num_classes))
        self.net = nn.Sequential(*layers)

        # Re-initialize every linear layer: Xavier-normal weights, zero biases.
        for layer in self.net:
            if not isinstance(layer, nn.Linear):
                continue
            nn.init.xavier_normal_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the logits."""
        logits = self.net(x)
        return logits

# Build the network and move its parameters to the selected device.
model = DeepMLP(input_dim=input_dim, num_classes=num_classes)
model = model.to(device)
print(model)

### 10. LOSS + OPTIMIZER + SCHEDULER

In [None]:
# "Balanced" per-class weights computed from the training labels.
balanced_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_train_np),
    y=y_train_np,
)
# Clip to [0.2, 8.0] and take the square root so that very large weights
# are softened and rare classes are not over-penalized.
softened_weights = np.sqrt(balanced_weights.clip(0.2, 8.0))
class_weights = torch.tensor(softened_weights, dtype=torch.float32, device=device)

# Class-weighted cross-entropy on the raw logits.
criterion = nn.CrossEntropyLoss(weight=class_weights)

optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=LR,
    weight_decay=1e-4,
)

# Cosine learning-rate decay spanning the full epoch budget.
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

### 11. TRAIN LOOP (with warmup + early stopping)

In [None]:
# Early-stopping bookkeeping: lowest validation loss seen so far, a snapshot of
# the weights that produced it, and the number of consecutive epochs without
# improvement.
best_loss = np.inf
best_state = None
pat = 0

# Per-epoch history, used for the plots further down.
train_losses, val_losses, val_accs = [], [], []

for epoch in range(1, EPOCHS + 1):

    # Manual linear warmup: ramp the lr from LR/WARMUP_EPOCHS up to LR over the
    # first WARMUP_EPOCHS epochs by writing it into the optimizer directly.
    # NOTE(review): scheduler.step() still runs during these epochs and its
    # value is overwritten here at the start of the next warmup epoch; how the
    # cosine curve continues afterwards depends on CosineAnnealingLR internals
    # -- confirm the resulting schedule is the intended one.
    if epoch <= WARMUP_EPOCHS:
        now_lr = LR * epoch / WARMUP_EPOCHS
        for g in optimizer.param_groups:
            g["lr"] = now_lr

    # ---- Training pass ----
    model.train()
    total_loss = 0.0

    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()

        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()

        # Cap the global gradient norm before the parameter update.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)
        optimizer.step()

        # Weight each batch loss by its size so the epoch figure is per-sample.
        total_loss += loss.item() * xb.size(0)

    train_loss = total_loss / len(X_train_np)

    # ---- Validation pass (no gradients) ----
    model.eval()
    val_loss_sum = 0.0
    correct, total = 0, 0

    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)

            logits = model(xb)
            loss = criterion(logits, yb)
            val_loss_sum += loss.item() * xb.size(0)

            preds = logits.argmax(1)
            correct += (preds == yb).sum().item()
            total += yb.size(0)

    val_loss = val_loss_sum / len(X_val_np)
    val_acc = correct / total

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    scheduler.step()

    print(f"Epoch {epoch:03d} | Train {train_loss:.4f} | Val {val_loss:.4f} | Acc {val_acc:.4f}")

    # An epoch counts as an improvement only if it beats the best validation
    # loss by more than 1e-4.
    if val_loss < best_loss - 1e-4:
        best_loss = val_loss
        # state_dict() hands back references to the live parameter tensors, so
        # keeping it as-is would silently track every later update. Clone each
        # tensor to freeze a real snapshot of the best weights.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        pat = 0
    else:
        pat += 1
        if pat >= PATIENCE:
            print("Early stopping!")
            break

# Restore the best snapshot. If no epoch ever registered an improvement (for
# example the validation loss was NaN throughout), there is nothing to restore
# and the current weights are kept.
if best_state is not None:
    model.load_state_dict(best_state)
else:
    print("No improving epoch was recorded; keeping the final weights.")

### 12. TEST ACCURACY

In [None]:
# Score the restored model on the held-out test split.
model.eval()
true_batches, pred_batches = [], []

with torch.no_grad():
    for features, targets in test_loader:
        batch_logits = model(features.to(device))
        # Predicted class = index of the largest logit; moved back to CPU/NumPy.
        pred_batches.append(batch_logits.argmax(1).cpu().numpy())
        # The labels never left the CPU, so they convert directly.
        true_batches.append(targets.numpy())

y_true = np.concatenate(true_batches)
y_pred = np.concatenate(pred_batches)

print("\nFINAL TEST ACCURACY:", accuracy_score(y_true, y_pred))
print("\nClassification Report:\n", classification_report(y_true, y_pred))

### 13. PLOTS

In [None]:
# Two side-by-side panels: loss curves on the left, validation accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(train_losses, label="Train Loss")
loss_ax.plot(val_losses, label="Val Loss")
loss_ax.set_title("Loss Curve")
loss_ax.legend()

acc_ax.plot(val_accs, label="Val Accuracy")
acc_ax.set_title("Validation Accuracy")
acc_ax.legend()

plt.show()

### 14. Train and Evaluate Logistic Regression Model

In [None]:
# Multinomial logistic-regression baseline trained on the same scaled features
# as the MLP.
# - solver='saga' supports the multinomial loss and scales to large datasets.
#   No `penalty` is passed, so scikit-learn's default (L2) regularization applies.
# - `multi_class` is intentionally not passed: it is deprecated since
#   scikit-learn 1.5, and with a multiclass target and the saga solver the
#   default already fits a multinomial model.
# - `n_jobs` is intentionally not passed: it only parallelizes one-vs-rest
#   fits and has no effect on a multinomial fit.
# - max_iter is raised above the default of 100 to give saga room to converge.
# - class_weight='balanced' compensates for the class imbalance.
log_reg = LogisticRegression(
    solver='saga',
    max_iter=1000,
    random_state=RANDOM_SEED,
    class_weight='balanced',
)

print("Training Logistic Regression model...")
log_reg.fit(X_train_np, y_train_np)
print("Logistic Regression training complete.")

# Make predictions on the test set
y_pred_log_reg = log_reg.predict(X_test_np)

# Evaluate the model
print("\nLOGISTIC REGRESSION PERFORMANCE:")
print("Test Accuracy:", accuracy_score(y_test_np, y_pred_log_reg))
print("\nClassification Report:\n", classification_report(y_test_np, y_pred_log_reg))

### 15. Train and Evaluate Support Vector Machine (SVM) Model

In [None]:
# Linear SVM baseline trained on the same scaled features as the MLP.
# - LinearSVC (liblinear-based) scales better to large datasets than SVC with
#   a linear kernel.
# - dual=False is the recommended setting when n_samples > n_features.
# - max_iter=1000 is the library default, stated explicitly here; raise it if
#   a ConvergenceWarning is emitted.
# - class_weight='balanced' compensates for the class imbalance.
svm_model = LinearSVC(
    random_state=RANDOM_SEED,
    dual=False,
    max_iter=1000,
    class_weight='balanced',
)

print("\nTraining LinearSVC model...")
svm_model.fit(X_train_np, y_train_np)
print("LinearSVC training complete.")

# Make predictions on the test set
y_pred_svm = svm_model.predict(X_test_np)

# Evaluate the model
print("\nSVM (LinearSVC) PERFORMANCE:")
print("Test Accuracy:", accuracy_score(y_test_np, y_pred_svm))
print("\nClassification Report:\n", classification_report(y_test_np, y_pred_svm))

### 16. Confusion Matrix for Deep MLP Model

In [None]:
# Confusion matrix for the Deep MLP on the test split.
# Rows are true labels, columns are predicted labels.
cm_nn = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_nn, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix - Deep MLP')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()

### 17. Confusion Matrix for Logistic Regression Model

In [None]:
# Confusion matrix for the logistic-regression baseline on the test split.
# Rows are true labels, columns are predicted labels.
cm_lr = confusion_matrix(y_test_np, y_pred_log_reg)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_lr, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix - Logistic Regression')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()

### 18. Confusion Matrix for SVM Model

In [None]:
# Confusion matrix for the LinearSVC baseline on the test split.
# Rows are true labels, columns are predicted labels.
cm_svm = confusion_matrix(y_test_np, y_pred_svm)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_svm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix - SVM (LinearSVC)')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()