Task 1: Image Classification


In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

Load the Fashion-MNIST train and test CSV files, scale pixel values to [0, 1], reshape each row into a 1 × 28 × 28 image tensor, and build DataLoaders. Note that the test split is served through the loader named `val_loader`.

In [None]:
# Locations of the two Fashion-MNIST CSV files (absolute paths under /content).
TRAIN_CSV_PATH, TEST_CSV_PATH = (
    "/content/fashion-mnist_train.csv",
    "/content/fashion-mnist_test.csv",
)

# Read both files into DataFrames: training frame first, test frame second.
train_df, test_df = map(pd.read_csv, (TRAIN_CSV_PATH, TEST_CSV_PATH))

def preprocess_fashion_df(df):
    """Convert a Fashion-MNIST CSV frame into image and label tensors.

    Column 0 is read as the class label; every remaining column is read as one
    pixel of a flattened 28 x 28 single-channel image.

    Args:
        df: DataFrame whose first column is the label, followed by exactly
            784 pixel columns (expected intensities 0-255).

    Returns:
        Tuple ``(X, y)`` where ``X`` is a float32 tensor of shape
        (N, 1, 28, 28) holding the pixel values divided by 255, and ``y`` is
        an int64 tensor of shape (N,) holding the labels.

    Raises:
        ValueError: if the frame does not contain exactly 784 pixel columns.
    """
    height = width = 28
    expected_pixels = height * width

    # Labels: first column (class ids, expected to be 0-9)
    y = df.iloc[:, 0].values

    pixels = df.iloc[:, 1:].values

    # Guard against a silent mis-reshape: reshape(-1, ...) only checks the
    # TOTAL element count, so a frame with the wrong number of pixel columns
    # could otherwise yield a different number of images than labels.
    if pixels.shape[1] != expected_pixels:
        raise ValueError(
            f"expected {expected_pixels} pixel columns after the label column, "
            f"got {pixels.shape[1]}"
        )

    # Scale to [0, 1] and reshape to N x 1 x 28 x 28 (channel-first images)
    X = (pixels / 255.0).reshape(-1, 1, height, width)

    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.long)
    return X, y

# Apply the same preprocessing to each split (train first, then test).
(X_train, y_train), (X_test, y_test) = map(preprocess_fashion_df, (train_df, test_df))

# Batches of 64. Training data is shuffled; the loader built from the test
# split (bound to the name val_loader) keeps the original row order.
train_loader = DataLoader(
    dataset=TensorDataset(X_train, y_train),
    batch_size=64,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=TensorDataset(X_test, y_test),
    batch_size=64,
    shuffle=False,
)

Task 2: Loan Default Prediction

In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the loan payments CSV into the working DataFrame
df = pd.read_csv(filepath_or_buffer="/content/Loan payments data.csv")

In [3]:
# Binary target: 0 when loan_status is exactly "PAIDOFF", 1 for anything else
df["loan_status_binary"] = df["loan_status"].ne("PAIDOFF").astype(int)

In [4]:
# Parse both date columns into pandas datetimes so they support date arithmetic
df["effective_date"], df["due_date"] = (
    pd.to_datetime(df["effective_date"]),
    pd.to_datetime(df["due_date"]),
)

In [5]:
# Derived feature: whole days from effective_date to due_date
df["loan_duration_days"] = df["due_date"].sub(df["effective_date"]).dt.days

In [6]:
# Restrict the model inputs to the columns treated as leakage-free
# (NOTE(review): presumably the remaining columns reveal the outcome; confirm against the dataset schema)
numeric_cols = ["Principal", "terms", "age", "loan_duration_days"]
categorical_cols = ["education", "Gender"]

# Target first, then the feature frame (numeric columns followed by categoricals)
y = df["loan_status_binary"]
X_raw = df[[*numeric_cols, *categorical_cols]]

In [7]:
# Numeric columns pass through unchanged
X_num = X_raw[numeric_cols]

# Categorical columns become indicator columns; drop_first removes one level
# per variable so the indicators are not redundant
X_cat = pd.get_dummies(data=X_raw[categorical_cols], drop_first=True)

# Side-by-side: numeric block on the left, indicator block on the right
X_processed = pd.concat(objs=[X_num, X_cat], axis="columns")

In [8]:
# Standardize every column of X_processed with StandardScaler.
# Note: the scaler is fit on every row of X_processed, so its statistics
# cover the full dataset rather than a training subset.
scaler = StandardScaler()
X_scaled = scaler.fit(X_processed).transform(X_processed)

In [9]:
# Train/validation split (80/20, stratified on the target, fixed seed),
# followed by standardization fit on the TRAINING rows only.
#
# Splitting the unscaled X_processed and fitting the scaler afterwards keeps
# the validation rows out of the mean/std estimates; fitting on all rows
# before splitting leaks validation statistics into preprocessing and makes
# validation metrics optimistic. X_scaled (fit on every row) is therefore
# deliberately not used here.
X_train_unscaled, X_val_unscaled, y_train, y_val = train_test_split(
    X_processed, y, test_size=0.2, random_state=42, stratify=y
)

# Rebind `scaler` to the train-only fit so later cells reuse the leak-free statistics
scaler = StandardScaler().fit(X_train_unscaled)
X_train = scaler.transform(X_train_unscaled)
X_val = scaler.transform(X_val_unscaled)

In [10]:
# Convert features and targets to float32 PyTorch tensors.
#
# This cell rebinds its own inputs (X_train, y_train, ...), so it must be safe
# to run more than once. np.asarray accepts NumPy arrays, pandas Series and
# CPU tensors alike, whereas `.values` on a Tensor is a bound method rather
# than an array, which makes a second run of the cell fail.
X_train = torch.as_tensor(np.asarray(X_train), dtype=torch.float32)
X_val = torch.as_tensor(np.asarray(X_val), dtype=torch.float32)
# Targets are stored as float32 (not integer class ids)
y_train = torch.as_tensor(np.asarray(y_train), dtype=torch.float32)
y_val = torch.as_tensor(np.asarray(y_val), dtype=torch.float32)

In [11]:
# Pair each feature tensor with its target tensor
train_dataset, val_dataset = (
    TensorDataset(X_train, y_train),
    TensorDataset(X_val, y_val),
)

# Batches of 64: shuffled for training, fixed order for validation
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=False)

In [12]:
# pos_weight for imbalanced classification: ratio of negative (0) to
# positive (1) training labels, stored as a one-element float32 tensor.
pos_count = y_train.sum().item()
neg_count = len(y_train) - pos_count

# Without any positive label the ratio is undefined; fail with a clear message
# instead of a bare ZeroDivisionError.
if pos_count == 0:
    raise ValueError(
        "Cannot compute pos_weight: the training labels contain no positive (label 1) examples."
    )
pos_weight = torch.tensor([neg_count / pos_count], dtype=torch.float32)

print("Preprocessing done. ")

Preprocessing done. 
