In [16]:
import numpy as np
from scipy.optimize import minimize
import pandas as pd
import matplotlib.pyplot as plt

# Q1

In [2]:
# Fix the global NumPy seed so the synthetic inputs are reproducible
np.random.seed(0)
n = 1_000_000
# Draw n samples from the standard normal distribution N(0, 1)
x = np.random.normal(loc=0.0, scale=1.0, size=n)

In [5]:
# Sigmoid activation function
def sigmoid(z):
    """Numerically stable logistic sigmoid, 1 / (1 + exp(-z)), element-wise.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    Scalar or ndarray with the same shape as ``z``; every value lies in [0, 1].
    """
    z = np.asarray(z)
    # exp(-|z|) lies in (0, 1], so it cannot overflow, unlike exp(-z) for
    # large negative z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- the same
    # function, written so that only the non-overflowing exponential is used.
    numerator = np.where(z >= 0, 1.0, decay)
    return numerator / (1.0 + decay)

y = sigmoid(x)   # targets: one sigmoid unit applied to x (a 1-layer net with weight 1)

In [6]:
# Loss function
def loss_1_layer(weights):
    """Mean squared error of the 1-layer model sigmoid(w * x) against y.

    ``weights`` is a length-1 sequence holding the single weight ``w``; the
    inputs ``x`` and targets ``y`` are read from the notebook's globals.
    """
    w = weights[0]
    residual = y - sigmoid(w * x)
    return np.mean(residual ** 2)

In [7]:
# Initial guess for the single weight, used as the optimizer's starting point
initial_weights_1_layer = np.array([0.1])

In [8]:
# Fit the single weight by minimising the MSE loss from the initial guess
result_1_layer = minimize(fun=loss_1_layer, x0=initial_weights_1_layer)

# Pull the final loss value and the fitted weight vector out of the result
training_error_1_layer = result_1_layer.fun
weights_1_layer_optimized = result_1_layer.x

In [9]:
# Report the fitted weight and the final training MSE of the 1-layer model
print(f"Optimized weight: {weights_1_layer_optimized[0]}")
print(f"Training error: {training_error_1_layer}")

Optimized weight: 0.9999746652072058
Training error: 1.521799218766232e-11


In [11]:
## Two-layer case
def loss_2_layer(weights):
    """Mean squared error of the 2-layer model against y.

    The model is ``weights[2] * sigmoid(weights[1] * sigmoid(weights[0] * x))``
    with no bias terms; ``x`` and ``y`` are read from the notebook's globals.
    """
    w_hidden, w_inner, w_scale = weights[0], weights[1], weights[2]
    hidden = sigmoid(w_hidden * x)
    prediction = w_scale * sigmoid(w_inner * hidden)
    squared_error = (y - prediction) ** 2
    return np.mean(squared_error)

# Starting point: the same small value for all three weights
initial_weights_2_layer = np.array([0.1, 0.1, 0.1])

# Fit the three weights of the 2-layer NN by minimising its MSE loss
result_2_layer = minimize(fun=loss_2_layer, x0=initial_weights_2_layer)

# Pull the final loss value and the fitted weight vector out of the result
training_error_2_layer = result_2_layer.fun
weights_2_layer_optimized = result_2_layer.x

In [12]:
# Report the fitted weights and the final training MSE of the 2-layer model
print(f"Optimized weights for 2-layer NN: {weights_2_layer_optimized}")
print(f"Training error for 2-layer NN: {training_error_2_layer}")

Optimized weights for 2-layer NN: [5.59337038 3.4605317  0.67314478]
Training error for 2-layer NN: 0.010075646134034917


## Explanation

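In this case the relationship between input and output is simple: the targets are generated by a single sigmoid unit, so the 1-layer model can fit them almost exactly (training error ≈ 1.5e-11). The added layer introduces unnecessary complexity: without bias terms the 2-layer network cannot represent sigmoid(x) exactly, and its loss surface is non-convex, which makes it harder for the model to learn the simple underlying relationship in the data (training error ≈ 0.010).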

# Q2

In [17]:
# Load dataset (path is relative to the notebook's working directory)
# NOTE(review): the rendered output below shows an 'Unnamed: 0' header. If that
# is a real column (a saved index) rather than the index label, it is kept as a
# feature by the later `df.drop('fraud', axis=1)` -- confirm against the CSV.
df = pd.read_csv('card_transdata-1.csv')
df

Unnamed: 0,distance_from_home,distance_from_last_transaction,ratio_to_median_purchase_price,repeat_retailer,used_chip,used_pin_number,online_order,fraud
0,57.877857,0.311140,1.945940,1,1,0,0,0
1,10.829943,0.175592,1.294219,1,0,0,0,0
2,5.091079,0.805153,0.427715,1,0,0,1,0
3,2.247564,5.600044,0.362663,1,1,0,1,0
4,44.190936,0.566486,2.222767,1,1,0,1,0
...,...,...,...,...,...,...,...,...
999995,2.207101,0.112651,1.626798,1,1,0,0,0
999996,19.872726,2.683904,2.778303,1,1,0,0,0
999997,2.914857,1.472687,0.218075,1,1,0,1,0
999998,4.258729,0.242023,0.475822,1,0,0,1,0


In [25]:
## Use the packages in tutorial
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import StandardScaler

Try with num_epochs = 10 and lr = 0.01

In [24]:
# Features: every column except the label; target: the 'fraud' column
X = df.drop(columns='fraud').to_numpy()
y = df['fraud'].to_numpy()

# 80/20 train/test split with a fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features using statistics from the training split only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
## Basically same as the last homework

# Convert to float32 Torch tensors; labels become column vectors of shape (N, 1)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32)

# Pair features with labels
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Mini-batches of 64; only the training data is shuffled
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)

# Define NN
class Net(nn.Module):
    """Fully connected binary classifier: two ReLU hidden layers, sigmoid output.

    Parameters
    ----------
    in_features : int, optional
        Number of input features. When omitted, it falls back to
        ``X_train.shape[1]`` from the notebook's globals, so ``Net()`` keeps
        working as before.
    hidden_size : int, default 64
        Width of both hidden layers.
    """

    def __init__(self, in_features=None, hidden_size=64):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: infer the width from the training matrix
            in_features = X_train.shape[1]
        self.fc1 = nn.Linear(in_features, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map a (batch, in_features) tensor to (batch, 1) probabilities."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # Sigmoid squashes the single output into (0, 1), as nn.BCELoss expects
        x = torch.sigmoid(self.fc3(x))
        return x

# Seed PyTorch's global RNG so weight initialisation and the DataLoader's
# shuffling are repeatable when the notebook is re-run in the same environment.
torch.manual_seed(42)

model = Net()

# Loss and Optimizer
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss on the sigmoid probabilities
optimizer = optim.SGD(model.parameters(), lr=0.01)  # plain SGD, learning rate 0.01

# Train the Model
num_epochs = 10  # Adjustable
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate loss; the batch loss is a mean, so weight it by batch size
        running_loss += loss.item() * inputs.size(0)

        # Calculate accuracy. The model emits one probability per sample with
        # shape (batch, 1), so threshold at 0.5. An argmax over dim 1 would
        # always return index 0 and then broadcast against the (batch, 1) labels.
        predicted = (outputs.detach() >= 0.5).float()
        total_preds += labels.size(0)
        correct_preds += (predicted == labels).sum().item()

    # Per-sample averages over the whole training set for this epoch
    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = correct_preds / total_preds

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

# Evaluate the Model
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    # Predicted probabilities for the whole test set in one forward pass
    y_pred = model(X_test_tensor)
    # Predicted class labels: 1.0 where the probability is at least 0.5, else 0.0
    y_pred_class = (y_pred >= 0.5).to(torch.float32)

# Compare the predicted classes with the true test labels
accuracy = accuracy_score(y_true=y_test_tensor, y_pred=y_pred_class)
f1 = f1_score(y_true=y_test_tensor, y_pred=y_pred_class)

print(f'Final Accuracy: {accuracy}')
print(f'F1 Score: {f1}')

Epoch 1/10, Loss: 0.0841
Epoch 2/10, Loss: 0.0333
Epoch 3/10, Loss: 0.0227
Epoch 4/10, Loss: 0.0177
Epoch 5/10, Loss: 0.0138
Epoch 6/10, Loss: 0.0118
Epoch 7/10, Loss: 0.0106
Epoch 8/10, Loss: 0.0100
Epoch 9/10, Loss: 0.0088
Epoch 10/10, Loss: 0.0082
Final Accuracy: 0.99796
F1 Score: 0.9882745143119899


## 数据准备和预处理
数据集分割：使用train_test_split函数将数据集df分为训练集和测试集，其中test_size=0.2表示测试集占总数据的20%，random_state=42保证每次分割的结果都是一样的。

特征标准化：使用StandardScaler对特征进行标准化处理，即使数据的均值为0，标准差为1。这是因为神经网络训练时数值较小且相近的数据可以帮助模型更快地收敛。

转换为Torch张量：将NumPy数组转换为PyTorch张量，以便在PyTorch中使用。对于标签y，通过添加一个维度([:, None])来进行重塑，使其适合二分类任务。

创建数据加载器：使用DataLoader创建可迭代的数据加载器，用于批量加载数据并在训练期间可选择地对数据进行洗牌。

## 神经网络定义
定义网络结构：Net类继承自nn.Module，定义了一个具有三个全连接层（fc1, fc2, fc3）的简单神经网络。网络的第一层接受输入特征的维度，最后一层输出一个单一的预测值。relu作为激活函数增加非线性，最后一层之后应用sigmoid激活函数，输出一个介于0和1之间的预测概率。
## 损失函数和优化器
损失函数和优化器：使用二元交叉熵损失（BCELoss）作为损失函数，这适用于二分类问题。SGD（随机梯度下降）作为优化器，负责更新网络的权重。
## 训练过程
训练循环：对于指定的训练轮次（num_epochs），执行以下步骤：
在每个批次的数据上执行前向传播，计算预测值。
计算损失值。
执行反向传播，计算梯度。
使用优化器更新模型参数。
计算并累积整个训练集上的损失和准确率。
## 评估过程
模型评估：在训练完成后，将模型设置为评估模式（.eval()），这会停用诸如Dropout等仅在训练时需要的层。使用torch.no_grad()停用梯度计算，减少计算需求和内存使用。
计算测试数据的预测值。
根据阈值（这里是0.5）将预测概率转换为类别标签。
计算并打印准确率和F1分数。

## In this NN, the loss decreases as the number of epochs increases. The final model achieves good accuracy and a good F1 score, which indicates that the model is trustworthy; its performance is also better than that of a simple decision tree or random forest.