# Pattern Recognition 24H1
#### Runze Ji, Jiashuo Tian, Ziqian Liu

In [None]:
import os
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [None]:
# Directory holding the per-file training CSVs, and the checkpoint location
# used by the save/load cells further down.
train_files_path = '../../PR/train'
model_path = '/content/drive/MyDrive/PR/model.ptm'

# os.listdir returns entries in arbitrary order; sorting keeps the order in
# which files are concatenated (and therefore the seeded train/test split)
# reproducible between runs and machines.
train_files = sorted(os.listdir(train_files_path))

print(f'[PREPROC] Found {len(train_files)} Training Files\n')

In [None]:
# Gather the 'type' values that occur in every training file so the encoder
# is fitted on the complete label set before any file is transformed.
all_labels = []

train_files_pb = tqdm(train_files)
train_files_pb.set_description('[preproc.loadCSV] Loading CSV Files...')

for file in train_files_pb:
    file_path = os.path.join(train_files_path, file)
    # Only the 'type' column is needed in this pass; restricting the read
    # avoids parsing every other column of every file just to discard it.
    type_column = pd.read_csv(file_path, usecols=['type'])['type']
    all_labels.extend(type_column.unique())

# Duplicates across files are harmless: fit() reduces them to unique classes.
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)

In [None]:
# Per-file feature frames and encoded label series, concatenated afterwards.
FEATURE_COLUMNS = ['lat', 'lon', '速度', '方向', 'hour', 'day_of_week', 'month']

X_all = []
y_all = []

train_files_pb = tqdm(train_files)
train_files_pb.set_description('[preproc.transform] Transforming Data...')

for file in train_files_pb:
    data = pd.read_csv(os.path.join(train_files_path, file))

    # Parse the time column once, then derive the calendar features from it.
    timestamps = pd.to_datetime(data['time'])
    data['time'] = timestamps
    data['hour'] = timestamps.dt.hour
    data['day_of_week'] = timestamps.dt.dayofweek
    data['month'] = timestamps.dt.month

    # Map the raw 'type' values to integers with the already-fitted encoder.
    data['type_encoded'] = label_encoder.transform(data['type'])

    X_all.append(data[FEATURE_COLUMNS])
    y_all.append(data['type_encoded'])

# Merge all files into one feature DataFrame and one label Series.
X = pd.concat(X_all, ignore_index=True)
y = pd.concat(y_all, ignore_index=True)

In [None]:
# Hold out 40% of the rows for testing; the fixed seed makes the shuffle
# repeatable for a given row order.
split_parts = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
from torch.utils.data import Dataset, DataLoader
import torch

class FishingVesselDataset(Dataset):
    """Dataset that pairs each feature row with the label at the same index."""

    def __init__(self, features, labels):
        """
        features: feature data, size (n_samples, n_features)
        labels: label data, size (n_samples,)
        """
        self.labels = labels
        self.features = features

    def __len__(self):
        """Return the number of samples, taken from the label container."""
        n_samples = len(self.labels)
        return n_samples

    def __getitem__(self, idx):
        """Return the (features, label) pair stored at position ``idx``."""
        sample = self.features[idx]
        target = self.labels[idx]
        return sample, target

# X_train, y_train, X_test, y_test are expected to exist already (produced by
# the train/test split). Features become float32 tensors, labels int64 tensors.
BATCH_SIZE = 64

# Training split: tensors -> dataset -> shuffled loader.
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
train_dataset = FishingVesselDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Test split: tensors -> dataset -> loader in fixed order.
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)
test_dataset = FishingVesselDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class FishingVesselNet(nn.Module):
    """Three-layer fully connected classifier: num_features -> 64 -> 32 -> num_classes."""

    def __init__(self, num_features, num_classes):
        """Create the three linear layers.

        num_features: width of each input row
        num_classes: number of output scores per row
        """
        super().__init__()
        self.fc1 = nn.Linear(num_features, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, num_classes)

    def forward(self, x):
        """Return one raw score per class for every row of ``x``.

        ReLU follows the first two layers only; the last layer's output is
        returned without an activation.
        """
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        scores = self.fc3(hidden)
        return scores

# Instantiate the model.
num_features = X_train.shape[1]
# Size the output layer from the fitted encoder rather than from the labels
# that happen to appear in the training split: encoded labels range over all
# encoder classes, so a class present only in the test split would otherwise
# produce a label index outside the output layer.
num_classes = len(label_encoder.classes_)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'[torch.cuda] Availability: {torch.cuda.is_available()}')

model = FishingVesselNet(num_features, num_classes).to(device)

In [None]:
import torch.optim as optim

# Define the loss function and the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model.
num_epochs = 10

for epoch in range(num_epochs):
    # Make sure the model is in training mode even if an evaluation cell
    # was run before this one.
    model.train()

    # A tqdm bar is finished after one full pass over its iterable, so a
    # fresh bar is created for every epoch instead of reusing a single one.
    train_loader_pb = tqdm(train_loader)
    train_loader_pb.set_description('[torch.train.epoch] Train Loader Step')

    for inputs, labels in train_loader_pb:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    # The reported loss is that of the last batch of the epoch.
    print(f' Epoch {epoch+1}, Loss: {loss.item()}')

In [18]:
# Run one more pass over the training data, continuing from the current
# model/optimizer state and advancing the epoch counter kept by earlier cells.
train_single_pb = tqdm(train_loader)
train_single_pb.set_description('[torch.train.single] Training Single Epoch ')

for batch in train_single_pb:
    inputs = batch[0].to(device)
    labels = batch[1].to(device)

    optimizer.zero_grad()
    loss = criterion(model(inputs), labels)
    loss.backward()
    optimizer.step()

# `epoch` is zero-based, hence the +1 when it is displayed.
epoch = epoch + 1
print(f'[torch.train.step] Epoch {epoch+1}, Loss: {loss.item()}')

[torch.train.single] Training Single Epoch : 100%|██████████| 269743/269743 [33:18<00:00, 134.94it/s] 

[torch.train.step] Epoch 5, Loss: 0.7565252780914307





In [19]:
# Test current accuracy: fraction of test rows whose highest-scoring class
# equals the encoded label.
correct = 0
total = 0

test_loader_pb = tqdm(test_loader)
test_loader_pb.set_description('[torch.test] Testing Accuracy')

# Evaluate in eval mode, then restore whatever mode the model was in so the
# training cells behave the same after this cell has run.
was_training = model.training
model.eval()

with torch.no_grad():
    for inputs, labels in test_loader_pb:
        # The model lives on `device`; the batches must be moved there too,
        # otherwise the forward pass fails whenever CUDA is in use.
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

model.train(was_training)

# Guard against an empty test loader instead of dividing by zero.
accuracy = 100 * correct / total if total else 0.0
print(f'\n[torch.test] Accuracy on test set: {accuracy}%')

[torch.test] Testing Accuracy:   0%|          | 0/179829 [00:00<?, ?it/s]

[torch.test] Testing Accuracy: 100%|██████████| 179829/179829 [04:32<00:00, 660.50it/s]


[torch.test] Accuracy on test set: 65.13091973993303%





In [None]:
# Evaluate Accuracy on each epoch
import torch.optim as optim

# Define the loss function and the optimizer. Note that this creates a fresh
# Adam optimizer, so any optimizer state from earlier cells is not reused.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model.
num_epochs = 15

epochs_pb = tqdm(range(num_epochs))
epochs_pb.set_description('[torch.train] Training')

for epoch in epochs_pb:
    # Training pass.
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Evaluation pass over the test set.
    correct = 0
    total = 0

    model.eval()
    with torch.no_grad():
        for inputs, labels in test_loader:
            # Test batches must be on the same device as the model; without
            # this the forward pass fails whenever CUDA is in use.
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard against an empty test loader instead of dividing by zero.
    accuracy = 100 * correct / total if total else 0.0
    # The reported loss is that of the last training batch of the epoch.
    print(f'[torch.train.accuracy] Epoch {epoch+1}, Loss: {loss.item()}, Accuracy: {accuracy}%')

# Leave the model in training mode, as the training loop does.
model.train()

In [None]:
# Write only the model's parameters (its state_dict) to model_path.
model_weights = model.state_dict()
torch.save(model_weights, model_path)

In [None]:
# Save a resumable checkpoint: epoch counter, model weights, optimizer state
# and the most recent loss value. This writes to the same model_path as the
# weights-only save, replacing that file.
torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Store a detached CPU copy so the checkpoint carries neither the
            # autograd graph nor a dependency on the training device.
            'loss': loss.detach().cpu(),
            }, model_path)

In [None]:
# Rebuild the model and optimizer, then restore their state from the
# checkpoint dictionary stored at model_path.
# The model is moved to `device` before the optimizer is created so the
# optimizer tracks the on-device parameters, matching the training cells
# which send every batch to `device`.
model = FishingVesselNet(num_features, num_classes).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# map_location lets a checkpoint written on a GPU be loaded on a CPU-only
# machine (and vice versa).
checkpoint = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

# The original cell called model.eval() and then model.train(), which leaves
# the model in training mode; that final state is kept here. Call
# model.eval() instead when the restored model is used for inference only.
model.train()