# Pattern Recognition 24H1
#### Runze Ji, Jiashuo Tian, Ziqian Liu

#### Import necessary Modules

In [None]:
import os
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from itertools import islice

#### Specify the parameters used to train the classifier:
* Number of training files (TRAIN_FILES_COUNT)
* Index of the first training file to load (TRAIN_FILES_OFFSET)
* Epochs (EPOCHS)
* Path to train files (TRAIN_FILES_PATH)
* Path to Model (MODEL_PATH)

In [None]:
# Length and start index of the window of training files that gets loaded.
TRAIN_FILES_COUNT = 1500
TRAIN_FILES_OFFSET = 2000
# Number of epochs run by the multi-epoch training cell.
EPOCHS = 20
TRAIN_FILES_PATH = '../../PR/train'
MODEL_PATH = '../../PR/model.ptm'
# os.listdir() returns entries in arbitrary order; sort them so that the
# OFFSET/COUNT window selects the same files on every run and machine.
TRAIN_FILES = sorted(os.listdir(TRAIN_FILES_PATH))
# Exclusive end index of the file window.
TRAIN_FILES_END = TRAIN_FILES_COUNT + TRAIN_FILES_OFFSET
# Running count of epochs trained so far; incremented by the training cells.
NUMBER_OF_EPOCHS = 0
print(f'[init] Found {len(TRAIN_FILES)} Training Files\n')

#### Slice training files and encode labels

In [None]:
# First pass over the selected files: collect every label that occurs in the
# 'type' column so the encoder can be fitted before any row is transformed.
print(f'[init] Reading from Index-{TRAIN_FILES_OFFSET} to Index-{TRAIN_FILES_END-1}')
# A list slice (unlike islice) has a length, so tqdm can show real progress.
selected_files = TRAIN_FILES[TRAIN_FILES_OFFSET:TRAIN_FILES_END]
train_files_pb = tqdm(selected_files, '[preproc.loadCSV] Loading CSV Files...')

all_labels = []

for file in train_files_pb:
    file_path = os.path.join(TRAIN_FILES_PATH, file)
    # Only the label column is needed here; skip parsing the other columns.
    data = pd.read_csv(file_path, usecols=['type'])
    all_labels.extend(data['type'].unique())

# Fit a single encoder on the labels gathered from all selected files.
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)

#### Transform the dataframes and extract time features

In [None]:
# Second pass over the training files: build the feature matrix X and the
# encoded label vector y, one frame per file, then concatenate them.
X_all = []
y_all = []

# Use the same OFFSET..END window the label encoder was fitted on. Reading a
# different set of files here could feed transform() labels it never saw.
selected_files = TRAIN_FILES[TRAIN_FILES_OFFSET:TRAIN_FILES_END]
train_files_pb = tqdm(selected_files, '[preproc.transform] Transforming Data...')

for file in train_files_pb:
    file_path = os.path.join(TRAIN_FILES_PATH, file)
    data = pd.read_csv(file_path)

    # Parse the time column and derive calendar features from it.
    data['time'] = pd.to_datetime(data['time'])
    data['hour'] = data['time'].dt.hour
    data['day_of_week'] = data['time'].dt.dayofweek
    data['month'] = data['time'].dt.month

    # Replace the raw labels with their integer codes.
    data['type_encoded'] = label_encoder.transform(data['type'])

    # The column names '速度' and '方向' translate to "speed" and "direction".
    X = data[['lat', 'lon', '速度', '方向', 'hour', 'day_of_week', 'month']]
    y = data['type_encoded']

    X_all.append(X)
    y_all.append(y)

# Merge the per-file frames into one DataFrame / Series.
X = pd.concat(X_all, ignore_index=True)
y = pd.concat(y_all, ignore_index=True)

In [None]:
# Hold out 40% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)

#### Define dataset structure

In [None]:
from torch.utils.data import Dataset, DataLoader
import torch

class FishingVesselDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        features: Indexable feature container, documented by the original
            author as having size (n_samples, n_features).
        labels: Indexable label container, documented as size (n_samples,).
    """

    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        # The label container defines the number of samples.
        n_samples = len(self.labels)
        return n_samples

    def __getitem__(self, idx):
        sample = self.features[idx]
        target = self.labels[idx]
        return sample, target

# X_train, y_train, X_test and y_test are assumed to be ready at this point.

# Training split: pandas values -> tensors -> Dataset -> shuffled DataLoader.
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
train_dataset = FishingVesselDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Test split: same pipeline, iterated in fixed order.
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)
test_dataset = FishingVesselDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

#### Define Neural Network Structure, Loss Function, and Optimizer

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class FishingVesselNet(nn.Module):
    """Fully connected classifier with two hidden layers.

    Layer widths are num_features -> 64 -> 32 -> num_classes.

    Args:
        num_features: Width of the input layer.
        num_classes: Width of the output layer.
    """

    def __init__(self, num_features, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(num_features, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, num_classes)

    def forward(self, x):
        """Return the output-layer scores for the batch ``x``."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        # No activation on the last layer: its output is returned as-is.
        return self.fc3(hidden)

# Instantiate the model.
num_features = X_train.shape[1]
# Size the output layer from the fitted encoder rather than from the labels
# that happen to land in the training split. If a class were missing from the
# split, counting unique training labels would yield too few outputs and the
# largest encoded labels would be out of range for the loss.
num_classes = len(label_encoder.classes_)

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'[torch.cuda] Availability: {torch.cuda.is_available()}')

model = FishingVesselNet(num_features, num_classes).to(device)

import torch.optim as optim

# Define the loss function and the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

#### Train a single epoch and evaluate accuracy

In [None]:
# Train single epoch
train_single_pb = tqdm(train_loader)
train_single_pb.set_description(f'[torch.train.single] Training Single Epoch {NUMBER_OF_EPOCHS + 1}')

for inputs, labels in train_single_pb:
    inputs, labels = inputs.to(device), labels.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
NUMBER_OF_EPOCHS += 1
# NOTE: `loss` is the loss of the last mini-batch only, not an epoch average.
print(f'[torch.train.step] Epoch {NUMBER_OF_EPOCHS}, Loss: {loss.item()}, Total Number of Epochs: {NUMBER_OF_EPOCHS}')

# Test current accuracy
correct = 0
total = 0

test_loader_pb = tqdm(test_loader)
test_loader_pb.set_description('[torch.test] Testing Accuracy')

with torch.no_grad():
    for inputs, labels in test_loader_pb:
        # The model lives on `device`; the batches must be moved there too,
        # otherwise the forward pass fails when the model is on CUDA.
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'\n[torch.test] Accuracy on test set: {100 * correct / total}%')

# Checkpoint the model, the optimizer and the epoch counter.
torch.save({
            'epoch': NUMBER_OF_EPOCHS,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, MODEL_PATH)

# Append one "epoch,accuracy,loss" row to the evaluation log. The `with`
# block closes the file, so no explicit close() is needed.
with open('../../PR/eval.csv', 'a') as eval_file:
    eval_file.write(f'{NUMBER_OF_EPOCHS},{100 * correct / total:.2f},{loss.item():.2f}\n')

#### Evaluate Current Accuracy

In [None]:
# Test current accuracy
correct = 0
total = 0

test_loader_pb = tqdm(test_loader)
test_loader_pb.set_description('[torch.test] Testing Accuracy')

with torch.no_grad():
    for inputs, labels in test_loader_pb:
        # The model lives on `device`; the batches must be moved there too,
        # otherwise the forward pass fails when the model is on CUDA.
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'\n[torch.test] Accuracy on test set: {100 * correct / total}%')

#### Train Specified Number of Epochs and Evaluate Accuracy on each Epoch

In [None]:
# Train Specified Number of Epochs and Evaluate Accuracy on each Epoch
for _ in range(EPOCHS):
    train_single_pb = tqdm(train_loader)
    train_single_pb.set_description(f'[torch.train.single] Training Single Epoch {NUMBER_OF_EPOCHS + 1}')

    for inputs, labels in train_single_pb:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    NUMBER_OF_EPOCHS += 1
    # NOTE: `loss` is the loss of the last mini-batch only, not an epoch average.
    print(f'[torch.train.step] Epoch {NUMBER_OF_EPOCHS}, Loss: {loss.item()}, Total Number of Epochs: {NUMBER_OF_EPOCHS}')

    # Test current accuracy
    correct = 0
    total = 0

    test_loader_pb = tqdm(test_loader)
    test_loader_pb.set_description('[torch.test] Testing Accuracy')

    with torch.no_grad():
        for inputs, labels in test_loader_pb:
            # The model lives on `device`; the batches must be moved there
            # too, otherwise the forward pass fails when the model is on CUDA.
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'\n[torch.test.result] Epoch:{NUMBER_OF_EPOCHS}, Loss:{loss.item()}, Accuracy:{100 * correct / total}%\n')

    # Checkpoint after every epoch so an interrupted run can be resumed.
    torch.save({
                'epoch': NUMBER_OF_EPOCHS,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
                }, MODEL_PATH)

    # Append one "epoch,accuracy,loss" row to the evaluation log. The `with`
    # block closes the file, so no explicit close() is needed.
    with open('../../PR/eval.csv', 'a') as eval_file:
        eval_file.write(f'{NUMBER_OF_EPOCHS},{100 * correct / total:.2f},{loss.item():.2f}\n')

#### Model Saving

In [None]:
# Write only the model weights (a bare state_dict) to MODEL_PATH.
# NOTE(review): this is the same path the checkpoint cells write to, and the
# restore cell indexes the loaded object with 'model_state_dict'; a file
# written by this cell would not contain that key.
MODEL_PATH = '../../PR/model.ptm'
torch.save(obj=model.state_dict(), f=MODEL_PATH)

In [None]:
# Bundle everything needed to resume training into one checkpoint dict.
snapshot = {
    'epoch': NUMBER_OF_EPOCHS,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
}
torch.save(snapshot, MODEL_PATH)

In [None]:
# Manually override the epoch counter used in log lines and checkpoints.
# NOTE(review): 38 is presumably the epoch count of an earlier run; confirm.
NUMBER_OF_EPOCHS = 38

In [None]:
# Rebuild the model on the active device. The training cells move every batch
# to `device`, so a model left on the CPU would fail on a CUDA machine.
model = FishingVesselNet(num_features, num_classes).to(device)
# Create the optimizer after the move so it tracks the on-device parameters.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# map_location remaps the stored tensors onto `device`, so a checkpoint
# written on a GPU machine can also be restored on a CPU-only one.
checkpoint = torch.load(MODEL_PATH, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
NUMBER_OF_EPOCHS = checkpoint['epoch']
loss = checkpoint['loss']

##model.eval()
# - or -
# Put the model in training mode so training can continue from the checkpoint.
model.train()
# Bare expression: shows the restored epoch count as the notebook cell output.
NUMBER_OF_EPOCHS