# Pattern Recognition 24H1
#### Runze Ji, Jiashuo Tian, Ziqian Liu

#### Import necessary Modules
* pandas
* scikit-learn
* itertools.islice
* tqdm

In [1]:
import os
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from itertools import islice
import csv

#### Specify the parameters used in training the classifier:
* Number of training files (TRAIN_FILES_COUNT)
* Epochs (EPOCHS)
* Path to train files (TRAIN_FILES_PATH)
* Path to Model (MODEL_PATH)

In [2]:
TRAIN_FILES_COUNT = 1500
TRAIN_FILES_OFFSET = 2000
EPOCHS = 20

TRAIN_FILES_PATH = '../../PR/train'
MODEL_PATH = '../../PR/model2.ptm'
LOGS_PATH = '../../PR/eval2.csv'

TRAIN_FILES = os.listdir(TRAIN_FILES_PATH)
TRAIN_FILES_END = TRAIN_FILES_COUNT + TRAIN_FILES_OFFSET
NUMBER_OF_EPOCHS = 0
print(f'[init] Found {len(TRAIN_FILES)} Training Files\n')

[init] Found 18329 Training Files



#### Slice training files and encode labels
* All files containing datasets will be sliced in specified count, allowing separate training
* Encode 'type'

In [3]:
print(f'[init] Reading from Index-{TRAIN_FILES_OFFSET} to Index-{TRAIN_FILES_END-1}')
train_files_pb = tqdm(islice(TRAIN_FILES, TRAIN_FILES_OFFSET, TRAIN_FILES_END), '[preproc.loadCSV] Loading CSV Files...')

all_labels = []

for file in train_files_pb:
    file_path = os.path.join(TRAIN_FILES_PATH, file)
    data = pd.read_csv(file_path)
    all_labels.extend(data['type'].unique())

label_encoder = LabelEncoder()
label_encoder.fit(all_labels)

[init] Reading from Index-2000 to Index-3499


[preproc.loadCSV] Loading CSV Files...: 1500it [00:12, 119.30it/s]


#### Transforms dataframe and extend datatypes
* Analyzes dataframe and extract features
* Extend features

In [4]:
X_all = []
y_all = []

train_files_pb = tqdm(islice(TRAIN_FILES, TRAIN_FILES_COUNT),'[preproc.transform] Transforming Data...', TRAIN_FILES_COUNT)

for file in train_files_pb:
    file_path = os.path.join(TRAIN_FILES_PATH, file)
    data = pd.read_csv(file_path)

    # 转换时间列，提取特征等
    data['time'] = pd.to_datetime(data['time'])
    data['hour'] = data['time'].dt.hour
    data['day_of_week'] = data['time'].dt.dayofweek
    data['month'] = data['time'].dt.month

    # 使用转换后的标签
    data['type_encoded'] = label_encoder.transform(data['type'])

    X = data[['lat', 'lon', '速度', '方向', 'hour', 'day_of_week', 'month']]
    y = data['type_encoded']

    X_all.append(X)
    y_all.append(y)

# 将所有数据合并为一个大的 DataFrame
X = pd.concat(X_all, ignore_index=True)
y = pd.concat(y_all, ignore_index=True)

  data['time'] = pd.to_datetime(data['time'])
[preproc.transform] Transforming Data...: 100%|██████████| 1500/1500 [00:25<00:00, 57.70it/s]


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

#### Define dataset structure
* Create Datasets for training and verifying

In [6]:
from torch.utils.data import Dataset, DataLoader
import torch

class FishingVesselDataset(Dataset):
    def __init__(self, features, labels):
        """
        features: 特征数据，尺寸为 (n_samples, n_features)
        labels: 标签数据，尺寸为 (n_samples,)
        """
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# 假设 X_train, y_train, X_test, y_test 已经准备好了
# 将数据转换为 PyTorch tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# 创建 Dataset
train_dataset = FishingVesselDataset(X_train_tensor, y_train_tensor)
test_dataset = FishingVesselDataset(X_test_tensor, y_test_tensor)

# 创建 DataLoader
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

#### Define Neural Network Structure, Loss Function, and  Optimizer

In [7]:
import torch.nn as nn
import torch.nn.functional as F

class FishingVesselNet(nn.Module):
    def __init__(self, num_features, num_classes):
        super(FishingVesselNet, self).__init__()
        self.fc1 = nn.Linear(num_features, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, num_classes)

    def forward(self, x):

        x = F.sigmoid(self.fc1(x))
        x = F.sigmoid(self.fc2(x))
        x = F.sigmoid(self.fc3(x))
        x = self.fc4(x)

        return x

# 实例化模型
num_features = X_train.shape[1]
num_classes = len(torch.unique(y_train_tensor)) # 假设所有类别都在训练集中出现过

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'[torch.cuda] Availability: {torch.cuda.is_available()}')

model = FishingVesselNet(num_features, num_classes).to(device)

import torch.optim as optim

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

[torch.cuda] Availability: False


  from .autonotebook import tqdm as notebook_tqdm


#### Load Pre-trained Model

In [None]:
model = FishingVesselNet(num_features, num_classes)
optimizer = optim.Adam(model.parameters(), lr=0.001)

checkpoint = torch.load(MODEL_PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
NUMBER_OF_EPOCHS = checkpoint['epoch']
loss = checkpoint['loss']

##model.eval()
# - or -
model.train()
NUMBER_OF_EPOCHS

#### Train a single epoch and evaluate accuracy

In [None]:
# Train single epoch
train_single_pb = tqdm(train_loader)
train_single_pb.set_description(f'[torch.train.single] Training Single Epoch {NUMBER_OF_EPOCHS + 1}')

for inputs, labels in train_single_pb:
    inputs, labels = inputs.to(device), labels.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
NUMBER_OF_EPOCHS += 1
print(f'[torch.train.step] Epoch {NUMBER_OF_EPOCHS}, Loss: {loss.item()}, Total Number of Epochs: {NUMBER_OF_EPOCHS}')

# Test current accuracy
correct = 0
total = 0

test_loader_pb = tqdm(test_loader)
test_loader_pb.set_description('[torch.test] Testing Accuracy')

with torch.no_grad():
    for inputs, labels in test_loader_pb:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'\n[torch.test] Accuracy on test set: {100 * correct / total}%')

torch.save({
            'epoch': NUMBER_OF_EPOCHS,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, MODEL_PATH)

with open('../../PR/eval.csv', 'a') as eval_file:
    eval_file.writelines(f'{NUMBER_OF_EPOCHS},{format(100 * correct / total, ".2f")},{format(loss.item(), ".2f")}\n')
    eval_file.close()

#### Evaluate Current Accuracy

In [None]:
# Test current accuracy
correct = 0
total = 0

test_loader_pb = tqdm(test_loader)
test_loader_pb.set_description('[torch.test] Testing Accuracy')

with torch.no_grad():
    for inputs, labels in test_loader_pb:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'\n[torch.test] Accuracy on test set: {100 * correct / total}%')

#### Train Specified Number of Epochs and Evaluate Accuracy on each Epoch

In [9]:
# Train Specified Number of Epochs and Evaluate Accuracy on each Epoch
for ep in range(EPOCHS):
    train_single_pb = tqdm(train_loader)
    train_single_pb.set_description(f'[torch.train.single] Training Single Epoch {NUMBER_OF_EPOCHS + 1}')

    for inputs, labels in train_single_pb:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    NUMBER_OF_EPOCHS += 1
    print(f'[torch.train.step] Epoch {NUMBER_OF_EPOCHS}, Loss: {loss.item()}, Total Number of Epochs: {NUMBER_OF_EPOCHS}')

    # Test current accuracy
    correct = 0
    total = 0

    test_loader_pb = tqdm(test_loader)
    test_loader_pb.set_description('[torch.test] Testing Accuracy')

    with torch.no_grad():
        for inputs, labels in test_loader_pb:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'\n[torch.test.result] Epoch:{NUMBER_OF_EPOCHS}, Loss:{loss.item()}, Accuracy:{100 * correct / total}%\n')

    torch.save({
                'epoch': NUMBER_OF_EPOCHS,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
                }, MODEL_PATH)

    with open(LOGS_PATH, 'a') as eval_file:
        eval_file.writelines(f'{NUMBER_OF_EPOCHS},{format(100 * correct / total, ".2f")},{format(loss.item(), ".2f")}\n')
        eval_file.close()

[torch.train.single] Training Single Epoch 21:   0%|          | 0/23095 [00:00<?, ?it/s]

[torch.train.single] Training Single Epoch 21: 100%|██████████| 23095/23095 [01:48<00:00, 212.73it/s]


[torch.train.step] Epoch 21, Loss: 0.886793851852417, Total Number of Epochs: 21


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:27<00:00, 553.80it/s]



[torch.test.result] Epoch:21, Loss:0.886793851852417, Accuracy:74.70279958025579%



[torch.train.single] Training Single Epoch 22: 100%|██████████| 23095/23095 [01:53<00:00, 203.44it/s]


[torch.train.step] Epoch 22, Loss: 0.7707154154777527, Total Number of Epochs: 22


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:30<00:00, 511.07it/s]



[torch.test.result] Epoch:22, Loss:0.7707154154777527, Accuracy:75.03333800166843%



[torch.train.single] Training Single Epoch 23: 100%|██████████| 23095/23095 [01:53<00:00, 203.68it/s]


[torch.train.step] Epoch 23, Loss: 0.7228807806968689, Total Number of Epochs: 23


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:30<00:00, 497.23it/s]



[torch.test.result] Epoch:23, Loss:0.7228807806968689, Accuracy:75.33261887509362%



[torch.train.single] Training Single Epoch 24: 100%|██████████| 23095/23095 [01:53<00:00, 203.20it/s]


[torch.train.step] Epoch 24, Loss: 0.4244494140148163, Total Number of Epochs: 24


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:29<00:00, 523.91it/s]



[torch.test.result] Epoch:24, Loss:0.4244494140148163, Accuracy:75.46840653485724%



[torch.train.single] Training Single Epoch 25: 100%|██████████| 23095/23095 [01:53<00:00, 203.44it/s]


[torch.train.step] Epoch 25, Loss: 0.410101056098938, Total Number of Epochs: 25


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:28<00:00, 545.18it/s]



[torch.test.result] Epoch:25, Loss:0.410101056098938, Accuracy:75.75205863428872%



[torch.train.single] Training Single Epoch 26: 100%|██████████| 23095/23095 [01:50<00:00, 209.63it/s]


[torch.train.step] Epoch 26, Loss: 0.4149444103240967, Total Number of Epochs: 26


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:29<00:00, 521.32it/s]



[torch.test.result] Epoch:26, Loss:0.4149444103240967, Accuracy:75.57385001654214%



[torch.train.single] Training Single Epoch 27: 100%|██████████| 23095/23095 [01:49<00:00, 211.36it/s]


[torch.train.step] Epoch 27, Loss: 0.6505568623542786, Total Number of Epochs: 27


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:28<00:00, 535.14it/s]



[torch.test.result] Epoch:27, Loss:0.6505568623542786, Accuracy:75.95107178884511%



[torch.train.single] Training Single Epoch 28: 100%|██████████| 23095/23095 [01:50<00:00, 209.07it/s]


[torch.train.step] Epoch 28, Loss: 0.45645949244499207, Total Number of Epochs: 28


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:28<00:00, 545.43it/s]



[torch.test.result] Epoch:28, Loss:0.45645949244499207, Accuracy:75.74201156529276%



[torch.train.single] Training Single Epoch 29: 100%|██████████| 23095/23095 [01:50<00:00, 208.84it/s]


[torch.train.step] Epoch 29, Loss: 0.6424070000648499, Total Number of Epochs: 29


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:28<00:00, 545.03it/s]



[torch.test.result] Epoch:29, Loss:0.6424070000648499, Accuracy:75.39198791916067%



[torch.train.single] Training Single Epoch 30: 100%|██████████| 23095/23095 [01:57<00:00, 197.30it/s]


[torch.train.step] Epoch 30, Loss: 0.5375939607620239, Total Number of Epochs: 30


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:28<00:00, 537.66it/s]



[torch.test.result] Epoch:30, Loss:0.5375939607620239, Accuracy:75.4791640026711%



[torch.train.single] Training Single Epoch 31: 100%|██████████| 23095/23095 [02:02<00:00, 189.25it/s]


[torch.train.step] Epoch 31, Loss: 0.5349096655845642, Total Number of Epochs: 31


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:28<00:00, 537.37it/s]



[torch.test.result] Epoch:31, Loss:0.5349096655845642, Accuracy:74.97102587678437%



[torch.train.single] Training Single Epoch 32: 100%|██████████| 23095/23095 [02:00<00:00, 191.94it/s]


[torch.train.step] Epoch 32, Loss: 0.46722084283828735, Total Number of Epochs: 32


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:31<00:00, 493.22it/s]



[torch.test.result] Epoch:32, Loss:0.46722084283828735, Accuracy:75.77732853509674%



[torch.train.single] Training Single Epoch 33: 100%|██████████| 23095/23095 [01:52<00:00, 204.96it/s]


[torch.train.step] Epoch 33, Loss: 0.4147057831287384, Total Number of Epochs: 33


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:29<00:00, 530.28it/s]



[torch.test.result] Epoch:33, Loss:0.4147057831287384, Accuracy:75.04866231902591%



[torch.train.single] Training Single Epoch 34: 100%|██████████| 23095/23095 [02:02<00:00, 188.39it/s]


[torch.train.step] Epoch 34, Loss: 0.684242844581604, Total Number of Epochs: 34


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:31<00:00, 482.93it/s]



[torch.test.result] Epoch:34, Loss:0.684242844581604, Accuracy:76.20336485474374%



[torch.train.single] Training Single Epoch 35: 100%|██████████| 23095/23095 [02:00<00:00, 191.22it/s]


[torch.train.step] Epoch 35, Loss: 0.5474082231521606, Total Number of Epochs: 35


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:29<00:00, 521.07it/s]



[torch.test.result] Epoch:35, Loss:0.5474082231521606, Accuracy:76.02109681518061%



[torch.train.single] Training Single Epoch 36: 100%|██████████| 23095/23095 [01:54<00:00, 200.94it/s]


[torch.train.step] Epoch 36, Loss: 0.5344355702400208, Total Number of Epochs: 36


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:31<00:00, 482.90it/s]



[torch.test.result] Epoch:36, Loss:0.5344355702400208, Accuracy:74.63470277928315%



[torch.train.single] Training Single Epoch 37: 100%|██████████| 23095/23095 [01:57<00:00, 196.82it/s]


[torch.train.step] Epoch 37, Loss: 0.5186828374862671, Total Number of Epochs: 37


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:28<00:00, 544.38it/s]



[torch.test.result] Epoch:37, Loss:0.5186828374862671, Accuracy:75.98263379346879%



[torch.train.single] Training Single Epoch 38: 100%|██████████| 23095/23095 [02:01<00:00, 190.19it/s]


[torch.train.step] Epoch 38, Loss: 0.7173260450363159, Total Number of Epochs: 38


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:29<00:00, 518.51it/s]



[torch.test.result] Epoch:38, Loss:0.7173260450363159, Accuracy:75.33525749927438%



[torch.train.single] Training Single Epoch 39: 100%|██████████| 23095/23095 [02:02<00:00, 188.54it/s]


[torch.train.step] Epoch 39, Loss: 0.6455652117729187, Total Number of Epochs: 39


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:29<00:00, 523.20it/s]



[torch.test.result] Epoch:39, Loss:0.6455652117729187, Accuracy:75.94965099120932%



[torch.train.single] Training Single Epoch 40: 100%|██████████| 23095/23095 [01:58<00:00, 194.56it/s]


[torch.train.step] Epoch 40, Loss: 0.4806520342826843, Total Number of Epochs: 40


[torch.test] Testing Accuracy: 100%|██████████| 15397/15397 [00:33<00:00, 466.09it/s]


[torch.test.result] Epoch:40, Loss:0.4806520342826843, Accuracy:76.22264710837236%






#### Model Saving

In [None]:
torch.save({
            'epoch': NUMBER_OF_EPOCHS,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, MODEL_PATH)