# Pattern Recognition 24H1
#### Runze Ji, Jiashuo Tian, Ziqian Liu

In [4]:
import os
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from itertools import islice

In [8]:
TRAIN_FILES_COUNT = 1000

In [2]:
train_files_path = '../../PR/train'
model_path = '../../PR/model.ptm'
train_files = os.listdir(train_files_path)

print(f'[PREPROC] Found {len(train_files)} Training Files\n')

[PREPROC] Found 18329 Training Files



In [5]:
all_labels = []

train_files_pb = tqdm(islice(train_files, TRAIN_FILES_COUNT),'[preproc.loadCSV] Loading CSV Files...')

for file in train_files_pb:
    file_path = os.path.join(train_files_path, file)
    data = pd.read_csv(file_path)
    all_labels.extend(data['type'].unique())

label_encoder = LabelEncoder()
label_encoder.fit(all_labels)

[preproc.loadCSV] Loading CSV Files...: : 1000it [00:07, 136.61it/s]


In [9]:
X_all = []
y_all = []

train_files_pb = tqdm(islice(train_files, TRAIN_FILES_COUNT),'[preproc.transform] Transforming Data...')

for file in train_files_pb:
    file_path = os.path.join(train_files_path, file)
    data = pd.read_csv(file_path)

    # 转换时间列，提取特征等
    data['time'] = pd.to_datetime(data['time'])
    data['hour'] = data['time'].dt.hour
    data['day_of_week'] = data['time'].dt.dayofweek
    data['month'] = data['time'].dt.month

    # 使用转换后的标签
    data['type_encoded'] = label_encoder.transform(data['type'])

    X = data[['lat', 'lon', '速度', '方向', 'hour', 'day_of_week', 'month']]
    y = data['type_encoded']

    X_all.append(X)
    y_all.append(y)

# 将所有数据合并为一个大的 DataFrame
X = pd.concat(X_all, ignore_index=True)
y = pd.concat(y_all, ignore_index=True)

  data['time'] = pd.to_datetime(data['time'])
[preproc.transform] Transforming Data...: : 1000it [00:14, 67.72it/s]


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

In [11]:
from torch.utils.data import Dataset, DataLoader
import torch

class FishingVesselDataset(Dataset):
    def __init__(self, features, labels):
        """
        features: 特征数据，尺寸为 (n_samples, n_features)
        labels: 标签数据，尺寸为 (n_samples,)
        """
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# 假设 X_train, y_train, X_test, y_test 已经准备好了
# 将数据转换为 PyTorch tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# 创建 Dataset
train_dataset = FishingVesselDataset(X_train_tensor, y_train_tensor)
test_dataset = FishingVesselDataset(X_test_tensor, y_test_tensor)

# 创建 DataLoader
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

In [12]:
import torch.nn as nn
import torch.nn.functional as F

class FishingVesselNet(nn.Module):
    def __init__(self, num_features, num_classes):
        super(FishingVesselNet, self).__init__()
        self.fc1 = nn.Linear(num_features, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, num_classes)

    def forward(self, x):
        '''
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        '''

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

# 实例化模型
num_features = X_train.shape[1]
num_classes = len(torch.unique(y_train_tensor)) # 假设所有类别都在训练集中出现过

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'[torch.cuda] Availability: {torch.cuda.is_available()}')

model = FishingVesselNet(num_features, num_classes).to(device)

[torch.cuda] Availability: False


In [17]:
import torch.optim as optim

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型迭代次数
num_epochs = 20

for epoch in tqdm(range(num_epochs), '[torch.train.epoch] Train Epochs'):
    for inputs, labels in tqdm(train_loader, '[torch.train.epoch.single] Single Epoch'):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    #print(f' Epoch {epoch+1}, Loss: {loss.item()}')

[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [01:00<00:00, 264.92it/s]
[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [01:00<00:00, 263.32it/s]
[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:59<00:00, 270.10it/s]
[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:59<00:00, 269.52it/s]
[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:59<00:00, 269.89it/s]
[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:59<00:00, 271.32it/s]
[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [01:07<00:00, 237.79it/s]
[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [01:00<00:00, 266.67it/s]
[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [01:00<00:00, 266.87it/s]
[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [01:00<00:00, 266.63it/s]
[torch.train.epoch] Train Epochs: 100%|█

In [25]:
num_epochs = 20

for epoch in tqdm(range(num_epochs), '[torch.train.epoch] Train Epochs'):
    for inputs, labels in tqdm(train_loader, '[torch.train.epoch.single] Single Epoch'):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f' Epoch {epoch+1}, Loss: {loss.item()}')

[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:58<00:00, 273.25it/s]
[torch.train.epoch] Train Epochs:   5%|▌         | 1/20 [00:58<18:36, 58.74s/it]

 Epoch 1, Loss: 0.369807630777359


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:58<00:00, 272.52it/s]
[torch.train.epoch] Train Epochs:  10%|█         | 2/20 [01:57<17:38, 58.83s/it]

 Epoch 2, Loss: 0.7722816467285156


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:58<00:00, 272.81it/s]
[torch.train.epoch] Train Epochs:  15%|█▌        | 3/20 [02:56<16:40, 58.83s/it]

 Epoch 3, Loss: 0.3232731521129608


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:58<00:00, 272.79it/s]
[torch.train.epoch] Train Epochs:  20%|██        | 4/20 [03:55<15:41, 58.83s/it]

 Epoch 4, Loss: 0.4616638720035553


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:59<00:00, 271.47it/s]
[torch.train.epoch] Train Epochs:  25%|██▌       | 5/20 [04:54<14:44, 58.94s/it]

 Epoch 5, Loss: 0.40403589606285095


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:59<00:00, 271.71it/s]
[torch.train.epoch] Train Epochs:  30%|███       | 6/20 [05:53<13:45, 58.98s/it]

 Epoch 6, Loss: 0.4292660057544708


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:58<00:00, 272.31it/s]
[torch.train.epoch] Train Epochs:  35%|███▌      | 7/20 [06:52<12:46, 58.97s/it]

 Epoch 7, Loss: 0.48033133149147034


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:59<00:00, 269.53it/s]
[torch.train.epoch] Train Epochs:  40%|████      | 8/20 [07:51<11:49, 59.16s/it]

 Epoch 8, Loss: 0.23125986754894257


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [01:00<00:00, 265.54it/s]
[torch.train.epoch] Train Epochs:  45%|████▌     | 9/20 [08:52<10:55, 59.56s/it]

 Epoch 9, Loss: 0.4116075932979584


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [01:05<00:00, 244.11it/s]
[torch.train.epoch] Train Epochs:  50%|█████     | 10/20 [09:58<10:14, 61.47s/it]

 Epoch 10, Loss: 0.6728768348693848


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [01:02<00:00, 258.73it/s]
[torch.train.epoch] Train Epochs:  55%|█████▌    | 11/20 [11:00<09:14, 61.64s/it]

 Epoch 11, Loss: 0.8310718536376953


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:58<00:00, 275.70it/s]
[torch.train.epoch] Train Epochs:  60%|██████    | 12/20 [11:58<08:04, 60.60s/it]

 Epoch 12, Loss: 0.5148789882659912


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:57<00:00, 278.40it/s]
[torch.train.epoch] Train Epochs:  65%|██████▌   | 13/20 [12:56<06:57, 59.71s/it]

 Epoch 13, Loss: 0.7719016671180725


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:57<00:00, 278.93it/s]
[torch.train.epoch] Train Epochs:  70%|███████   | 14/20 [13:53<05:54, 59.05s/it]

 Epoch 14, Loss: 0.2730102837085724


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:57<00:00, 279.41it/s]
[torch.train.epoch] Train Epochs:  75%|███████▌  | 15/20 [14:51<04:52, 58.57s/it]

 Epoch 15, Loss: 0.7968931794166565


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:58<00:00, 276.67it/s]
[torch.train.epoch] Train Epochs:  80%|████████  | 16/20 [15:49<03:53, 58.40s/it]

 Epoch 16, Loss: 0.4079136550426483


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:57<00:00, 277.78it/s]
[torch.train.epoch] Train Epochs:  85%|████████▌ | 17/20 [16:46<02:54, 58.22s/it]

 Epoch 17, Loss: 0.35070183873176575


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:57<00:00, 277.95it/s]
[torch.train.epoch] Train Epochs:  90%|█████████ | 18/20 [17:44<01:56, 58.07s/it]

 Epoch 18, Loss: 0.5674864649772644


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:57<00:00, 277.29it/s]
[torch.train.epoch] Train Epochs:  95%|█████████▌| 19/20 [18:42<00:58, 58.02s/it]

 Epoch 19, Loss: 0.66657954454422


[torch.train.epoch.single] Single Epoch: 100%|██████████| 16049/16049 [00:57<00:00, 278.17it/s]
[torch.train.epoch] Train Epochs: 100%|██████████| 20/20 [19:40<00:00, 59.01s/it]

 Epoch 20, Loss: 0.4079294204711914





In [23]:
# Train single epoch
train_single_pb = tqdm(train_loader)
train_single_pb.set_description(f'[torch.train.single] Training Single Epoch {epoch+2}')

for inputs, labels in train_single_pb:
    inputs, labels = inputs.to(device), labels.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
epoch += 1
print(f'[torch.train.step] Epoch {epoch+1}, Loss: {loss.item()}')

[torch.train.single] Training Single Epoch 12: 100%|██████████| 16049/16049 [00:58<00:00, 276.11it/s]

[torch.train.step] Epoch 12, Loss: 0.624221920967102





In [26]:
# Test current accuracy
correct = 0
total = 0

test_loader_pb = tqdm(test_loader)
test_loader_pb.set_description('[torch.test] Testing Accuracy')

with torch.no_grad():
    for inputs, labels in test_loader_pb:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'\n[torch.test] Accuracy on test set: {100 * correct / total}%')

[torch.test] Testing Accuracy: 100%|██████████| 10699/10699 [00:13<00:00, 778.91it/s]


[torch.test] Accuracy on test set: 74.78692144572119%





In [None]:
# Evaluate Accuracy on each epoch
import torch.optim as optim

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
num_epochs = 15

epochs_pb = tqdm(range(num_epochs))
epochs_pb.set_description('[torch.train] Training')

for epoch in epochs_pb:
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'[torch.train.accuracy] Epoch {epoch+1}, Loss: {loss.item()}, Accuracy: {100 * correct / total}%')

In [27]:
num_epochs = 20

epochs_pb = tqdm(range(num_epochs))
epochs_pb.set_description('[torch.train] Training')

for epoch in epochs_pb:
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'[torch.train.accuracy] Epoch {epoch+1}, Loss: {loss.item()}, Accuracy: {100 * correct / total}%')

[torch.train] Training:   5%|▌         | 1/20 [01:07<21:31, 67.99s/it]

[torch.train.accuracy] Epoch 1, Loss: 0.3575284481048584, Accuracy: 74.9261016117443%


[torch.train] Training:  10%|█         | 2/20 [02:16<20:28, 68.27s/it]

[torch.train.accuracy] Epoch 2, Loss: 0.7616659998893738, Accuracy: 75.7532962186224%


[torch.train] Training:  15%|█▌        | 3/20 [03:25<19:22, 68.40s/it]

[torch.train.accuracy] Epoch 3, Loss: 0.6567702293395996, Accuracy: 74.55821615716698%


[torch.train] Training:  20%|██        | 4/20 [04:33<18:15, 68.46s/it]

[torch.train.accuracy] Epoch 4, Loss: 1.1146769523620605, Accuracy: 75.22899737704535%


[torch.train] Training:  25%|██▌       | 5/20 [05:42<17:06, 68.46s/it]

[torch.train.accuracy] Epoch 5, Loss: 0.38899847865104675, Accuracy: 75.00029208849114%


[torch.train] Training:  30%|███       | 6/20 [06:49<15:56, 68.29s/it]

[torch.train.accuracy] Epoch 6, Loss: 0.5774917602539062, Accuracy: 75.16429977626022%


[torch.train] Training:  35%|███▌      | 7/20 [07:58<14:50, 68.48s/it]

[torch.train.accuracy] Epoch 7, Loss: 0.5234153866767883, Accuracy: 75.31954480929542%


[torch.train] Training:  40%|████      | 8/20 [09:06<13:40, 68.37s/it]

[torch.train.accuracy] Epoch 8, Loss: 0.5309907793998718, Accuracy: 75.64902062728925%


[torch.train] Training:  45%|████▌     | 9/20 [10:15<12:31, 68.28s/it]

[torch.train.accuracy] Epoch 9, Loss: 0.5204753279685974, Accuracy: 75.21994263382034%


[torch.train] Training:  50%|█████     | 10/20 [11:23<11:24, 68.40s/it]

[torch.train.accuracy] Epoch 10, Loss: 0.7448833584785461, Accuracy: 75.40425047172292%


[torch.train] Training:  55%|█████▌    | 11/20 [12:30<10:11, 67.95s/it]

[torch.train.accuracy] Epoch 11, Loss: 1.0035487413406372, Accuracy: 74.73741244647478%


[torch.train] Training:  60%|██████    | 12/20 [13:37<09:00, 67.52s/it]

[torch.train.accuracy] Epoch 12, Loss: 0.42967626452445984, Accuracy: 75.34422628679584%


[torch.train] Training:  65%|██████▌   | 13/20 [14:44<07:52, 67.47s/it]

[torch.train.accuracy] Epoch 13, Loss: 0.5628199577331543, Accuracy: 74.52681664437058%


[torch.train] Training:  70%|███████   | 14/20 [15:51<06:43, 67.31s/it]

[torch.train.accuracy] Epoch 14, Loss: 0.8336343765258789, Accuracy: 75.10588207803436%


[torch.train] Training:  75%|███████▌  | 15/20 [16:58<05:35, 67.07s/it]

[torch.train.accuracy] Epoch 15, Loss: 0.622100293636322, Accuracy: 74.90901443501323%


[torch.train] Training:  80%|████████  | 16/20 [18:04<04:27, 66.83s/it]

[torch.train.accuracy] Epoch 16, Loss: 0.8880559802055359, Accuracy: 74.70469853546831%


[torch.train] Training:  85%|████████▌ | 17/20 [19:10<03:20, 66.72s/it]

[torch.train.accuracy] Epoch 17, Loss: 0.8210957646369934, Accuracy: 74.35477652309544%


[torch.train] Training:  90%|█████████ | 18/20 [20:17<02:13, 66.65s/it]

[torch.train.accuracy] Epoch 18, Loss: 0.5295153260231018, Accuracy: 74.8488442058406%


[torch.train] Training:  95%|█████████▌| 19/20 [21:23<01:06, 66.51s/it]

[torch.train.accuracy] Epoch 19, Loss: 0.8045713305473328, Accuracy: 75.68684608689048%


[torch.train] Training: 100%|██████████| 20/20 [22:30<00:00, 67.51s/it]

[torch.train.accuracy] Epoch 20, Loss: 0.37708941102027893, Accuracy: 75.4873496474492%





In [19]:
model_path = '../../PR/model.ptm'
torch.save(model.state_dict(), model_path)

In [20]:
torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, model_path)

In [None]:
model = FishingVesselNet(num_features, num_classes)
optimizer = optim.Adam(model.parameters(), lr=0.001)

checkpoint = torch.load(model_path)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

model.eval()
# - or -
model.train()