***Importamos las librerías necesarias:***

In [1]:
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder

***Guardamos las trayectorias con sus respectivas clases en "complete_df":***

In [2]:
# Input CSV files, one per vessel type. The class label is derived from the
# file name further down, so the "<Type>_..." naming pattern is significant.
csv_files = [
    "./Trayectorias/Tipos_de_barcos/longitud_8/Cargo_modificado_compressed_8.csv",
    "./Trayectorias/Tipos_de_barcos/longitud_8/Container_modificado_compressed_8.csv",
    "./Trayectorias/Tipos_de_barcos/longitud_8/Cruise_modificado_compressed_8.csv",
    "./Trayectorias/Tipos_de_barcos/longitud_8/Fishing_modificado_compressed_8.csv",
    "./Trayectorias/Tipos_de_barcos/longitud_8/Tanker_modificado_compressed_8.csv"
]

# Collect one frame per file and concatenate a single time after the loop.
# Concatenating onto a growing frame inside the loop re-copies every
# accumulated row on each iteration.
per_type_frames = []

for file in csv_files:
    df = pd.read_csv(file)
    df['Bearing'] = df['Bearing'].round(4)
    # One row per trajectory: its bearings gathered into a list, NaN entries dropped.
    grouped = (
        df.groupby('Trajectory_ID')['Bearing']
        .apply(lambda x: [i for i in x if pd.notna(i)])
        .reset_index()
    )
    # Class label = file-name prefix before the first underscore (e.g. "Cargo").
    grouped['Type'] = file.split('/')[-1].split('_')[0]
    per_type_frames.append(grouped)

complete_df = pd.concat(per_type_frames, ignore_index=True)

### BEARING CHECK: show the first trajectory of every class ###
for tipo in complete_df['Type'].unique():
    first_element = complete_df[complete_df['Type'] == tipo].iloc[0]
    print(f"Type: {tipo}, Bearing: {first_element['Bearing']}")

print()

### TRAJECTORY COUNT CHECK: total and per class ###
print("Número total de trayectorias:", len(complete_df))
class_counts = complete_df['Type'].value_counts()
print(class_counts)

Type: Cargo, Bearing: [96.5905, 78.9129, 107.9285, 79.5855, 101.9231, 69.7462, 95.8744]
Type: Container, Bearing: [89.3541, 89.7018, 89.6913, 89.5125, 89.7389, 89.62, 89.7405]
Type: Cruise, Bearing: [88.8022, 98.1133, 88.558, 84.8833, 93.6194, 78.3894, 93.7414]
Type: Fishing, Bearing: [233.7547, 89.9999, 261.005, 308.6651, 247.3507, 186.4404, 148.1713]
Type: Tanker, Bearing: [93.0646, 93.4023, 91.6164, 82.2953, 81.0646, 83.0176, 87.048]

Número total de trayectorias: 14486
Type
Fishing      2927
Cargo        2919
Tanker       2892
Container    2886
Cruise       2862
Name: count, dtype: int64


***Dividimos "complete_df" en entrenamiento y test:***

In [3]:
# Fraction of every class that is held out for testing.
test_size = 0.2

# Shuffle the whole frame once (fixed seed) so the per-class slices taken
# below are a random selection rather than the on-disk order.
complete_df = complete_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Classes in order of first appearance after the shuffle.
classes = complete_df['Type'].unique()

# Split class by class so that train and test keep the same class proportions:
# the first `test_size` fraction of each class goes to test, the rest to train.
per_class_frames = [complete_df[complete_df['Type'] == class_name] for class_name in classes]
per_class_cuts = [int(len(frame) * test_size) for frame in per_class_frames]

test_df_list = [frame.iloc[:cut] for frame, cut in zip(per_class_frames, per_class_cuts)]
train_df_list = [frame.iloc[cut:] for frame, cut in zip(per_class_frames, per_class_cuts)]

train_df = pd.concat(train_df_list, ignore_index=True)
test_df = pd.concat(test_df_list, ignore_index=True)

# The concatenated frames are grouped by class; shuffle again to interleave them.
train_df = train_df.sample(frac=1, random_state=42).reset_index(drop=True)
test_df = test_df.sample(frac=1, random_state=42).reset_index(drop=True)

#### SANITY CHECK: sizes, class balance and a few sample rows ####
print(f"Nº de trayectorias en train: {len(train_df)}")
print(train_df['Type'].value_counts())
print()
print(f"Nº de trayectorias en test: {len(test_df)}")
print(test_df['Type'].value_counts())
print()
print("Ejm. conjunto train:")
print(train_df.head(5))
print()
print("Ejm. conjunto test:")
print(test_df.head(5))

Nº de trayectorias en train: 11591
Type
Fishing      2342
Cargo        2336
Tanker       2314
Container    2309
Cruise       2290
Name: count, dtype: int64

Nº de trayectorias en test: 2895
Type
Fishing      585
Cargo        583
Tanker       578
Container    577
Cruise       572
Name: count, dtype: int64

Ejm. conjunto train:
   Trajectory_ID                                            Bearing       Type
0           4465  [110.934, 83.0412, 136.7822, 101.2178, 64.1802...      Cargo
1          14619  [271.1761, 13.8253, 277.0206, 67.833, 199.7051...  Container
2          13151  [89.6625, 89.7794, 89.6551, 89.8846, 89.6753, ...    Fishing
3           9758  [81.925, 100.7672, 82.8752, 104.7483, 73.4801,...     Tanker
4          15204  [79.8872, 83.1972, 83.8323, 33.9177, 59.6246, ...      Cargo

Ejm. conjunto test:
   Trajectory_ID                                            Bearing       Type
0           1981  [89.9999, 68.5169, 135.0583, 48.6043, 98.4034,...    Fishing
1           6712  [

***Pre-procesado de los datos:***

In [4]:
# Stack the per-trajectory bearing lists into NumPy arrays, one row per
# trajectory (assumes every list has the same length -- TODO confirm).
X_train = np.array(train_df['Bearing'].tolist())
X_test = np.array(test_df['Bearing'].tolist())

# Min-max scale the bearings. The scaler is fitted on the training rows only
# and then applied unchanged to the test rows.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Append a trailing axis: (n, steps) -> (n, steps, 1).
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# Encode the class names in 'Type' as integer labels (fitted on train only).
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(train_df['Type'])
y_test = label_encoder.transform(test_df['Type'])

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Device:", device)

# Convert to tensors and swap the last two axes,
# (n, steps, 1) -> (n, 1, steps), before moving everything to `device`.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_train = X_train.permute(0, 2, 1).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32)
X_test = X_test.permute(0, 2, 1).to(device)
y_train = torch.tensor(y_train, dtype=torch.long).to(device)
y_test = torch.tensor(y_test, dtype=torch.long).to(device)

# Report shape and device of every tensor.
for _name, _tensor in (
    ("X_train:", X_train),
    ("y_train:", y_train),
    ("X_test:", X_test),
    ("y_test:", y_test),
):
    print(_name, _tensor.shape, _tensor.device)

Device: cuda
X_train: torch.Size([11591, 1, 7]) cuda:0
y_train: torch.Size([11591]) cuda:0
X_test: torch.Size([2895, 1, 7]) cuda:0
y_test: torch.Size([2895]) cuda:0


***Añadimos un conjunto de validación:***

In [5]:
# Mini-batch size for the data loaders.
batch_size = 32

# Hold out 20% of the current training tensors as a validation set (fixed seed).
# NOTE: X_train / y_train are overwritten with the reduced training split, so
# running this cell a second time would split the already-reduced data again.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Report shape and device of every resulting tensor.
for _name, _tensor in (
    ("X_train:", X_train),
    ("y_train:", y_train),
    ("X_val:", X_val),
    ("y_val:", y_val),
):
    print(_name, _tensor.shape, _tensor.device)

X_train: torch.Size([9272, 1, 7]) cuda:0
y_train: torch.Size([9272]) cuda:0
X_val: torch.Size([2319, 1, 7]) cuda:0
y_val: torch.Size([2319]) cuda:0


***Datasets y DataLoaders:***

In [6]:
# In-memory datasets pairing each input tensor with its label tensor.
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)

# The tensors wrapped above may already live on the GPU (they are moved with
# .to(device) during pre-processing). Two DataLoader options are unsafe then:
#   * num_workers > 0: worker processes would have to index CUDA tensors,
#     which PyTorch advises against and which fails with fork-started workers.
#   * pin_memory=True: only CPU tensors can be pinned.
# The data is fully in memory, so worker processes bring no benefit anyway.
# Load in the main process and pin only when the data is on the CPU and a GPU
# is available to copy to.
_data_on_cpu = X_train.device.type == "cpu"
loader_kwargs = dict(
    batch_size=batch_size,
    num_workers=0,
    pin_memory=_data_on_cpu and torch.cuda.is_available(),
)

# Only the training loader reshuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)