# 1. Loading datasets

In [53]:
import torch
import pandas as pd
import numpy as np
import time
import joblib
import seaborn as sns
import pickle
import random

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F




In [54]:

from torch import nn

# Report the installed PyTorch build.
print("PyTorch version:", torch.__version__)

# Query CUDA availability once and reuse the answer below.
has_cuda = torch.cuda.is_available()
print("CUDA available:", has_cuda)

# Describe the active CUDA device when there is one.
if not has_cuda:
    print("No CUDA devices found.")
else:
    cuda_index = torch.cuda.current_device()
    print("Current CUDA device index:", cuda_index)
    print("CUDA device name:", torch.cuda.get_device_name(cuda_index))

# Device handle for the rest of the notebook: GPU when available, otherwise CPU.
device = torch.device("cuda") if has_cuda else torch.device("cpu")


PyTorch version: 2.4.1+cpu
CUDA available: False
No CUDA devices found.


In [55]:
# Paths -- update these to match the local directory layout.
#
# Available input files:
#   HWLab: './Data/25-02-10/cleaned_df.csv'
#   SWlab: './Data/25-02-04/combined_data.csv'   <- currently selected
file_path = './Data/25-02-04/combined_data.csv'

# Model directory (defined here, not used in this cell).
model_path = './models/CNN/'

# Load the selected CSV into a DataFrame.
cleaned_df = pd.read_csv(file_path)

In [56]:
# Show the loaded frame as this cell's output (the rendering elides the middle rows).
cleaned_df

Unnamed: 0,Tx_0 RSSI,Tx_1 RSSI,Tx_2 RSSI,Tx_3 RSSI,Tx_4 RSSI,Tx_5 RSSI,Tx_6 RSSI,Tx_7 RSSI,X_Coord,Y_Coord
0,-54,-77,-74,127,127,-78,127,-71,0,0
1,-54,-77,127,-81,127,127,-81,-71,0,0
2,-54,127,127,127,127,127,127,-71,0,0
3,-54,-77,-74,-82,127,127,-82,-70,0,0
4,-54,-77,127,127,127,-78,-80,-70,0,0
...,...,...,...,...,...,...,...,...,...,...
7389,-70,-69,-80,-65,-79,127,-73,-62,4,3
7390,-70,-70,-84,-65,-79,-84,-74,-61,4,3
7391,-70,-69,-80,-64,-80,127,-76,-61,4,3
7392,-70,-70,-82,-67,-83,-84,-79,-61,4,3


In [57]:
from common_utils import preprocess_dataset

# Features: the raw RSSI column of each transmitter, 'Tx_0 RSSI' .. 'Tx_7 RSSI'.
# Alternative feature set (averaged readings): swap the suffix for 'RSSI_Avg',
# i.e. 'Tx_0 RSSI_Avg' .. 'Tx_7 RSSI_Avg'.
X = cleaned_df[[f'Tx_{tx} RSSI' for tx in range(8)]]

# Labels: the grid coordinates of each reading.
Y = cleaned_df[['X_Coord', 'Y_Coord']]



In [58]:
# Collect RSSI values per (X_Coord, Y_Coord).
#
# One groupby replaces the former row-by-row iterrows() loop. iterrows() builds
# a Series per row (slow) and converts every value to the common dtype of *all*
# columns; selecting the feature columns first keeps their own dtype.
# sort=False yields the locations in order of first appearance and groupby
# keeps the original row order inside each location, so the windows below are
# cut from the same sequences as before.
feature_cols = list(X.columns)
x_keys = cleaned_df['X_Coord'].astype(int)  # same truncation as int(row['X_Coord'])
y_keys = cleaned_df['Y_Coord'].astype(int)

# Maps (x, y) -> list of 1-D arrays, one array of feature values per reading.
grid_data = {
    (int(x), int(y)): list(readings.to_numpy())
    for (x, y), readings in cleaned_df[feature_cols].groupby([x_keys, y_keys], sort=False)
}

# Define parameters
num_tx = 8      # Number of transmitters
seq_length = 8  # Sequence length (8 samples per training instance)
stride = 1      # Sliding window step (adjustable)


training_samples = []

# Cut sliding windows in recording order, one location at a time.
for (x, y), rssi_values in grid_data.items():
    rssi_array = np.array(rssi_values)  # shape: (readings at this location, features)

    # A location with fewer than seq_length readings cannot fill a window.
    if rssi_array.shape[0] < seq_length:
        continue

    num_samples = rssi_array.shape[0]

    for start in range(0, num_samples - seq_length + 1, stride):
        # Transpose so each window is laid out as (features, seq_length).
        window = rssi_array[start:start + seq_length].T
        training_samples.append((window, (x, y)))

# Persist the list of (window, (x, y)) pairs.
with open('./Data/CNN_Data/CNN_test_data.pkl', 'wb') as f:
    pickle.dump(training_samples, f)





In [59]:
# # Creating augmented data

# grid_data = {}

# # Collect RSSI values per (X_Coord, Y_Coord)
# for index, row in cleaned_df.iterrows():
#     x, y = int(row['X_Coord']), int(row['Y_Coord'])
    
#     if (x, y) not in grid_data:
#         grid_data[(x, y)] = []
    
#     # Append RSSI values (Tx_0 to Tx_7)
#     grid_data[(x, y)].append(row[X.columns].values)

# # Define parameters
# num_tx = 8      # Number of transmitters
# seq_length = 8  # Sequence length (8 samples per training instance)
# stride = 1      # Sliding window step (adjustable)

# training_samples = []
# augmented_samples = []

# # Convert grid_data keys to a list for easy access
# locations = list(grid_data.keys())

# # For sampling in order
# for (x, y), rssi_values in grid_data.items():
#     rssi_array = np.array(rssi_values) 
    
#     if rssi_array.shape[0] < seq_length:
#         continue 

#     num_samples = rssi_array.shape[0]

#     for start in range(0, num_samples - seq_length + 1, stride):
#         window = rssi_array[start:start + seq_length].T  
#         training_samples.append((window, (x, y)))

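#         # Augment data: for each of two randomly sampled locations (the current
#         # location is not excluded), overwrite one randomly chosen row of the window
#         # with a random reading from that location; both draws may hit the same row.
#         # NOTE(review): after the .T above, rows of `window` are indexed by transmitter,
#         # while each stored reading holds one value per transmitter -- confirm that
#         # overwriting a row (rather than a column) is the intended augmentation.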
#         aug_window = window.copy()
#         rand_locs = random.sample(locations, 2)  # Select 2 random locations
#         for rand_loc in rand_locs:
#             if rand_loc in grid_data and len(grid_data[rand_loc]) > 0:
#                 rand_idx = random.randint(0, len(grid_data[rand_loc]) - 1)
#                 aug_window[random.randint(0, num_tx - 1)] = np.array(grid_data[rand_loc][rand_idx])

#         augmented_samples.append((aug_window, (x, y)))

# # Combine original and augmented data
# final_samples = training_samples + augmented_samples

# with open('./Data/CNN_Data/CNN_train_data_2fold.pkl', 'wb') as f:
#     pickle.dump(final_samples, f)

# print(f"Original samples: {len(training_samples)}")
# print(f"Augmented samples: {len(augmented_samples)}")
# print(f"Total dataset size: {len(final_samples)}")


In [60]:
# Split the (window, label) pairs into a window array and a label array.
# With no samples at all, both fall back to empty 1-D arrays.
if not training_samples:
    X_Sequence, y_Sequence = np.array([]), np.array([])
else:
    # Stacked windows; the recorded run printed shape (6946, 8, 8).
    X_Sequence = np.array([pair[0] for pair in training_samples])
    # Stacked (X_Coord, Y_Coord) labels; the recorded run printed shape (6946, 2).
    y_Sequence = np.array([pair[1] for pair in training_samples])

print("Generated training data shape:", X_Sequence.shape)
print("Generated labels shape:", y_Sequence.shape)

Generated training data shape: (6946, 8, 8)
Generated labels shape: (6946, 2)
