In [9]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras.models import Sequential
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt

In [10]:
file_path = r'Combined_data_2024_06_24.csv'
data = pd.read_csv(file_path)
data.columns = [col.strip() for col in data.columns]
data = data.drop('Date & Time', axis=1)
data.head()

Unnamed: 0,Latitude,Longitude,Floor,CMKL-Guest_2a:3f:0b:56:e6:18,CMKL-Guest_2a:3f:0b:56:e8:f1,CMKL-Guest_2a:3f:0b:56:e8:f7,CMKL-Guest_2a:3f:0b:56:e9:00,CMKL-Guest_2a:3f:0b:56:e9:03,CMKL-Guest_2a:3f:0b:56:e9:15,CMKL-Guest_2a:3f:0b:56:e9:2a,...,_12:3f:1b:56:e9:15,_12:3f:1b:56:e9:2a,_12:3f:1b:57:fa:37,_da:55:b8:26:73:ef,eduroam_9c:50:ee:83:b2:92,eduroam_9c:50:ee:83:b4:52,eduroam_9c:50:ee:83:b4:72,eduroam_9c:50:ee:83:b8:32,guest_b6:fb:e4:a4:60:11,wifi-student_b4:fb:e4:e4:60:11
0,13.72789,100.778358,6,-100,-80,-52,-86,-77,-88,-70,...,-100,-79,-82,-100,-66,-78,-59,-80,-100,-100
1,13.72789,100.778358,6,-100,-83,-67,-86,-68,-86,-79,...,-100,-72,-100,-100,-100,-100,-100,-73,-100,-100
2,13.727887,100.778347,6,-100,-82,-66,-84,-68,-100,-76,...,-100,-100,-100,-100,-63,-74,-100,-72,-100,-100
3,13.727889,100.778357,6,-100,-82,-66,-84,-68,-86,-76,...,-100,-72,-100,-100,-63,-74,-100,-72,-100,-100
4,13.727887,100.778341,6,-100,-80,-67,-85,-67,-100,-80,...,-100,-100,-100,-100,-64,-75,-64,-73,-100,-100


In [11]:
## Data Normalization
from sklearn.preprocessing import MinMaxScaler

# Choose points to interpolate between
coordinate_columns = data[['Latitude', 'Longitude', 'Floor']].values
rssi_columns = data.drop(['Latitude', 'Longitude', 'Floor'], axis=1).columns

# Normalization: Shift RSSI data from [Min, Max] to [0, 100] then normalize to [0, 1]
min_rssi = data[rssi_columns].min().min()  # find the minimum across all RSSI values
shifted_rssi = data[rssi_columns] - min_rssi  # shift values to be positive

# Initialize scalers
rssi_scaler = MinMaxScaler(feature_range=(0, 1))
coord_scaler = MinMaxScaler(feature_range=(0, 1))

# Normalize RSSI values
normalized_rssi = rssi_scaler.fit_transform(shifted_rssi)

# Normalize coordinates
normalized_coords = coord_scaler.fit_transform(coordinate_columns)

# Combine normalized data
normalized_data = pd.DataFrame(normalized_rssi, columns=rssi_columns)
normalized_data[['Latitude', 'Longitude', 'Floor']] = normalized_coords
# Display the first few rows of normalized data
normalized_data.head()

Unnamed: 0,CMKL-Guest_2a:3f:0b:56:e6:18,CMKL-Guest_2a:3f:0b:56:e8:f1,CMKL-Guest_2a:3f:0b:56:e8:f7,CMKL-Guest_2a:3f:0b:56:e9:00,CMKL-Guest_2a:3f:0b:56:e9:03,CMKL-Guest_2a:3f:0b:56:e9:15,CMKL-Guest_2a:3f:0b:56:e9:2a,CMKL-Guest_2a:3f:0b:57:fa:37,CMKL-Guest_2a:3f:1b:56:e6:18,CMKL-Guest_2a:3f:1b:56:e8:f1,...,_da:55:b8:26:73:ef,eduroam_9c:50:ee:83:b2:92,eduroam_9c:50:ee:83:b4:52,eduroam_9c:50:ee:83:b4:72,eduroam_9c:50:ee:83:b8:32,guest_b6:fb:e4:a4:60:11,wifi-student_b4:fb:e4:e4:60:11,Latitude,Longitude,Floor
0,0.0,0.465116,0.923077,0.28,0.353846,0.235294,0.612245,0.0,0.527273,0.511628,...,0.0,0.607143,0.333333,0.732143,0.37037,0.0,0.0,0.891098,0.066795,0.0
1,0.0,0.395349,0.634615,0.28,0.492308,0.27451,0.428571,0.0,0.509091,0.0,...,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.891098,0.066795,0.0
2,0.0,0.418605,0.653846,0.32,0.492308,0.0,0.489796,0.0,0.0,0.0,...,0.0,0.660714,0.393939,0.0,0.518519,0.0,0.0,0.890399,0.06438,0.0
3,0.0,0.418605,0.653846,0.32,0.492308,0.27451,0.489796,0.0,0.509091,0.0,...,0.0,0.660714,0.393939,0.0,0.518519,0.0,0.0,0.890829,0.066532,0.0
4,0.0,0.465116,0.634615,0.3,0.507692,0.0,0.408163,0.0,0.0,0.44186,...,0.0,0.642857,0.378788,0.642857,0.5,0.0,0.0,0.890291,0.063063,0.0


In [12]:
## GAN Model Training

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split

tensor_data = torch.tensor(normalized_data.values, dtype=torch.float32)
train_size = int(0.8 * len(tensor_data))
val_size = len(tensor_data) - train_size
train_dataset, val_dataset = random_split(tensor_data, [train_size, val_size])

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

class Generator(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(True),
            nn.Linear(128, 256),
            nn.ReLU(True),
            nn.Linear(256, output_dim),
            nn.Tanh()
        )

    def forward(self, x):
        return self.model(x)

class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

input_dim = 100
output_dim = normalized_data.shape[1]
data_dim = normalized_data.shape[1]

generator = Generator(input_dim, output_dim)
discriminator = Discriminator(data_dim)

lr = 0.0002
betas = (0.5, 0.999)
g_optimizer = optim.Adam(generator.parameters(), lr=lr, betas=betas)
d_optimizer = optim.Adam(discriminator.parameters(), lr=lr, betas=betas)

criterion = nn.BCELoss()
num_epochs = 5000

for epoch in range(num_epochs):
    for real_data in train_loader:
        real_data = real_data.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
        batch_size = real_data.size(0)

        real_labels = torch.ones(batch_size, 1).to(real_data.device)
        fake_labels = torch.zeros(batch_size, 1).to(real_data.device)

        outputs = discriminator(real_data)
        d_loss_real = criterion(outputs, real_labels)
        real_score = outputs

        z = torch.randn(batch_size, input_dim).to(real_data.device)
        fake_data = generator(z)
        outputs = discriminator(fake_data.detach())
        d_loss_fake = criterion(outputs, fake_labels)
        fake_score = outputs

        d_loss = d_loss_real + d_loss_fake
        d_optimizer.zero_grad()
        d_loss.backward()
        d_optimizer.step()

        outputs = discriminator(fake_data)
        g_loss = criterion(outputs, real_labels)

        g_optimizer.zero_grad()
        g_loss.backward()
        g_optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], d_loss: {d_loss.item():.4f}, g_loss: {g_loss.item():.4f}, D(x): {real_score.mean().item():.4f}, D(G(z)): {fake_score.mean().item():.4f}')


Epoch [10/5000], d_loss: 1.1922, g_loss: 0.8305, D(x): 0.5913, D(G(z)): 0.4734
Epoch [20/5000], d_loss: 1.1700, g_loss: 0.9395, D(x): 0.5842, D(G(z)): 0.4412
Epoch [30/5000], d_loss: 1.1006, g_loss: 0.9510, D(x): 0.5770, D(G(z)): 0.3973
Epoch [40/5000], d_loss: 1.0703, g_loss: 1.3101, D(x): 0.6347, D(G(z)): 0.4096
Epoch [50/5000], d_loss: 1.1404, g_loss: 1.2070, D(x): 0.5511, D(G(z)): 0.3568
Epoch [60/5000], d_loss: 1.3597, g_loss: 0.9075, D(x): 0.4786, D(G(z)): 0.3609
Epoch [70/5000], d_loss: 1.0771, g_loss: 1.2499, D(x): 0.6240, D(G(z)): 0.3949
Epoch [80/5000], d_loss: 1.3400, g_loss: 1.1402, D(x): 0.5243, D(G(z)): 0.4400
Epoch [90/5000], d_loss: 0.9313, g_loss: 1.3531, D(x): 0.6231, D(G(z)): 0.3226
Epoch [100/5000], d_loss: 1.2556, g_loss: 1.3355, D(x): 0.6282, D(G(z)): 0.4879
Epoch [110/5000], d_loss: 0.9729, g_loss: 1.4088, D(x): 0.6416, D(G(z)): 0.3704
Epoch [120/5000], d_loss: 1.1842, g_loss: 1.1309, D(x): 0.5738, D(G(z)): 0.3869
Epoch [130/5000], d_loss: 1.1192, g_loss: 1.2131,

In [22]:
# Set the device for computation
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Number of synthetic samples to generate
num_samples = 10000

# Generate synthetic data
z = torch.randn(num_samples, input_dim).to(device)
synthetic_data = generator(z).detach().cpu().numpy()

# Generator output handling and rescaling
synthetic_rssi = rssi_scaler.inverse_transform(synthetic_data[:, :-3])  # inverse transform normalization
synthetic_rssi = synthetic_rssi + min_rssi  # shift back to original range

# Convert synthetic 'Floor' coordinates back to integer values
synthetic_coords = coord_scaler.inverse_transform(synthetic_data[:, -3:])
synthetic_coords[:, 2] = np.round(synthetic_coords[:, 2]).astype(int)  # Convert 'Floor' to integer


# Create a DataFrame with coordinates before RSSI values
coordinates_df = pd.DataFrame(synthetic_coords, columns=['Latitude', 'Longitude', 'Floor'])
rssi_df = pd.DataFrame(synthetic_rssi, columns=rssi_columns)
synthetic_df = pd.concat([coordinates_df, rssi_df], axis=1)

# Save to CSV
synthetic_df.to_csv(r'GANs_Noon_filtered_synthetic_samples.csv', index=False)
print("Synthetic data saved successfully!")

Synthetic data saved successfully!
