In [1]:
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
# data = pd.read_csv("C:\Users\alanx\OneDrive - The University of Sydney (Students)\Thesis\Deep Learning\wifi_localization\data\csv\ultimate_combined.csv")
data = pd.read_csv("data/csv/ultimate_combined.csv")
data.head()

Unnamed: 0,timestamps,csi_len,channel,err_info,noise_floor,rate,bandWidth,num_tones,nr,nc,...,payload_length,block_length,subcarriers,ant1_amplitude,ant2_amplitude,ant1_phase,ant2_phase,with_receiver,coord1,coord2
0,429192390,560,2437,0,0,140,0,56,2,2,...,1320,1904,1,27.45906,92.070625,0.992894,-1.418147,0,1,1
1,429192390,560,2437,0,0,140,0,56,2,2,...,1320,1904,2,27.45906,167.839209,-0.992894,1.994465,0,1,1
2,429192390,560,2437,0,0,140,0,56,2,2,...,1320,1904,3,28.84441,94.339811,0.982794,-1.485895,0,1,1
3,429192390,560,2437,0,0,140,0,56,2,2,...,1320,1904,4,29.154759,174.642492,-1.030377,1.983207,0,1,1
4,429192390,560,2437,0,0,140,0,56,2,2,...,1320,1904,5,30.805844,89.453899,0.946773,-1.470015,0,1,1


In [3]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [4]:
from tensorflow.python.client import device_lib

def get_available_devices():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos]

print(get_available_devices())

['/device:CPU:0']


In [3]:
variances = data.var()
print(variances)

low_variance_cols = variances[variances < 5e-3].index.tolist()
print("Columns with low or zero variance:")
print(low_variance_cols)

timestamps        7.333163e+23
csi_len           6.267564e+03
channel           1.250560e-03
err_info          3.915442e-03
noise_floor       0.000000e+00
rate              6.131537e+00
bandWidth         0.000000e+00
num_tones         0.000000e+00
nr                0.000000e+00
nc                7.991891e-02
rssi              3.517408e+01
rssi1             2.618537e+01
rssi2             5.999930e+01
rssi3             0.000000e+00
payload_length    1.135595e+04
block_length      1.946333e+04
subcarriers       2.612500e+02
ant1_amplitude    5.845840e+03
ant2_amplitude    5.987510e+03
ant1_phase        3.290634e+00
ant2_phase        3.290462e+00
with_receiver     2.470494e-01
coord1            3.918758e+00
coord2            2.106026e+00
dtype: float64
Columns with low or zero variance:
['channel', 'err_info', 'noise_floor', 'bandWidth', 'num_tones', 'nr', 'rssi3']


In [4]:
# Drop constant columns and timeframe from the DataFrame
data_processed = data.copy(True)
data_processed.drop(low_variance_cols, axis=1, inplace=True)
data_processed.drop('timestamps', axis=1, inplace=True)
data_processed.head()

Unnamed: 0,csi_len,rate,nc,rssi,rssi1,rssi2,payload_length,block_length,subcarriers,ant1_amplitude,ant2_amplitude,ant1_phase,ant2_phase,with_receiver,coord1,coord2
0,560,140,2,58,52,57,1320,1904,1,27.45906,92.070625,0.992894,-1.418147,0,1,1
1,560,140,2,58,52,57,1320,1904,2,27.45906,167.839209,-0.992894,1.994465,0,1,1
2,560,140,2,58,52,57,1320,1904,3,28.84441,94.339811,0.982794,-1.485895,0,1,1
3,560,140,2,58,52,57,1320,1904,4,29.154759,174.642492,-1.030377,1.983207,0,1,1
4,560,140,2,58,52,57,1320,1904,5,30.805844,89.453899,0.946773,-1.470015,0,1,1


In [5]:
features_to_scale = ['csi_len', 'payload_length', 'block_length', 'ant1_amplitude', 'ant2_amplitude', 'ant1_phase', 'ant2_phase', 'rssi', 'rssi1', 'rssi2']
scaler = StandardScaler()

# Apply standardization on features
# data_processed = data_cleaned.copy()
data_processed[features_to_scale] = scaler.fit_transform(data_processed[features_to_scale])

In [6]:
data_processed.head()

Unnamed: 0,csi_len,rate,nc,rssi,rssi1,rssi2,payload_length,block_length,subcarriers,ant1_amplitude,ant2_amplitude,ant1_phase,ant2_phase,with_receiver,coord1,coord2
0,0.309827,140,2,0.407391,0.183578,0.595846,-2.167693,-1.479957,1,-1.788786,-0.702974,0.544152,-0.781639,0,1,1
1,0.309827,140,2,0.407391,0.183578,0.595846,-2.167693,-1.479957,2,-1.788786,0.276214,-0.550543,1.099663,0,1,1
2,0.309827,140,2,0.407391,0.183578,0.595846,-2.167693,-1.479957,3,-1.770667,-0.673649,0.538584,-0.818986,0,1,1
3,0.309827,140,2,0.407391,0.183578,0.595846,-2.167693,-1.479957,4,-1.766608,0.364135,-0.571206,1.093457,0,1,1
4,0.309827,140,2,0.407391,0.183578,0.595846,-2.167693,-1.479957,5,-1.745013,-0.736791,0.518727,-0.810232,0,1,1


In [7]:
data_processed = pd.concat([data_processed, pd.get_dummies(data_processed['subcarriers'], prefix='subcarrier')], axis=1)
data_processed.drop('subcarriers', axis=1, inplace=True)
data_processed.head()

Unnamed: 0,csi_len,rate,nc,rssi,rssi1,rssi2,payload_length,block_length,ant1_amplitude,ant2_amplitude,...,subcarrier_47,subcarrier_48,subcarrier_49,subcarrier_50,subcarrier_51,subcarrier_52,subcarrier_53,subcarrier_54,subcarrier_55,subcarrier_56
0,0.309827,140,2,0.407391,0.183578,0.595846,-2.167693,-1.479957,-1.788786,-0.702974,...,False,False,False,False,False,False,False,False,False,False
1,0.309827,140,2,0.407391,0.183578,0.595846,-2.167693,-1.479957,-1.788786,0.276214,...,False,False,False,False,False,False,False,False,False,False
2,0.309827,140,2,0.407391,0.183578,0.595846,-2.167693,-1.479957,-1.770667,-0.673649,...,False,False,False,False,False,False,False,False,False,False
3,0.309827,140,2,0.407391,0.183578,0.595846,-2.167693,-1.479957,-1.766608,0.364135,...,False,False,False,False,False,False,False,False,False,False
4,0.309827,140,2,0.407391,0.183578,0.595846,-2.167693,-1.479957,-1.745013,-0.736791,...,False,False,False,False,False,False,False,False,False,False


# Model

In [8]:
X = data_processed.drop(columns=['coord1', 'coord2']) 
y = data_processed[['coord1', 'coord2']]

In [9]:
# First, split the data into training and a temporary set (which will later be split into validation and test sets)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)  # 60% train, 40% temp

# Now, split the temporary set into validation and test sets
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)  # 20% val, 20% test

# Check the shapes of your splits
print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)
print("Test set shape:", X_test.shape)

Training set shape: (16792440, 69)
Validation set shape: (5597480, 69)
Test set shape: (5597480, 69)


In [10]:
def create_dnn_model(input_shape):
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))
    
    # Hidden layers
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(32, activation='relu'))
    
    # Output layer for regression (2 neurons for x and y coordinates)
    model.add(layers.Dense(2))  # For 2D localization, change to 3 for 3D localization

    # Compile the model
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Example of creating the model, input_shape should be (number of features,)
input_shape = (X_train.shape[1],)  # Adjust based on your number of features
model = create_dnn_model(input_shape)

# Print the model summary
model.summary()



In [11]:
# Train the model
history = model.fit(X_train, y_train, 
                    epochs=50, 
                    batch_size=32, 
                    validation_data=(X_val, y_val),
                    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)])

# Evaluate the model on test data
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")


Epoch 1/50
[1m524764/524764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m468s[0m 889us/step - loss: 3.0020 - mae: 1.4480 - val_loss: 3.0134 - val_mae: 1.4512
Epoch 2/50
[1m524764/524764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m488s[0m 929us/step - loss: 3.0140 - mae: 1.4523 - val_loss: 3.0133 - val_mae: 1.4529
Epoch 3/50
[1m524764/524764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m492s[0m 935us/step - loss: 3.0136 - mae: 1.4522 - val_loss: 3.0135 - val_mae: 1.4506
Epoch 4/50
[1m524764/524764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m465s[0m 886us/step - loss: 3.0124 - mae: 1.4519 - val_loss: 3.0136 - val_mae: 1.4537
Epoch 5/50
[1m524764/524764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m472s[0m 899us/step - loss: 3.0128 - mae: 1.4521 - val_loss: 3.0132 - val_mae: 1.4507
Epoch 6/50
[1m312092/524764[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m2:51[0m 808us/step - loss: 3.0126 - mae: 1.4523

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

# Plot training & validation loss
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
