<h1>Data preparation<h1>

In [3]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader
import pandas as pd
import sqlite3

con = sqlite3.connect('../nokia.db')

data = pd.read_sql_query('SELECT * FROM Makkah_5g', con)

# Drop rows with missing target values
data = data.dropna(subset=[
    'RSRP (d Bm)-Dominant RSRP (d Bm) ',
    'Serving RS Info-Serving RSRP (d Bm)',
    'Serving RS Info-Serving RSRQ (d B)',
    'Serving RS Info-Serving RS CINR (d B)'
])
data = data.fillna(0)  # Fill other missing values with 0 (or use other strategies)

# Define features and target variables
features = [
    'Latitude', 'Longitude', 'Serving Channel Info-DL EARFCN',
    'Serving Cell Info-Serving PCI', 'Serving RS Info-NR Best SS-RSRP',
    'Serving RS Info-NR Best SS-SINR', 'Data Throughput-RLC DL Throughput (kbps)',
    'Data Throughput-NR PDCP downlink throughput (Mbps)',
    '5G NR-NR Best SS-RSRP', '5G NR-NR PDCP downlink throughput (Mbps)',
    '5G NR-NR Best SS-SINR'
]

target_rsrp = 'RSRP (d Bm)-Dominant RSRP (d Bm) '
target_rsrq = 'Serving RS Info-Serving RSRQ (d B)'
target_sinr = 'Serving RS Info-Serving RS CINR (d B)'

# Extract features and target variables
X = data[features].values
y_rsrp = data[target_rsrp].values
y_rsrq = data[target_rsrq].values
y_sinr = data[target_sinr].values

<h1>Graph construction<h1>

In [4]:
from sklearn.neighbors import NearestNeighbors

# Define number of neighbors to consider for graph edges
k_neighbors = 10

# Fit NearestNeighbors to find nearest neighbors for each point
nbrs = NearestNeighbors(n_neighbors=k_neighbors).fit(X[:, :2])
distances, indices = nbrs.kneighbors(X[:, :2])

# Create edge index
edge_index = []
for i in range(X.shape[0]):
    for j in range(1, k_neighbors):  # Skip the first neighbor as it is the point itself
        edge_index.append([i, indices[i, j]])

edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()

# Create node features tensor
x = torch.tensor(X, dtype=torch.float)

# Create target tensors
y_rsrp = torch.tensor(y_rsrp, dtype=torch.float).view(-1, 1)
y_rsrq = torch.tensor(y_rsrq, dtype=torch.float).view(-1, 1)
y_sinr = torch.tensor(y_sinr, dtype=torch.float).view(-1, 1)

# Combine all targets into a single tensor
y = torch.cat([y_rsrp, y_rsrq, y_sinr], dim=1)

# Create the graph data object
graph_data = Data(x=x, edge_index=edge_index, y=y)


<h1>Model definition<h1>

In [5]:
class GCN(torch.nn.Module):
    def __init__(self, num_node_features, hidden_channels, num_targets):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.linear = torch.nn.Linear(hidden_channels, num_targets)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        x = self.linear(x)
        return x

# Define model, optimizer, and loss function
num_node_features = len(features)
hidden_channels = 64
num_targets = y.size(1)

model = GCN(num_node_features, hidden_channels, num_targets)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

<h1>GNN model training<h1>

In [6]:
# Create DataLoader for batching
loader = DataLoader([graph_data], batch_size=1, shuffle=True)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    for batch in loader:
        model.train()
        optimizer.zero_grad()
        out = model(batch.x, batch.edge_index)
        loss = criterion(out, batch.y)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')




Epoch 1, Loss: 11625105.0
Epoch 2, Loss: 99549064.0
Epoch 3, Loss: 36635564.0
Epoch 4, Loss: 23634316.0
Epoch 5, Loss: 14063769.0
Epoch 6, Loss: 21460038.0
Epoch 7, Loss: 24245364.0
Epoch 8, Loss: 14750123.0
Epoch 9, Loss: 3611399.0
Epoch 10, Loss: 192532.34375
Epoch 11, Loss: 4102446.5
Epoch 12, Loss: 7948010.0
Epoch 13, Loss: 8008065.5
Epoch 14, Loss: 5417126.5
Epoch 15, Loss: 3610949.5
Epoch 16, Loss: 3105351.25
Epoch 17, Loss: 2263714.25
Epoch 18, Loss: 1087732.0
Epoch 19, Loss: 532434.5625
Epoch 20, Loss: 279467.25
Epoch 21, Loss: 389691.6875
Epoch 22, Loss: 777754.0625
Epoch 23, Loss: 1184818.625
Epoch 24, Loss: 1385808.125
Epoch 25, Loss: 1294644.25
Epoch 26, Loss: 987384.5625
Epoch 27, Loss: 627613.4375
Epoch 28, Loss: 352156.1875
Epoch 29, Loss: 209719.9375
Epoch 30, Loss: 169098.734375
Epoch 31, Loss: 171979.0
Epoch 32, Loss: 174150.375
Epoch 33, Loss: 165050.375
Epoch 34, Loss: 160415.625
Epoch 35, Loss: 178868.859375
Epoch 36, Loss: 222285.90625
Epoch 37, Loss: 269739.75
Ep

<h1>Identify Poor Coverage Areas<h1>

In [7]:
# Switch to evaluation mode
model.eval()
with torch.no_grad():
    pred = model(graph_data.x, graph_data.edge_index)
    mse = criterion(pred, graph_data.y)
    print(f'Mean Squared Error: {mse.item()}')


Mean Squared Error: 825.060546875


<h1>Test the whole thing in one block<h1>

In [1]:
import torch #!!!!!!!!!!!!!!!!!!!
import torch.nn.functional as F #!!!!!!!!!!!!!!!!!!!
from torch_geometric.nn import GCNConv #!!!!!!!!!!!!!!!!!!!
from torch_geometric.data import Data, DataLoader # IDK !!!!!!!!!!!!!!!!!!!!!
import pandas as pd # read data
import numpy as np # matrix multiplication
from sklearn.neighbors import NearestNeighbors # to get nearest neighbors
import sqlite3 ## to fetch data

con = sqlite3.connect('../nokia.db')

data = pd.read_sql_query('SELECT * FROM Makkah_5g', con)


# Drop rows with missing target values
data = data.dropna(subset=[
    'RSRP (d Bm)-Dominant RSRP (d Bm) ',
    'Serving RS Info-Serving RSRP (d Bm)',
    'Serving RS Info-Serving RSRQ (d B)',
    'Serving RS Info-Serving RS CINR (d B)'
])
data = data.fillna(0)  # Fill other missing values with 0 (or use other strategies)

# Define features and target variables
features = [
    'Latitude', 'Longitude', 'Serving Channel Info-DL EARFCN',
    'Serving Cell Info-Serving PCI', 'Serving RS Info-NR Best SS-RSRP',
    'Serving RS Info-NR Best SS-SINR', 'Data Throughput-RLC DL Throughput (kbps)',
    'Data Throughput-NR PDCP downlink throughput (Mbps)',
    '5G NR-NR Best SS-RSRP', '5G NR-NR PDCP downlink throughput (Mbps)',
    '5G NR-NR Best SS-SINR'
]

target_rsrp = 'RSRP (d Bm)-Dominant RSRP (d Bm) '
target_rsrq = 'Serving RS Info-Serving RSRQ (d B)'
target_sinr = 'Serving RS Info-Serving RS CINR (d B)'

# Extract features and target variables
X = data[features].values
y_rsrp = data[target_rsrp].values
y_rsrq = data[target_rsrq].values
y_sinr = data[target_sinr].values

# Define number of neighbors to consider for graph edges
k_neighbors = 10

# Fit NearestNeighbors to find nearest neighbors for each point
nbrs = NearestNeighbors(n_neighbors=k_neighbors).fit(X[:, :2])
distances, indices = nbrs.kneighbors(X[:, :2])

# Create edge index
edge_index = []
for i in range(X.shape[0]):
    for j in range(1, k_neighbors):  # Skip the first neighbor as it is the point itself
        edge_index.append([i, indices[i, j]])

edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()

# Create node features tensor
x = torch.tensor(X, dtype=torch.float)

# Create target tensors
y_rsrp = torch.tensor(y_rsrp, dtype=torch.float).view(-1, 1)
y_rsrq = torch.tensor(y_rsrq, dtype=torch.float).view(-1, 1)
y_sinr = torch.tensor(y_sinr, dtype=torch.float).view(-1, 1)

# Combine all targets into a single tensor
y = torch.cat([y_rsrp, y_rsrq, y_sinr], dim=1)

# Create the graph data object
graph_data = Data(x=x, edge_index=edge_index, y=y)

# Define the GCN model
class GCN(torch.nn.Module):
    def __init__(self, num_node_features, hidden_channels, num_targets):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.linear = torch.nn.Linear(hidden_channels, num_targets)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        x = self.linear(x)
        return x

# Define model, optimizer, and loss function
num_node_features = len(features)
hidden_channels = 64
num_targets = y.size(1)

model = GCN(num_node_features, hidden_channels, num_targets)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

# Create DataLoader for batching
loader = DataLoader([graph_data], batch_size=1, shuffle=True)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    for batch in loader:
        model.train()
        optimizer.zero_grad()
        out = model(batch.x, batch.edge_index)
        loss = criterion(out, batch.y)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Switch to evaluation mode
model.eval()
with torch.no_grad():
    pred = model(graph_data.x, graph_data.edge_index)
    mse = criterion(pred, graph_data.y)
    print(f'Mean Squared Error: {mse.item()}')

# Example new data point (replace with actual values)
new_data_point = np.array([[21.3891, 39.8579, 100, 2, -95, 10, 3000, 20, -90, 25, 8]])  # Example feature values

# Ensure the new point has the same number of features
assert new_data_point.shape[1] == len(features), "New data point must have the same number of features as the training data."

# Combine the new data point with the existing data
X_new = np.vstack([X, new_data_point])

# Fit NearestNeighbors to find nearest neighbors for the new graph
nbrs = NearestNeighbors(n_neighbors=k_neighbors).fit(X_new[:, :2])
distances, indices = nbrs.kneighbors(X_new[:, :2])

# Create new edge index
edge_index_new = []
for i in range(X_new.shape[0]):
    for j in range(1, k_neighbors):  # Skip the first neighbor as it is the point itself
        edge_index_new.append([i, indices[i, j]])

edge_index_new = torch.tensor(edge_index_new, dtype=torch.long).t().contiguous()

# Create new node features tensor
x_new = torch.tensor(X_new, dtype=torch.float)

# Create the new graph data object
graph_data_new = Data(x=x_new, edge_index=edge_index_new)

# Predict the target values
with torch.no_grad():
    pred_new = model(graph_data_new.x, graph_data_new.edge_index)

# Extract the prediction for the new data point (last point in the graph)
new_point_prediction = pred_new[-1].cpu()




Epoch 1, Loss: 22498712.0
Epoch 2, Loss: 194141104.0
Epoch 3, Loss: 26624438.0
Epoch 4, Loss: 12323571.0
Epoch 5, Loss: 52091748.0
Epoch 6, Loss: 48954376.0
Epoch 7, Loss: 25315300.0
Epoch 8, Loss: 6514927.5
Epoch 9, Loss: 840081.125
Epoch 10, Loss: 4296345.5
Epoch 11, Loss: 9381148.0
Epoch 12, Loss: 11416729.0
Epoch 13, Loss: 10599696.0
Epoch 14, Loss: 8189036.5
Epoch 15, Loss: 5333549.5
Epoch 16, Loss: 2763228.5
Epoch 17, Loss: 1097472.5
Epoch 18, Loss: 479561.09375
Epoch 19, Loss: 528730.6875
Epoch 20, Loss: 763485.0625
Epoch 21, Loss: 943659.0
Epoch 22, Loss: 1140743.625
Epoch 23, Loss: 1403956.5
Epoch 24, Loss: 1622461.5
Epoch 25, Loss: 1681698.375
Epoch 26, Loss: 1533068.375
Epoch 27, Loss: 1221287.125
Epoch 28, Loss: 859188.5
Epoch 29, Loss: 550568.3125
Epoch 30, Loss: 339493.65625
Epoch 31, Loss: 199007.84375
Epoch 32, Loss: 110554.0703125
Epoch 33, Loss: 27081.5703125
Epoch 34, Loss: 23283.525390625
Epoch 35, Loss: 97759.671875
Epoch 36, Loss: 142039.875
Epoch 37, Loss: 188809

In [6]:
print(new_point_prediction)

tensor([-116.3332,  -84.2166,   40.1887])


<h1>Visualize the predictions<h1>

In [7]:
import torch
import plotly.express as px

# Switch to evaluation mode
model.eval()
with torch.no_grad():
    pred = model(graph_data.x, graph_data.edge_index)
    mse = criterion(pred, graph_data.y)
    print(f'Mean Squared Error: {mse.item()}')

# Calculate residuals for each target
residuals_rsrp = (pred[:, 0] - graph_data.y[:, 0]).numpy()
residuals_rsrq = (pred[:, 1] - graph_data.y[:, 1]).numpy()
residuals_sinr = (pred[:, 2] - graph_data.y[:, 2]).numpy()

# Create scatter plots for each target's residuals
fig_rsrp = px.scatter(x=graph_data.y[:, 0].numpy(), y=residuals_rsrp, labels={'x': 'True RSRP', 'y': 'Residuals'}, title='RSRP Residuals')
fig_rsrq = px.scatter(x=graph_data.y[:, 1].numpy(), y=residuals_rsrq, labels={'x': 'True RSRQ', 'y': 'Residuals'}, title='RSRQ Residuals')
fig_sinr = px.scatter(x=graph_data.y[:, 2].numpy(), y=residuals_sinr, labels={'x': 'True SINR', 'y': 'Residuals'}, title='SINR Residuals')

# Show the plots
fig_rsrp.show()
fig_rsrq.show()
fig_sinr.show()


Mean Squared Error: 2563.67529296875


<h1> Evaluation metrics<h1>

In [8]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


pred_np = pred.numpy()
y_np = graph_data.y.numpy()

mse = mean_squared_error(y_np, pred_np)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_np, pred_np)
r2 = r2_score(y_np, pred_np)

print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R2): {r2}")


Mean Squared Error (MSE): 2563.677734375
Root Mean Squared Error (RMSE): 50.632774353027344
Mean Absolute Error (MAE): 34.42551803588867
R-squared (R2): -99.00042173855854
