<a href="https://colab.research.google.com/github/MaakAdityaGupta/localRepo/blob/main/V1_Main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

V1 version

In [None]:
#1
!pip -q install lightgbm pandas numpy scikit-learn joblib openpyxl

In [None]:
#2
import os
import math
import joblib
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import lightgbm as lgb

In [None]:
#3 ===== V1 FOLDER SETUP =====
import os
from google.colab import drive

# Mount Google Drive so the V1 project folder is reachable from this runtime.
drive.mount('/content/drive')

# Everything for V1 lives under one root, with separate subfolders for models and data.
BASE_DIR = '/content/drive/MyDrive/antenna_ml/V1'
MODEL_DIR = f'{BASE_DIR}/models'
DATA_DIR  = f'{BASE_DIR}/data'

# Create both subfolders up front; existing folders are left untouched.
for _folder in (MODEL_DIR, DATA_DIR):
    os.makedirs(_folder, exist_ok=True)

print("Using V1 folder:", BASE_DIR)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Using V1 folder: /content/drive/MyDrive/antenna_ml/V1


In [None]:
#4 Load the dataset and keep one row per measurement configuration
DATA_PATH = f"{DATA_DIR}/antenna_dataset.csv"

# Columns that together identify a single configuration.
config_cols = [
    "Tx_Lat", "Tx_Lon", "RX_Lat", "RX_Lon", "RX_Rotation_deg",
    "TX_Sector", "TX_Element", "RX_Sector", "RX_Element", "Frequency_Hz",
]

raw_df = pd.read_csv(DATA_PATH)

# Strip surrounding whitespace from the header names before any column lookup.
raw_df.columns = raw_df.columns.str.strip()

# Rows sharing the same configuration are collapsed to their maximum SNR_dB.
df = raw_df.groupby(config_cols, as_index=False)["SNR_dB"].max()



In [None]:
#5 Encode the RX rotation (degrees) as a sine/cosine pair
# sin and cos are periodic, so 0 and 360 degrees produce the same feature values.
rot_rad = np.radians(df["RX_Rotation_deg"].to_numpy())

df = df.assign(
    RX_rot_sin=np.sin(rot_rad),
    RX_rot_cos=np.cos(rot_rad),
)

df.head()

Unnamed: 0,Tx_Lat,Tx_Lon,RX_Lat,RX_Lon,RX_Rotation_deg,TX_Sector,TX_Element,RX_Sector,RX_Element,Frequency_Hz,SNR_dB,RX_rot_sin,RX_rot_cos
0,30.968764,76.474704,30.968895,76.474175,0,1,1,1,1,5200000000,32.866855,0.0,1.0
1,30.968764,76.474704,30.968895,76.474175,0,1,1,1,1,5300000000,21.423559,0.0,1.0
2,30.968764,76.474704,30.968895,76.474175,0,1,1,1,1,5500000000,26.418292,0.0,1.0
3,30.968764,76.474704,30.968895,76.474175,0,1,1,1,1,5600000000,25.524836,0.0,1.0
4,30.968764,76.474704,30.968895,76.474175,0,1,1,1,1,5700000000,17.806764,0.0,1.0


In [None]:
#6 Group-wise train / validation / test split
from sklearn.model_selection import GroupShuffleSplit

#1) One group per distinct (RX_Lat, RX_Lon, RX_Rotation_deg), encoded as "lat_lon_rot"
df["group_key"] = df["RX_Lat"].astype(str).str.cat(
    [df["RX_Lon"].astype(str), df["RX_Rotation_deg"].astype(str)],
    sep="_",
)

groups = df["group_key"]

#2) First cut: 20% of the groups are held out as the test set
gss1 = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
trainval_idx, test_idx = next(gss1.split(df, groups=groups))
trainval_df, test_df = df.iloc[trainval_idx].copy(), df.iloc[test_idx].copy()

#3) Second cut: 20% of the remaining groups become the validation set
gss2 = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_idx = next(gss2.split(trainval_df, groups=trainval_df["group_key"]))
train_df, val_df = trainval_df.iloc[train_idx].copy(), trainval_df.iloc[val_idx].copy()

# Report the size of every split, in rows and in groups.
for label, part in (("Train rows:", train_df), ("Val rows  :", val_df), ("Test rows :", test_df)):
    print(label, part.shape)

print("Unique groups:")
for label, part in (("Train:", train_df), ("Val  :", val_df), ("Test :", test_df)):
    print(label, part["group_key"].nunique())

#sanity check: no group may appear in more than one split, so every overlap should be 0
train_keys, val_keys, test_keys = (
    set(part["group_key"]) for part in (train_df, val_df, test_df)
)
print("\nOverlap check:")
print("Train∩Val :", len(train_keys & val_keys))
print("Train∩Test:", len(train_keys & test_keys))
print("Val∩Test  :", len(val_keys & test_keys))

Train rows: (4080, 14)
Val rows  : (1040, 14)
Test rows : (1280, 14)
Unique groups:
Train: 51
Val  : 13
Test : 16

Overlap check:
Train∩Val : 0
Train∩Test: 0
Val∩Test  : 0


In [None]:
#7 Persist the three splits as CSV files
import os

# The splits are written into the models folder; make sure it exists.
MODEL_DIR = f'{BASE_DIR}/models'
os.makedirs(MODEL_DIR, exist_ok=True)

# Destination file of each split
train_path = f"{MODEL_DIR}/train_split.csv"
val_path   = f"{MODEL_DIR}/val_split.csv"
test_path  = f"{MODEL_DIR}/test_split.csv"

# Write each split without the pandas index column.
for split_df, split_path in ((train_df, train_path), (val_df, val_path), (test_df, test_path)):
    split_df.to_csv(split_path, index=False)

print("Files saved:")
print(train_path, val_path, test_path, sep="\n")

Files saved:
/content/drive/MyDrive/antenna_ml/V1/models/train_split.csv
/content/drive/MyDrive/antenna_ml/V1/models/val_split.csv
/content/drive/MyDrive/antenna_ml/V1/models/test_split.csv


In [None]:
#8 Model input columns, in the order they are selected from the split frames
FEATURES = [
    # transmitter position
    "Tx_Lat",
    "Tx_Lon",
    # receiver position
    "RX_Lat",
    "RX_Lon",
    # antenna selection on both ends
    "TX_Sector",
    "TX_Element",
    "RX_Sector",
    "RX_Element",
    # carrier frequency
    "Frequency_Hz",
    # receiver rotation, as sine/cosine of the angle
    "RX_rot_sin",
    "RX_rot_cos",
]

In [None]:
#9 Train the LightGBM regressor

# Feature matrix and SNR_dB target for each split
X_train, y_train = train_df[FEATURES], train_df["SNR_dB"]
X_val,   y_val   = val_df[FEATURES],   val_df["SNR_dB"]
X_test,  y_test  = test_df[FEATURES],  test_df["SNR_dB"]

lgbm_params = dict(
    n_estimators=2000,
    learning_rate=0.03,
    num_leaves=63,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42,
)
model = lgb.LGBMRegressor(**lgbm_params)

# Validation-driven early stopping is configured only when the validation split has rows;
# with an empty split the model is fitted without any eval set.
fit_params = (
    {
        "eval_set": [(X_val, y_val)],
        "eval_metric": "rmse",
        "callbacks": [lgb.early_stopping(stopping_rounds=100, verbose=True)],
    }
    if not X_val.empty
    else {}
)

model.fit(X_train, y_train, **fit_params)

print("Training done.")


print("FEATURES used in training:", FEATURES)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000439 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 87
[LightGBM] [Info] Number of data points in the train set: 4080, number of used features: 7
[LightGBM] [Info] Start training from score 36.575165
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1994]	valid_0's rmse: 1.64905	valid_0's l2: 2.71936
Training done.
FEATURES used in training: ['Tx_Lat', 'Tx_Lon', 'RX_Lat', 'RX_Lon', 'TX_Sector', 'TX_Element', 'RX_Sector', 'RX_Element', 'Frequency_Hz', 'RX_rot_sin', 'RX_rot_cos']


In [None]:
# Predictions for the held-out test split and for the training split
pred_test, pred_train = model.predict(X_test), model.predict(X_train)

# RMSE is taken as sqrt(MSE) here instead of going through the 'squared' argument
# of mean_squared_error. Test is reported first, then train.
for split_name, y_true, y_pred in (("Test", y_test, pred_test), ("Train", y_train, pred_train)):
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    print(f"{split_name} RMSE:", rmse)

Test RMSE: 1.888101015836321
Train RMSE: 0.8879262433384353


In [None]:
#11 Bundle the trained model with what inference needs, then persist it
# The TX coordinates are read from the first row of the dataset.
# NOTE(review): presumably there is a single TX position in the data - confirm.
tx_lat0 = float(df["Tx_Lat"].iloc[0])
tx_lon0 = float(df["Tx_Lon"].iloc[0])

bundle = dict(
    model=model,
    FEATURES=FEATURES,
    TX_LAT=tx_lat0,
    TX_LON=tx_lon0,
)

MODEL_PATH = f"{MODEL_DIR}/lgbm_antenna_model.pkl"
joblib.dump(bundle, MODEL_PATH)

print("Saved:", MODEL_PATH)

Saved: /content/drive/MyDrive/antenna_ml/V1/models/lgbm_antenna_model.pkl


In [None]:
#12
def recommend_best_combo(rx_lat, rx_lon, rx_rotation_deg, bundle, tx_element_fixed=1):
    """Predict SNR for every RX sector / RX element / frequency combination and return the best.

    Candidate values are the distinct ``RX_Sector``, ``RX_Element`` and
    ``Frequency_Hz`` values found in the notebook-level ``df``.

    Parameters
    ----------
    rx_lat, rx_lon : receiver coordinates placed in every candidate row.
    rx_rotation_deg : receiver rotation in degrees; converted to radians and
        fed to the model as its sine and cosine.
    bundle : dict providing ``"model"``, ``"FEATURES"``, ``"TX_LAT"`` and ``"TX_LON"``.
    tx_element_fixed : TX element used for every candidate; ``TX_Sector`` is
        computed from it as ``((tx_element_fixed - 1) // 4) + 1``.

    Returns
    -------
    (best, grid) : ``grid`` is the candidate DataFrame with an added
        ``pred_snr`` column, ``best`` is its row with the highest ``pred_snr``.

    Side effects
    ------------
    Prints the number of grid rows and writes the grid to
    ``{MODEL_DIR}/prediction_grid.xlsx`` on every call.
    """
    regressor = bundle["model"]

    # Candidate values, following the structure of the dataset
    rx_sectors = sorted(df["RX_Sector"].unique())       # 1–8
    rx_elements = sorted(df["RX_Element"].unique())     # 1–2
    frequencies = sorted(df["Frequency_Hz"].unique())   # 5 freqs

    # Values that are identical for every candidate row
    angle = math.radians(float(rx_rotation_deg))
    rot_sin, rot_cos = math.sin(angle), math.cos(angle)
    tx_sector = int((tx_element_fixed - 1) // 4) + 1

    # One row per (sector, element, frequency), frequency varying fastest
    candidates = [
        {
            "TX_Sector": tx_sector,
            "RX_Sector": sector,
            "Tx_Lat": bundle["TX_LAT"],
            "Tx_Lon": bundle["TX_LON"],
            "RX_Lat": rx_lat,
            "RX_Lon": rx_lon,
            "TX_Element": tx_element_fixed,
            "RX_Element": element,
            "Frequency_Hz": freq,
            "RX_rot_sin": rot_sin,
            "RX_rot_cos": rot_cos,
        }
        for sector in rx_sectors
        for element in rx_elements
        for freq in frequencies
    ]
    grid = pd.DataFrame(candidates)

    feature_cols = bundle["FEATURES"]
    grid["pred_snr"] = regressor.predict(grid[feature_cols])

    print("Grid rows:", len(grid))   # Should be 80

    grid.to_excel(f"{MODEL_DIR}/prediction_grid.xlsx", index=False)

    # Highest predicted SNR first; the top row is the recommendation.
    ranked = grid.sort_values("pred_snr", ascending=False)
    best = ranked.iloc[0]

    return best, grid

In [None]:
#13  Pick one RX location/rotation group from the TEST set and ask the model for its best combo
# A fixed random_state keeps the picked group, and therefore this cell's output,
# the same on every run. Change the seed to look at a different group.
g = test_df["group_key"].sample(1, random_state=42).iloc[0]
one_group = test_df[test_df["group_key"] == g].copy()

# Every row of a group shares the same RX position and rotation, so the first row is enough.
rx_lat = float(one_group["RX_Lat"].iloc[0])
rx_lon = float(one_group["RX_Lon"].iloc[0])
rx_rot = float(one_group["RX_Rotation_deg"].iloc[0])

# Manual override (uncomment to query a specific pose instead of the sampled one):
#rx_lat = 30.968895
#rx_lon = 76.474175
#rx_rot = 9

print("TEST group picked:", g)
print("RX:", rx_lat, rx_lon, "ROT:", rx_rot)

best, grid = recommend_best_combo(rx_lat, rx_lon, rx_rot, bundle)
print("Model chosen:")
print(best[[ "RX_Sector", "RX_Element", "Frequency_Hz", "pred_snr"]])



TEST group picked: 30.968895_76.474175_351
RX: 30.968895 76.474175 ROT: 351.0
Grid rows: 80
Model chosen:
RX_Sector       7.000000e+00
RX_Element      1.000000e+00
Frequency_Hz    5.200000e+09
pred_snr        4.604315e+01
Name: 60, dtype: float64


In [None]:
# Mount Google Drive again. The setup cell (#3) already performs this mount, so this
# call is redundant when the notebook is run top to bottom.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#14
def regret_for_location(df, rx_lat, rx_lon, rx_rotation_deg, chosen, verbose=True):
    """Compare the measured SNR of a chosen combo with the best measured SNR at one RX pose.

    Parameters
    ----------
    df : DataFrame with columns ``RX_Lat``, ``RX_Lon``, ``RX_Rotation_deg``,
        ``RX_Sector``, ``RX_Element``, ``Frequency_Hz`` and ``SNR_dB``.
    rx_lat, rx_lon, rx_rotation_deg : RX pose to evaluate. Rows are matched with
        exact equality, so pass values taken from ``df`` itself.
    chosen : mapping or Series providing ``RX_Sector``, ``RX_Element`` and
        ``Frequency_Hz`` of the combo to score.
    verbose : when True, print the filtered frame shapes, the chosen combo and
        the pose. When False, nothing is printed.

    Returns
    -------
    ``(best_actual, chosen_actual, regret)`` as floats, where
    ``regret = best_actual - chosen_actual``; or ``None`` when ``df`` has no row
    for the pose, or no row for the chosen combo at that pose.
    """
    # 1) filter by RX location
    at_location = (df["RX_Lat"] == rx_lat) & (df["RX_Lon"] == rx_lon)
    loc_df = df[at_location]
    if verbose:
        print("Location rows:", loc_df.shape)

    # 2) filter by rotation
    df_rot = loc_df[loc_df["RX_Rotation_deg"] == rx_rotation_deg]
    if verbose:
        print("Rotation rows:", df_rot.shape)

    if df_rot.empty:
        return None

    # 3) best actual SNR at this location+rotation
    best_actual = float(df_rot["SNR_dB"].max())

    # 4) rows matching the chosen combo (the TX side is not part of the match)
    mask = (
        (df_rot["RX_Element"] == chosen["RX_Element"]) &
        (df_rot["RX_Sector"] == chosen["RX_Sector"]) &
        (df_rot["Frequency_Hz"] == chosen["Frequency_Hz"])
    )

    if not mask.any():
        return None

    # Several rows can match (e.g. differing TX settings); score the best of them.
    chosen_actual = float(df_rot.loc[mask, "SNR_dB"].max())

    # Diagnostic output honours the verbose flag like the prints above.
    if verbose:
        print(chosen)
        print(rx_lat, rx_lon, rx_rotation_deg)

    regret = best_actual - chosen_actual
    return best_actual, chosen_actual, regret

In [None]:
#15

import numpy as np

# Evaluate the recommendation on every test group: regret is the best measured SNR
# at the group's RX pose minus the measured SNR of the combo the model picked.
test_groups = test_df["group_key"].unique()

regrets = []
skipped = 0

for g in test_groups:
    one_group = test_df[test_df["group_key"] == g].copy()

    # Every row of a group shares the same RX position and rotation.
    rx_lat = float(one_group["RX_Lat"].iloc[0])
    rx_lon = float(one_group["RX_Lon"].iloc[0])
    rx_rot = float(one_group["RX_Rotation_deg"].iloc[0])

    best, grid = recommend_best_combo(rx_lat, rx_lon, rx_rot, bundle)

    out = regret_for_location(df, rx_lat, rx_lon, rx_rot, best)

    # None means the pose or the chosen combo has no measured row to compare against.
    if out is None:
        skipped += 1
        continue

    best_actual, chosen_actual, regret = out
    regrets.append(regret)

regrets = np.array(regrets)


print("Total test groups:", len(test_groups))
print("Computed regrets :", len(regrets))
print("Skipped groups   :", skipped)
print("regrets:", regrets)

# min()/max() raise on an empty array and mean() yields NaN with a warning,
# so the summary is only computed when at least one regret exists.
if regrets.size:
    print("\nAverage regret:", regrets.mean())
    print("Median regret :", np.median(regrets))
    print("Min regret    :", regrets.min())
    print("Max regret    :", regrets.max())
else:
    print("\nNo regrets computed: every test group was skipped.")

Grid rows: 80
Location rows: (3200, 14)
Rotation rows: (80, 14)
TX_Sector       1.000000e+00
RX_Sector       7.000000e+00
Tx_Lat          3.096876e+01
Tx_Lon          7.647470e+01
RX_Lat          3.096889e+01
RX_Lon          7.647418e+01
TX_Element      1.000000e+00
RX_Element      1.000000e+00
Frequency_Hz    5.200000e+09
RX_rot_sin      0.000000e+00
RX_rot_cos      1.000000e+00
pred_snr        4.554672e+01
Name: 60, dtype: float64
30.968895 76.474175 0.0
Grid rows: 80
Location rows: (3200, 14)
Rotation rows: (80, 14)
TX_Sector       1.000000e+00
RX_Sector       2.000000e+00
Tx_Lat          3.096876e+01
Tx_Lon          7.647470e+01
RX_Lat          3.096889e+01
RX_Lon          7.647418e+01
TX_Element      1.000000e+00
RX_Element      2.000000e+00
Frequency_Hz    5.300000e+09
RX_rot_sin      5.877853e-01
RX_rot_cos      8.090170e-01
pred_snr        4.536844e+01
Name: 16, dtype: float64
30.968895 76.474175 36.0
Grid rows: 80
Location rows: (3200, 14)
Rotation rows: (80, 14)
TX_Sector    