In [54]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.linear_model import LinearRegression,HuberRegressor,RANSACRegressor,TheilSenRegressor,SGDRegressor
from sklearn.ensemble import RandomForestRegressor,HistGradientBoostingRegressor,AdaBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn import neighbors
from xgboost import XGBRegressor
from catboost import CatBoostRegressor

In [55]:
import random
import os
import torch
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.use_deterministic_algorithms(True, warn_only=True)
set_seed(42)

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
torch.cuda.empty_cache()

In [56]:
# freqs = ["7GHz", "28GHz", "800MHz"]
# tx_ids = [1, 2, 8, 18, 19]

# for freq in freqs:
#     for txid in tx_ids:
#         meta_file = f"./{freq}_Tx_{txid}.csv"
#         train_file = f"./{freq}_Tx_{txid}_train_data.csv"
#         output_file = f"./{freq}_Tx_{txid}_train_data_with_coords.csv"

#         if not os.path.exists(meta_file) or not os.path.exists(train_file):
#             print(f"Skipping {freq}_Tx_{txid} ‚Äî missing file(s).")
#             continue

#         # --- Read metadata (Tx and Rx coordinates) ---
#         meta_df = pd.read_csv(meta_file)
#         tx_lon = meta_df.iloc[:, 3].values  # 4th column (Tx_Lon)
#         tx_lat = meta_df.iloc[:, 4].values  # 5th column (Tx_Lat)
#         rx_lon = meta_df["Rx_lon"].values
#         rx_lat = meta_df["Rx_lat"].values

#         # --- Read train data ---
#         train_df = pd.read_csv(train_file)

#         # --- Prepare coordinate DataFrame ---
#         coords_df = pd.DataFrame({
#             "Tx_Lon": tx_lon,
#             "Tx_Lat": tx_lat,
#             "Rx_Lon": rx_lon,
#             "Rx_Lat": rx_lat
#         })

#         # --- Insert before the last column ---
#         insert_pos = len(train_df.columns) - 1
#         for col_name in ["Tx_Lon", "Tx_Lat", "Rx_Lon", "Rx_Lat"]:
#             train_df.insert(insert_pos, col_name, coords_df[col_name])
#             insert_pos += 1  # shift forward for next column

#         # --- Save output ---
#         train_df.to_csv(output_file, index=False)
#         print(f"‚úÖ Processed {output_file} (shape: {train_df.shape})")

In [57]:
# datasets = [
#     ('7GHz', './7GHz_Tx_1_train_data_with_coords.csv'),
#     ('7GHz', './7GHz_Tx_2_train_data_with_coords.csv'),
#     ('7GHz', './7GHz_Tx_8_train_data_with_coords.csv'),
#     ('7GHz', './7GHz_Tx_18_train_data_with_coords.csv'),
#     ('7GHz', './7GHz_Tx_19_train_data_with_coords.csv'),

#     ('28GHz', './28GHz_Tx_1_train_data_with_coords.csv'),
#     ('28GHz', './28GHz_Tx_2_train_data_with_coords.csv'),
#     ('28GHz', './28GHz_Tx_8_train_data_with_coords.csv'),
#     ('28GHz', './28GHz_Tx_18_train_data_with_coords.csv'),
#     ('28GHz', './28GHz_Tx_19_train_data_with_coords.csv'),

#     ('800MHz', './800MHz_Tx_1_train_data_with_coords.csv'),
#     ('800MHz', './800MHz_Tx_2_train_data_with_coords.csv'),
#     ('800MHz', './800MHz_Tx_8_train_data_with_coords.csv'),
#     ('800MHz', './800MHz_Tx_18_train_data_with_coords.csv'),
#     ('800MHz', './800MHz_Tx_19_train_data_with_coords.csv'),
# ]

# # Helper: Convert frequency string to numeric Hz
# def freq_to_hz(freq_str):
#     if "GHz" in freq_str:
#         return float(freq_str.replace("GHz", "")) * 1e9
#     elif "MHz" in freq_str:
#         return float(freq_str.replace("MHz", "")) * 1e6
#     else:
#         raise ValueError(f"Unrecognized frequency format: {freq_str}")

# # Loop through all train datasets
# for freq, path in datasets:
#     try:
#         df = pd.read_csv(path)
#         center_freq = freq_to_hz(freq)
#         df["CenterFreq_Hz"] = center_freq

#         # Save updated file (overwrite or create new one)
#         output_path = path.replace("_with_coords", "_with_coords_freq")
#         df.to_csv(output_path, index=False)

#         print(f"‚úÖ Added CenterFreq_Hz={center_freq:.0f} to {output_path} (shape={df.shape})")

#     except Exception as e:
#         print(f"‚ö†Ô∏è Error processing {path}: {e}")

In [58]:
def load_and_subset(data_path, index_path):
    df = pd.read_csv(data_path)
    idx = pd.read_csv(index_path, header=None).squeeze()  # assumes one column of indices
    df_subset = df.loc[idx].reset_index(drop=True)
    return df_subset

# ---- Example mapping: each data file with its index file ----
files = [
#     ('./7GHz_Tx_1_train_data_with_coords_freq.csv', './7GHz_Tx_1_random_subset'),
#     ('./7GHz_Tx_2_train_data_with_coords_freq.csv', './7GHz_Tx_2_random_subset'),
#     ('./7GHz_Tx_8_train_data_with_coords_freq.csv', './7GHz_Tx_8_random_subset'),
#     ('./7GHz_Tx_18_train_data_with_coords_freq.csv', './7GHz_Tx_18_random_subset'),
#     ('./7GHz_Tx_19_train_data_with_coords_freq.csv', './7GHz_Tx_19_random_subset'),

#     ('./28GHz_Tx_1_train_data_with_coords_freq.csv', './28GHz_Tx_1_random_subset'),
#     ('./28GHz_Tx_2_train_data_with_coords_freq.csv', './28GHz_Tx_2_random_subset'),
#     ('./28GHz_Tx_8_train_data_with_coords_freq.csv', './28GHz_Tx_8_random_subset'),
#     ('./28GHz_Tx_18_train_data_with_coords_freq.csv', './28GHz_Tx_18_random_subset'),
#     ('./28GHz_Tx_19_train_data_with_coords_freq.csv', './28GHz_Tx_19_random_subset'),

    ('./800MHz_Tx_1_train_data_with_coords_freq.csv', './800MHz_Tx_1_random_subset'),
#     ('./800MHz_Tx_2_train_data_with_coords_freq.csv', './800MHz_Tx_2_random_subset'),
#     ('./800MHz_Tx_8_train_data_with_coords_freq.csv', './800MHz_Tx_8_random_subset'),
#     ('./800MHz_Tx_18_train_data_with_coords_freq.csv', './800MHz_Tx_18_random_subset'),
#     ('./800MHz_Tx_19_train_data_with_coords_freq.csv', './800MHz_Tx_19_random_subset'),
]

# ---- Loop through all and create subsets ----
subset_dfs = []
for data_file, index_file in files:
    df_subset = load_and_subset(data_file, index_file)
    subset_dfs.append(df_subset)
    print(f"Loaded {data_file} ‚Üí subset shape: {df_subset.shape}")

# # ---- Access individually if needed ----
# df1_subset, df2_subset, df3_subset, df4_subset, df5_subset, \
# df6_subset, df7_subset, df8_subset, df9_subset, df10_subset, \
# df11_subset, df12_subset, df13_subset, df14_subset, df15_subset = subset_dfs

# ---- (Optional) Combine all subsets into one big DataFrame ----
df = pd.concat(subset_dfs, ignore_index=True)
# df = df.sample(frac = 0.5)
df = df.sample(frac = 0.05)
# df = df.sample(frac = 0.0125)

print("Combined subset shape:", df.shape)

Loaded ./800MHz_Tx_1_train_data_with_coords_freq.csv ‚Üí subset shape: (8000, 26)
Combined subset shape: (400, 26)


In [59]:
print(df.columns.tolist())

['distance_m', 'fspl_dB', 'umi_pathloss_dB', 'tx_height_m', 'f3_polygon_count', 'los_intersection_count', 'dist_to_first_intersection_m', 'dist_to_last_intersection_m', 'is_los', 'fresnel3_obstr_poly_area', 'los_obstr_poly_area', 'f1_polygon_count', 'fresnel1_obstr_poly_area', 'tx_sphere_poly_count', 'tx_sphere_obstr_poly_area', 'avg_tx_clutter_height', 'rx_sphere_poly_count', 'rx_sphere_obstr_poly_area', 'avg_rx_clutter_height', 'pointnet_prediction', 'Tx_Lon', 'Tx_Lat', 'Rx_Lon', 'Rx_Lat', 'measured_pathloss_dB', 'CenterFreq_Hz']


In [60]:
df1 = pd.read_csv('./7GHz_Tx_5_test_data_with_coords_freq.csv')
df2 = pd.read_csv('./7GHz_Tx_9_test_data_with_coords_freq.csv')
df3 = pd.read_csv('./7GHz_Tx_12_test_data_with_coords_freq.csv')
df4 = pd.read_csv('./7GHz_Tx_14_test_data_with_coords_freq.csv')
df5 = pd.read_csv('./7GHz_Tx_20_test_data_with_coords_freq.csv')

df6 = pd.read_csv('./28GHz_Tx_5_test_data_with_coords_freq.csv')
df7 = pd.read_csv('./28GHz_Tx_9_test_data_with_coords_freq.csv')
df8 = pd.read_csv('./28GHz_Tx_12_test_data_with_coords_freq.csv')
df9 = pd.read_csv('./28GHz_Tx_14_test_data_with_coords_freq.csv')
df10 = pd.read_csv('./28GHz_Tx_20_test_data_with_coords_freq.csv')


df11 = pd.read_csv('./800MHz_Tx_5_test_data_with_coords_freq.csv')
df12 = pd.read_csv('./800MHz_Tx_9_test_data_with_coords_freq.csv')
df13 = pd.read_csv('./800MHz_Tx_12_test_data_with_coords_freq.csv')
df14 = pd.read_csv('./800MHz_Tx_14_test_data_with_coords_freq.csv')
df15 = pd.read_csv('./800MHz_Tx_20_test_data_with_coords_freq.csv')

In [61]:
df2 = pd.concat([df13], ignore_index=True)   #800

In [62]:
# X_test = df2[['distance_m', 'fspl_dB', 'umi_pathloss_dB', 'tx_height_m',
#               'f3_polygon_count', 'los_intersection_count', 'dist_to_first_intersection_m', 
#               'dist_to_last_intersection_m', 'is_los', 'fresnel3_obstr_poly_area', 'los_obstr_poly_area',
#               'f1_polygon_count', 'fresnel1_obstr_poly_area', 'tx_sphere_poly_count', 'tx_sphere_obstr_poly_area', 
#               'avg_tx_clutter_height', 'rx_sphere_poly_count', 'rx_sphere_obstr_poly_area', 'avg_rx_clutter_height',
#              'Tx_Lon', 'Tx_Lat', 'Rx_Lon', 'Rx_Lat','CenterFreq_Hz']]


X_test = df2[['distance_m', 'fspl_dB', 'umi_pathloss_dB', 'tx_height_m',
              'f3_polygon_count', 'los_intersection_count', 'dist_to_first_intersection_m', 
              'dist_to_last_intersection_m', 'is_los', 'fresnel3_obstr_poly_area', 'los_obstr_poly_area',
              'f1_polygon_count', 'fresnel1_obstr_poly_area', 'tx_sphere_poly_count', 'tx_sphere_obstr_poly_area', 
              'avg_tx_clutter_height', 'rx_sphere_poly_count', 'rx_sphere_obstr_poly_area', 'avg_rx_clutter_height',
             'Tx_Lon', 'Tx_Lat', 'Rx_Lon', 'Rx_Lat']]

# X_test = df2[['distance_m', 'umi_pathloss_dB', 'tx_height_m','Tx_Lon', 'Tx_Lat', 'Rx_Lon', 'Rx_Lat','CenterFreq_Hz']]
y_test = df2[['measured_pathloss_dB']]

In [63]:
#set the input features
# X_train = df[['distance_m', 'fspl_dB', 'umi_pathloss_dB', 'tx_height_m',
#               'f3_polygon_count', 'los_intersection_count', 'dist_to_first_intersection_m', 
#               'dist_to_last_intersection_m', 'is_los', 'fresnel3_obstr_poly_area', 'los_obstr_poly_area',
#               'f1_polygon_count', 'fresnel1_obstr_poly_area', 'tx_sphere_poly_count', 'tx_sphere_obstr_poly_area', 
#               'avg_tx_clutter_height', 'rx_sphere_poly_count', 'rx_sphere_obstr_poly_area', 'avg_rx_clutter_height', 
#               'pointnet_prediction']]
# X_train = df[['distance_m', 'fspl_dB', 'umi_pathloss_dB', 'tx_height_m',
#               'f3_polygon_count', 'los_intersection_count', 'dist_to_first_intersection_m', 
#               'dist_to_last_intersection_m', 'is_los', 'fresnel3_obstr_poly_area', 'los_obstr_poly_area',
#               'f1_polygon_count', 'fresnel1_obstr_poly_area', 'tx_sphere_poly_count', 'tx_sphere_obstr_poly_area', 
#               'avg_tx_clutter_height', 'rx_sphere_poly_count', 'rx_sphere_obstr_poly_area', 'avg_rx_clutter_height',
#               'Tx_Lon', 'Tx_Lat', 'Rx_Lon', 'Rx_Lat','CenterFreq_Hz']]


X_train = df[['distance_m', 'fspl_dB', 'umi_pathloss_dB', 'tx_height_m',
              'f3_polygon_count', 'los_intersection_count', 'dist_to_first_intersection_m', 
              'dist_to_last_intersection_m', 'is_los', 'fresnel3_obstr_poly_area', 'los_obstr_poly_area',
              'f1_polygon_count', 'fresnel1_obstr_poly_area', 'tx_sphere_poly_count', 'tx_sphere_obstr_poly_area', 
              'avg_tx_clutter_height', 'rx_sphere_poly_count', 'rx_sphere_obstr_poly_area', 'avg_rx_clutter_height',
              'Tx_Lon', 'Tx_Lat', 'Rx_Lon', 'Rx_Lat']]

# X_train = df[['distance_m', 'umi_pathloss_dB', 'tx_height_m','Tx_Lon', 'Tx_Lat', 'Rx_Lon', 'Rx_Lat','CenterFreq_Hz']]
#set the target variable
y_train = df[['measured_pathloss_dB']]

In [64]:
X_valid = df2[['distance_m', 'fspl_dB', 'umi_pathloss_dB', 'tx_height_m',
              'f3_polygon_count', 'los_intersection_count', 'dist_to_first_intersection_m', 
              'dist_to_last_intersection_m', 'is_los', 'fresnel3_obstr_poly_area', 'los_obstr_poly_area',
              'f1_polygon_count', 'fresnel1_obstr_poly_area', 'tx_sphere_poly_count', 'tx_sphere_obstr_poly_area', 
              'avg_tx_clutter_height', 'rx_sphere_poly_count', 'rx_sphere_obstr_poly_area', 'avg_rx_clutter_height',
             'Tx_Lon', 'Tx_Lat', 'Rx_Lon', 'Rx_Lat']]

# X_test = df2[['distance_m', 'umi_pathloss_dB', 'tx_height_m','Tx_Lon', 'Tx_Lat', 'Rx_Lon', 'Rx_Lat','CenterFreq_Hz']]
y_valid = df2[['measured_pathloss_dB']]

In [65]:
print(y_train)

      measured_pathloss_dB
2215             98.382195
2582            124.151909
1662            119.537445
3027            131.411621
4343            115.419434
...                    ...
6851            104.830780
1972            101.205910
6527            101.457291
7918            118.991615
3733            123.696854

[400 rows x 1 columns]


In [66]:
import optuna
from catboost import CatBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,mean_squared_error
import time

# Step 1: Data Splitting (Training and Validation Only)
# X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Step 2: Identify and Mark Categorical Features
cat_features = [X_train.columns.get_loc(col) for col in X_train.select_dtypes(include=['object', 'category']).columns]

# Step 3: Optuna Hyperparameter Tuning
def objective(trial):
    # Define hyperparameter search space
    params = {
        "iterations": trial.suggest_int("iterations", 100, 1500),
        "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.1),
        "depth": trial.suggest_int("depth", 1, 10),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-3, 10),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.5, 1.0)
    }

    # Initialize CatBoost model with sampled parameters
    model = CatBoostRegressor(
        loss_function="MAE",
        cat_features=cat_features,
        verbose=0,  # Disable training output for optimization
        **params
    )

    # Train model
    model.fit(X_train, y_train, eval_set=(X_valid, y_valid), early_stopping_rounds=50)

    # Predict and evaluate performance
    y_pred = model.predict(X_valid)
    return np.sqrt(mean_squared_error(y_valid, y_pred)) 

start_optuna = time.time()

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)

end_optuna = time.time()
optuna_time = end_optuna - start_optuna

# Step 5: Output the Best Results
best_params = study.best_params
print("Best RMSE:", study.best_value)
print("Best Parameters:", best_params)
print(f"üéØ Optuna tuning time: {optuna_time/60:.2f} minutes")

# Step 6: Refit Final Model Using Best Parameters
model = CatBoostRegressor(
    loss_function="MAE",
    cat_features=cat_features,
    verbose=100,
    **best_params
)

# Measure model training time
start_train = time.time()

model.fit(
    X_train, y_train,
    eval_set=(X_valid, y_valid),
    early_stopping_rounds=50
)

end_train = time.time()
train_time = end_train - start_train

print(f"‚è±Ô∏è Final model training time: {train_time:.2f} seconds")

# Step 7: Save model and parameters
model.save_model("best_catboost_model.cbm")
print("‚úÖ Model saved as best_catboost_model.cbm")

best_params_file = "./best_optuna_params.txt"
with open(best_params_file, "w") as f:
    f.write(str(best_params))
print("‚úÖ Best parameters saved to", best_params_file)

# Step 8: Save runtime results
with open("training_runtime_summary.txt", "w") as f:
    f.write(f"Best RMSE: {study.best_value:.4f}\n")
    f.write(f"Optuna tuning time: {optuna_time/60:.2f} minutes\n")
    f.write(f"Final model training time: {train_time:.2f} seconds\n")
print("üìÑ Training summary saved to training_runtime_summary.txt")

[I 2025-11-01 01:51:18,325] A new study created in memory with name: no-name-8b89a9b5-61b9-4dff-b951-fda63a123352
[I 2025-11-01 01:51:19,053] Trial 0 finished with value: 19.01787646729108 and parameters: {'iterations': 1080, 'learning_rate': 0.039025552497958506, 'depth': 8, 'l2_leaf_reg': 5.838311379533672, 'subsample': 0.5987443051665071, 'colsample_bylevel': 0.8576015449301211}. Best is trial 0 with value: 19.01787646729108.
[I 2025-11-01 01:51:19,289] Trial 1 finished with value: 19.268915463588467 and parameters: {'iterations': 1107, 'learning_rate': 0.07539888384154877, 'depth': 8, 'l2_leaf_reg': 0.6649674415387996, 'subsample': 0.9114013653380599, 'colsample_bylevel': 0.9957582322466257}. Best is trial 0 with value: 19.01787646729108.
[I 2025-11-01 01:51:19,946] Trial 2 finished with value: 19.291000270179232 and parameters: {'iterations': 739, 'learning_rate': 0.004860022194172711, 'depth': 6, 'l2_leaf_reg': 5.053585446435495, 'subsample': 0.8636131202707533, 'colsample_byleve

Best RMSE: 18.684707983151455
Best Parameters: {'iterations': 1491, 'learning_rate': 0.0789528198721598, 'depth': 1, 'l2_leaf_reg': 7.358744425531777, 'subsample': 0.6576333205766673, 'colsample_bylevel': 0.9143925506303029}
üéØ Optuna tuning time: 0.10 minutes
0:	learn: 8.9094930	test: 17.7458389	best: 17.7458389 (0)	total: 277us	remaining: 414ms
100:	learn: 6.5255338	test: 15.3114973	best: 15.3114973 (100)	total: 22.2ms	remaining: 305ms
200:	learn: 6.2481295	test: 15.1191104	best: 15.1191104 (200)	total: 42.6ms	remaining: 273ms
Stopped by overfitting detector  (50 iterations wait)

bestTest = 15.0921928
bestIteration = 211

Shrink model to first 212 iterations.
‚è±Ô∏è Final model training time: 0.07 seconds
‚úÖ Model saved as best_catboost_model.cbm
‚úÖ Best parameters saved to ./best_optuna_params.txt
üìÑ Training summary saved to training_runtime_summary.txt


In [67]:
# freqs = ["7GHz", "28GHz", "800MHz"]
# tx_ids = [5, 9, 12, 14, 20]

# for freq in freqs:
#     for txid in tx_ids:
#         meta_file = f"./{freq}_Tx_{txid}.csv"
#         train_file = f"./{freq}_Tx_{txid}_test_data.csv"
#         output_file = f"./{freq}_Tx_{txid}_test_data_with_coords.csv"

#         if not os.path.exists(meta_file) or not os.path.exists(train_file):
#             print(f"Skipping {freq}_Tx_{txid} ‚Äî missing file(s).")
#             continue

#         # --- Read metadata (Tx and Rx coordinates) ---
#         meta_df = pd.read_csv(meta_file)
#         tx_lon = meta_df.iloc[:, 3].values  # 4th column (Tx_Lon)
#         tx_lat = meta_df.iloc[:, 4].values  # 5th column (Tx_Lat)
#         rx_lon = meta_df["Rx_lon"].values
#         rx_lat = meta_df["Rx_lat"].values

#         # --- Read train data ---
#         train_df = pd.read_csv(train_file)

#         # --- Prepare coordinate DataFrame ---
#         coords_df = pd.DataFrame({
#             "Tx_Lon": tx_lon,
#             "Tx_Lat": tx_lat,
#             "Rx_Lon": rx_lon,
#             "Rx_Lat": rx_lat
#         })

#         # --- Insert before the last column ---
#         insert_pos = len(train_df.columns) - 1
#         for col_name in ["Tx_Lon", "Tx_Lat", "Rx_Lon", "Rx_Lat"]:
#             train_df.insert(insert_pos, col_name, coords_df[col_name])
#             insert_pos += 1  # shift forward for next column

#         # --- Save output ---
#         train_df.to_csv(output_file, index=False)
#         print(f"‚úÖ Processed {output_file} (shape: {train_df.shape})")

In [68]:
# datasets = [
#     ('7GHz', './7GHz_Tx_5_test_data_with_coords.csv'),
#     ('7GHz', './7GHz_Tx_9_test_data_with_coords.csv'),
#     ('7GHz', './7GHz_Tx_12_test_data_with_coords.csv'),
#     ('7GHz', './7GHz_Tx_14_test_data_with_coords.csv'),
#     ('7GHz', './7GHz_Tx_20_test_data_with_coords.csv'),

#     ('28GHz', './28GHz_Tx_5_test_data_with_coords.csv'),
#     ('28GHz', './28GHz_Tx_9_test_data_with_coords.csv'),
#     ('28GHz', './28GHz_Tx_12_test_data_with_coords.csv'),
#     ('28GHz', './28GHz_Tx_14_test_data_with_coords.csv'),
#     ('28GHz', './28GHz_Tx_20_test_data_with_coords.csv'),

#     ('800MHz', './800MHz_Tx_5_test_data_with_coords.csv'),
#     ('800MHz', './800MHz_Tx_9_test_data_with_coords.csv'),
#     ('800MHz', './800MHz_Tx_12_test_data_with_coords.csv'),
#     ('800MHz', './800MHz_Tx_14_test_data_with_coords.csv'),
#     ('800MHz', './800MHz_Tx_20_test_data_with_coords.csv'),
# ]

# # Helper: Convert frequency string to numeric Hz
# def freq_to_hz(freq_str):
#     if "GHz" in freq_str:
#         return float(freq_str.replace("GHz", "")) * 1e9
#     elif "MHz" in freq_str:
#         return float(freq_str.replace("MHz", "")) * 1e6
#     else:
#         raise ValueError(f"Unrecognized frequency format: {freq_str}")

# # Loop through all test datasets
# for freq, path in datasets:
#     try:
#         df = pd.read_csv(path)
#         center_freq = freq_to_hz(freq)
#         df["CenterFreq_Hz"] = center_freq

#         # Save updated file (overwrite or create new one)
#         output_path = path.replace("_with_coords", "_with_coords_freq")
#         df.to_csv(output_path, index=False)

#         print(f"‚úÖ Added CenterFreq_Hz={center_freq:.0f} to {output_path} (shape={df.shape})")

#     except Exception as e:
#         print(f"‚ö†Ô∏è Error processing {path}: {e}")

In [69]:
# df2 = pd.concat([df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13, df14, df15], ignore_index=True)

In [70]:
# df2 = pd.concat([df11, df12, df13, df14, df15], ignore_index=True)   #800


In [71]:
# df2 = pd.concat([df1, df2, df3, df4, df5], ignore_index=True)       #7G
# df2 = pd.concat([df5], ignore_index=True)       #7G

In [72]:
# df2 = pd.concat([df6, df7, df8, df9, df10], ignore_index=True)       #28G
# df2 = pd.concat([df10], ignore_index=True)       #28G

In [73]:
# df2 = pd.concat([df1, df2, df3, df4, df5, df11, df12, df13, df14, df15], ignore_index=True)

In [74]:
from catboost import CatBoostRegressor

# Load the saved model
model = CatBoostRegressor()
model.load_model("best_catboost_model.cbm")
print("Model loaded successfully!")

Model loaded successfully!


In [75]:
y_pred = model.predict(X_test)
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print(f"Test RMSE: {rmse:.4f}")

Test RMSE: 18.6847


In [76]:
import pandas as pd

# Load the original file
# file_path = "Result_Task1_7GHz_Tx_20.csv"
# file_path = "Result_Task1_28GHz_Tx_20.csv"
file_path = "Result_Task1_800MHz_Tx_12.csv"

df = pd.read_csv(file_path)

# Ensure y_pred has the same length as df
print("Original shape:", df.shape)
print("Predictions length:", len(y_pred))

if len(y_pred) != len(df):
    raise ValueError("‚ùå Length mismatch: y_pred and CSV rows must match!")

# Add predictions as the last column
df["Predicted PL"] = y_pred

# Save updated file (overwrite or create new one)
df.to_csv(file_path, index=False)

print(f"‚úÖ Predictions added and saved to {file_path} (shape={df.shape})")

Original shape: (24022, 14)
Predictions length: 24022
‚úÖ Predictions added and saved to Result_Task1_800MHz_Tx_12.csv (shape=(24022, 14))
