In [4]:
import pandas as pd
import numpy as np
import time
import os
from tqdm.notebook import tqdm # Use tqdm.auto

# --- Configuration ---
RAW_DATA_FILE = 'dataset/5GDL.csv'
OUTPUT_FILE = 'result/merged_location_only_fingerprints.csv'
RESULTS_DIR = 'result'

# --- Column Names (!!!--ADJUST THESE TO MATCH YOUR CSV--!!!) ---
# TIME_COL = 'Time' # REMOVED
LAT_COL = 'Latitude'
LON_COL = 'Longitude'
# List ALL other columns you want to potentially merge/keep (NO Time column here)
PARAMETER_COLS = [
    'Technology_Mode', 'NR_UE_PCI_0', 'NR_UE_RSRP_0', 'NR_UE_RSRQ_0', 'NR_UE_SINR_0',
    'NR_UE_Nbr_PCI_0', 'NR_UE_Nbr_PCI_1', 'NR_UE_Nbr_PCI_2', 'NR_UE_Nbr_PCI_3', 'NR_UE_Nbr_PCI_4',
    'NR_UE_Nbr_RSRP_0', 'NR_UE_Nbr_RSRP_1', 'NR_UE_Nbr_RSRP_2', 'NR_UE_Nbr_RSRP_3', 'NR_UE_Nbr_RSRP_4',
    'NR_UE_Nbr_RSRQ_0', 'NR_UE_Nbr_RSRQ_1', 'NR_UE_Nbr_RSRQ_2', 'NR_UE_Nbr_RSRQ_3', 'NR_UE_Nbr_RSRQ_4',
    'NR_UE_Timing_Advance', 'NR_UE_Pathloss_DL_0', 'NR_UE_Throughput_PDCP_DL', 'App_Throughput_DL',
    'NR_UE_NACK_Rate_DL_0', 'NR_UE_Ack_As_Nack_DL_0', 'NR_UE_MCS_DL_0', 'NR_UE_RB_Num_DL_0',
    'NR_UE_Modulation_Avg_DL_0', 'NR_UE_RI_DL_0', 'NR_UE_BLER_DL_0', 'NR_UE_CCE_AggregationLev_0',
    'NR_UE_Power_Tx_PUSCH_0', 'NR_UE_Power_Tx_PRACH_0', 'NR_UE_NACK_Rate_UL_0',
    'NR_UE_RACH_Attempt', 'NR_UE_RACH_OK', 'NR_UE_RACH_Fail', 'NR_UE_RACH_Procedure_Count',
    'NR_UE_RRCReEstAttempt', 'NR_UE_RRCReEstFail', 'NR_UE_RRCReEst_EndResult',
    'NR_UE_RRCConnectionAttempt', 'NR_UE_RRCConnectionSetupOk', 'NR_UE_RRCConnectionComplete', # Removed extra comma
    'NR_UE_RRCConnectionDrop', 'NR_UE_RRCHOAttempt', 'NR_UE_RRCHOOK',
    'NR_RRC_MsgType', 'NAS_5GS_MM_MessageType', 'NAS_5GS_SM_MessageType'
    # Add any other columns present in your raw file that you want to keep/merge
]

# --- Aggregation Parameters ---
# MAX_TIME_DIFF_SECONDS = 2.0 # REMOVED

# --- Helper Function ---
def aggregate_static_points_no_time(df_sorted, lat_col, lon_col, param_cols):
    """ Aggregates rows with the exact same Lat/Lon. """
    aggregated_data = []
    passthrough_indices = []
    current_group_indices = []
    group_lat = np.nan
    group_lon = np.nan

    print("Iterating through sorted data to find static groups (Location Only)...")
    for i in tqdm(range(len(df_sorted)), desc="Processing Rows"):
        row = df_sorted.iloc[i]
        current_lat = row[lat_col]
        current_lon = row[lon_col]

        is_same_location = (current_lat == group_lat) and (current_lon == group_lon)

        # --- Check if current row continues the existing group ---
        if is_same_location:
            current_group_indices.append(i)
        else:
            # --- End of the previous group, process it ---
            if len(current_group_indices) > 1: # Only aggregate if group has > 1 record
                group_df = df_sorted.iloc[current_group_indices]
                agg_result = {}
                first_row_in_group = group_df.iloc[0]
                agg_result[lat_col] = first_row_in_group[lat_col]
                agg_result[lon_col] = first_row_in_group[lon_col]
                # ADD A COUNT COLUMN TO SEE HOW MANY ROWS WERE MERGED
                agg_result['merged_row_count'] = len(group_df)

                for col in param_cols:
                    # Handle case where column might not exist in the specific group_df (if passthrough)
                    if col in group_df.columns:
                        first_valid_value = group_df[col].dropna().iloc[0] if not group_df[col].dropna().empty else np.nan
                        agg_result[col] = first_valid_value
                    else:
                         agg_result[col] = np.nan # Column didn't exist in this group
                aggregated_data.append(agg_result)
            elif len(current_group_indices) == 1:
                passthrough_indices.append(current_group_indices[0])

            # --- Start a new group with the current row ---
            current_group_indices = [i]
            group_lat = current_lat
            group_lon = current_lon

    # --- Process the very last group ---
    if len(current_group_indices) > 1:
        group_df = df_sorted.iloc[current_group_indices]
        agg_result = {}
        first_row_in_group = group_df.iloc[0]
        agg_result[lat_col] = first_row_in_group[lat_col]
        agg_result[lon_col] = first_row_in_group[lon_col]
        agg_result['merged_row_count'] = len(group_df) # Add count
        for col in param_cols:
             if col in group_df.columns:
                first_valid_value = group_df[col].dropna().iloc[0] if not group_df[col].dropna().empty else np.nan
                agg_result[col] = first_valid_value
             else:
                 agg_result[col] = np.nan
        aggregated_data.append(agg_result)
    elif len(current_group_indices) == 1:
        passthrough_indices.append(current_group_indices[0])

    # --- Combine aggregated data and pass-through data ---
    df_aggregated = pd.DataFrame(aggregated_data)
    df_passthrough = df_sorted.loc[passthrough_indices].copy() # Make a copy
    # Add the count column to passthrough rows
    if not df_passthrough.empty:
        df_passthrough['merged_row_count'] = 1

    # Ensure column order consistency
    # Define order based on aggregated df columns which includes the new count col
    if not df_aggregated.empty:
        all_cols_order = df_aggregated.columns.tolist()
    elif not df_passthrough.empty:
        all_cols_order = df_passthrough.columns.tolist()
    else:
        all_cols_order = [] # Should not happen if input wasn't empty


    # Filter columns that actually exist in the dataframes before concat
    agg_cols = [col for col in all_cols_order if col in df_aggregated.columns]
    pass_cols = [col for col in all_cols_order if col in df_passthrough.columns]

    df_final = pd.concat([df_aggregated[agg_cols], df_passthrough[pass_cols]], ignore_index=True)

    # Re-sort by original index maybe? Or Lat/Lon? Time is gone.
    # Sorting by Lat/Lon keeps aggregated points together.
    df_final = df_final.sort_values(by=[LAT_COL, LON_COL]).reset_index(drop=True)

    return df_final

# --- Main Execution (Modified) ---
if __name__ == "__main__":
    # ... (Load Data - consider using na_values and low_memory=False) ...
    print(f"Starting data aggregation process (Location Only)...")
    start_run_time = time.time()
    os.makedirs(RESULTS_DIR, exist_ok=True)

    try:
        print(f"Loading raw data from: {RAW_DATA_FILE}")
        # Specify NA values and disable low_memory if DTypeWarnings persist
        common_na = ["N/A", "-", "--", "null", "", " "]
        df_raw = pd.read_csv(RAW_DATA_FILE, na_values=common_na, low_memory=False)
        print(f"Loaded {len(df_raw)} rows.")
    except FileNotFoundError: # ... handle errors ...
        print(f"ERROR: Raw data file not found at {RAW_DATA_FILE}")
        exit()
    except Exception as e:
        print(f"ERROR: Failed to load raw data. {e}")
        exit()


    print("Cleaning data and converting types (Time column ignored)...")
    # --- Drop Time column ---
    if 'Time' in df_raw.columns:
        df_raw = df_raw.drop(columns=['Time'])
        print("Time column dropped.")

    df_raw[LAT_COL] = pd.to_numeric(df_raw[LAT_COL], errors='coerce')
    df_raw[LON_COL] = pd.to_numeric(df_raw[LON_COL], errors='coerce')

    initial_rows = len(df_raw)
    df_raw.dropna(subset=[LAT_COL, LON_COL], inplace=True) # Drop only based on Lat/Lon
    if len(df_raw) < initial_rows:
        print(f"Dropped {initial_rows - len(df_raw)} rows with invalid Lat or Lon.")

    if df_raw.empty: #... handle empty df ...
        print("ERROR: No valid data remaining after cleaning Lat/Lon.")
        exit()


    # --- Sort Data (No Time) ---
    print(f"Sorting data by {LAT_COL}, {LON_COL}...")
    df_sorted = df_raw.sort_values(by=[LAT_COL, LON_COL]).reset_index(drop=True)

    # --- Perform Aggregation (No Time) ---
    df_merged = aggregate_static_points_no_time(df_sorted, LAT_COL, LON_COL, PARAMETER_COLS)

    print(f"Aggregation complete. Original rows (after Lat/Lon cleaning): {len(df_raw)}, Merged rows: {len(df_merged)}")

    # --- Add check for large merge counts ---
    if 'merged_row_count' in df_merged.columns:
        max_merged = df_merged['merged_row_count'].max()
        avg_merged = df_merged[df_merged['merged_row_count'] > 1]['merged_row_count'].mean()
        print(f"Max rows merged into one location: {max_merged}")
        print(f"Avg rows merged (for merged points): {avg_merged:.2f}")
        if max_merged > 100: # Arbitrary threshold
             print("WARNING: Very large groups detected based on location merge. Results might combine data from distant times. Inspect rows with high 'merged_row_count'.")


    # --- Save Result ---
    # ... (Save df_merged to OUTPUT_FILE) ...
    try:
        print(f"Saving merged data to: {OUTPUT_FILE}")
        # Ensure PARAMETER_COLS used for saving are correct
        save_cols = [LAT_COL, LON_COL, 'merged_row_count'] + [col for col in PARAMETER_COLS if col in df_merged.columns]
        df_merged[save_cols].to_csv(OUTPUT_FILE, index=False)
        print("Save successful.")
    except Exception as e:
        print(f"ERROR: Failed to save merged data. {e}")

    end_run_time = time.time() #... calculate duration ...
    print(f"Total execution time: {end_run_time - start_run_time:.2f} seconds.")

Starting data aggregation process (Location Only)...
Loading raw data from: dataset/5GDL.csv
Loaded 690188 rows.
Cleaning data and converting types (Time column ignored)...
Time column dropped.
Dropped 617710 rows with invalid Lat or Lon.
Sorting data by Latitude, Longitude...
Iterating through sorted data to find static groups (Location Only)...


Processing Rows:   0%|          | 0/72478 [00:00<?, ?it/s]

Aggregation complete. Original rows (after Lat/Lon cleaning): 72478, Merged rows: 1514
Max rows merged into one location: 558.0
Avg rows merged (for merged points): 47.87
Saving merged data to: result/merged_location_only_fingerprints.csv
Save successful.
Total execution time: 10.21 seconds.


In [None]:
# -*- coding: utf-8 -*-
"""
Script to load geospatial signal data from CSV, handle missing values,
plot locations on static and interactive maps, and perform filtering.
"""

import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx
from shapely.geometry import Point
import folium
import numpy as np # Used implicitly by pandas, good practice to import
import sys # For exiting script gracefully

# =============================================================================
# Configuration Section - MODIFY THESE VALUES
# =============================================================================

# --- Configuration ---
CSV_FILE_PATH = 'dataset/5GDL.csv' # <<< CHANGE TO YOUR CSV FILE PATH
LAT_COLUMN = 'Latitude'           # <<< CHANGE if your latitude column name is different
LON_COLUMN = 'Longitude'          # <<< CHANGE if your longitude column name is different
RSRP_COLUMN = 'NR_UE_RSRP_0'              # <<< CHANGE if your RSRP column name is different
SINR_COLUMN = 'NR_UE_SINR_0'
TIMESTAMP_COLUMN = 'Time'
RSSI_COLUMN = 'NR_UE_RSRQ_0'
# Add other columns you care about
OTHER_COLUMNS = ['NR_UE_SINR_0', 'NR_UE_Timing_Advance', '']

# --- Default Values for Missing Data ---
# Choose values unlikely to occur naturally in your data
DEFAULT_RSRP = -140.0
DEFAULT_RSSI = -120.0
DEFAULT_SINR = -20.0
# Add defaults for other columns if they might be missing and numeric
# DEFAULT_OTHER_SIGNAL = -999.0

# --- Map Settings ---
STATIC_MAP_FILENAME = 'signal_map_static.png'
FILTERED_MAP_FILENAME = 'signal_map_filtered.png'
INTERACTIVE_MAP_FILENAME = 'signal_map_interactive.html'
DEFAULT_ZOOM_START = 13 # Zoom level for Folium map

# --- Filtering Parameters ---
# Example 1: Find data near this point
TARGET_LATITUDE = 40.7132
TARGET_LONGITUDE = -74.0055

# Example 2: Filter by RSRP range
MIN_RSRP_THRESHOLD = -100
MAX_RSRP_THRESHOLD = -90

# =============================================================================
# Helper Function for Robust Numeric Conversion
# =============================================================================
def safe_to_numeric(series, column_name):
    """Converts a pandas Series to numeric, coercing errors, and reports issues."""
    original_dtype = series.dtype
    numeric_series = pd.to_numeric(series, errors='coerce')
    num_nan_introduced = numeric_series.isna().sum() - series.isna().sum()
    if num_nan_introduced > 0:
        print(f"  Warning: Column '{column_name}' had {num_nan_introduced} non-numeric values coerced to NaN.")
    if not pd.api.types.is_numeric_dtype(numeric_series) and not numeric_series.isna().all():
         # This case is rare after 'coerce', but good practice
         print(f"  Warning: Column '{column_name}' could not be fully converted to numeric (original dtype: {original_dtype}). Check data.")
    return numeric_series

# =============================================================================
# Main Script Logic
# =============================================================================
print("--- Starting Signal Data Processing ---")

# --- 1. Load Data ---
print(f"\n[Step 1/6] Loading data from '{CSV_FILE_PATH}'...")
try:
    df = pd.read_csv(CSV_FILE_PATH)
    print(f"  Successfully loaded CSV with {len(df)} rows and {len(df.columns)} columns.")
    print("  Columns found:", df.columns.tolist())
    # print("  First 5 rows (raw):\n", df.head()) # Uncomment for debugging

except FileNotFoundError:
    print(f"  Error: CSV file not found at '{CSV_FILE_PATH}'. Please check the path.")
    sys.exit(1) # Exit script
except Exception as e:
    print(f"  Error: An unexpected error occurred loading the CSV: {e}")
    sys.exit(1)

# --- Verify Essential Columns Exist ---
required_columns = [LAT_COLUMN, LON_COLUMN]
if RSRP_COLUMN: required_columns.append(RSRP_COLUMN)
missing_cols = [col for col in required_columns if col not in df.columns]
if missing_cols:
    print(f"  Error: Missing essential columns in CSV: {missing_cols}. Check configuration.")
    sys.exit(1)

# --- 2. Pre-process Data (Timestamp, Numeric Conversion, Fill NaNs) ---
print("\n[Step 2/6] Pre-processing data...")

# Convert Timestamp
if TIMESTAMP_COLUMN and TIMESTAMP_COLUMN in df.columns:
    try:
        df[TIMESTAMP_COLUMN] = pd.to_datetime(df[TIMESTAMP_COLUMN])
        print(f"  Converted '{TIMESTAMP_COLUMN}' to datetime objects.")
    except Exception as e:
        print(f"  Warning: Could not parse timestamp column '{TIMESTAMP_COLUMN}'. Error: {e}. Skipping conversion.")
else:
     if TIMESTAMP_COLUMN:
         print(f"  Info: Timestamp column '{TIMESTAMP_COLUMN}' not found in CSV.")

# Define columns to fill and their default values
columns_to_fill = {}
if RSRP_COLUMN in df.columns: columns_to_fill[RSRP_COLUMN] = DEFAULT_RSRP
if RSSI_COLUMN in df.columns: columns_to_fill[RSSI_COLUMN] = DEFAULT_RSSI
if SINR_COLUMN in df.columns: columns_to_fill[SINR_COLUMN] = DEFAULT_SINR
# Add other numeric columns needing defaults here:
# if 'OtherSignal' in df.columns: columns_to_fill['OtherSignal'] = DEFAULT_OTHER_SIGNAL

# Apply numeric conversion and fill NaNs
print("  Converting signal columns to numeric and filling missing values...")
for col, default_val in columns_to_fill.items():
    if col in df.columns:
        nan_count_before = df[col].isna().sum()
        # Convert to numeric first, handling potential non-numeric entries
        df[col] = safe_to_numeric(df[col], col)
        nan_count_after_coerce = df[col].isna().sum()

        if nan_count_after_coerce > 0:
            df[col].fillna(default_val, inplace=True)
            filled_count = nan_count_after_coerce - nan_count_before # Count NaNs filled (original + coerced)
            if filled_count > 0 :
                 print(f"    Filled {filled_count} NaN/invalid values in '{col}' with {default_val}.")
            else:
                 # This happens if all NaNs were introduced by coercion and then filled
                 if nan_count_after_coerce > 0:
                      print(f"    Filled {nan_count_after_coerce} NaN values (originally non-numeric) in '{col}' with {default_val}.")
        # else: # No NaNs found or introduced
        #     print(f"    No NaN values found or filled in '{col}'.") # Can be noisy, often omitted
    else:
        print(f"  Warning: Column '{col}' specified for filling not found in DataFrame.")

# print("  First 5 rows (after processing):\n", df.head()) # Uncomment for debugging

# --- 3. Create GeoDataFrame ---
print("\n[Step 3/6] Creating GeoDataFrame...")
try:
    # Ensure Lat/Lon are numeric, drop rows if not convertible
    df[LAT_COLUMN] = safe_to_numeric(df[LAT_COLUMN], LAT_COLUMN)
    df[LON_COLUMN] = safe_to_numeric(df[LON_COLUMN], LON_COLUMN)

    original_rows = len(df)
    df.dropna(subset=[LAT_COLUMN, LON_COLUMN], inplace=True)
    rows_dropped = original_rows - len(df)
    if rows_dropped > 0:
        print(f"  Warning: Dropped {rows_dropped} rows with invalid or missing Latitude/Longitude values.")

    if df.empty:
         print("  Error: No valid location data remaining after cleaning Lat/Lon. Cannot proceed.")
         sys.exit(1)

    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df[LON_COLUMN], df[LAT_COLUMN]),
        crs="EPSG:4326" # WGS84 Coordinate Reference System
    )
    print(f"  Successfully created GeoDataFrame with {len(gdf)} points.")
    # print(gdf.head()) # Uncomment for debugging

except KeyError as e:
     print(f"  Error: Missing required column for GeoDataFrame creation: {e}. Check LAT_COLUMN/LON_COLUMN names.")
     sys.exit(1)
except Exception as e:
    print(f"  Error: An unexpected error occurred creating the GeoDataFrame: {e}")
    sys.exit(1)

# --- 4. Plotting: Static Map ---
print(f"\n[Step 4/6] Generating static map ('{STATIC_MAP_FILENAME}')...")
try:
    fig, ax = plt.subplots(1, 1, figsize=(12, 12))

    # Determine color range, potentially excluding the default fill value
    color_col = RSRP_COLUMN
    plot_column_data = gdf[color_col]
    default_val = columns_to_fill.get(color_col) # Get default if defined

    # Calculate vmin/vmax from actual data, ignoring default fill value if specified
    valid_data_for_range = plot_column_data
    if default_val is not None:
        valid_data_for_range = plot_column_data[plot_column_data != default_val]

    vmin = valid_data_for_range.min() if not valid_data_for_range.empty else plot_column_data.min()
    vmax = valid_data_for_range.max() if not valid_data_for_range.empty else plot_column_data.max()


    gdf.plot(
        column=color_col,
        ax=ax,
        legend=True,
        markersize=15,
        cmap='viridis', # Good perceptually uniform colormap
        vmin=vmin,      # Set color limits based on data range
        vmax=vmax,
        legend_kwds={'label': f"{color_col} (dBm)",
                     'orientation': "horizontal"}
    )

    # Add Basemap
    try:
        print("  Adding basemap...")
        ctx.add_basemap(ax, crs=gdf.crs.to_string(), source=ctx.providers.OpenStreetMap.Mapnik, zoom='auto')
        print("  Basemap added successfully.")
    except Exception as e:
        print(f"  Warning: Could not add basemap. Plot will show points only. Error: {e}")

    ax.set_title(f'Signal Measurement Locations colored by {color_col}')
    ax.set_axis_off() # Hide lat/lon axes if basemap is present
    plt.tight_layout()
    plt.savefig(STATIC_MAP_FILENAME, dpi=300)
    print(f"  Static map saved as '{STATIC_MAP_FILENAME}'.")
    # plt.show() # Optionally display the plot directly

except Exception as e:
    print(f"  Error: An error occurred during static plotting: {e}")

plt.close(fig) # Close the figure to free memory

# --- 5. Filtering and Querying ---
print("\n[Step 5/6] Performing Filtering and Querying...")

# Example 1: Find RSRP (and other data) near a specific Lat/Lon
print(f"\n  --- Query 1: Data nearest to ({TARGET_LATITUDE}, {TARGET_LONGITUDE}) ---")
try:
    target_point = Point(TARGET_LONGITUDE, TARGET_LATITUDE)
    # Ensure target point is valid
    if not target_point.is_valid:
         print(f"  Error: Invalid target coordinates specified.")
    else:
        # Calculate distances (in degrees for EPSG:4326)
        distances = gdf.geometry.distance(target_point)

        if not distances.empty:
            nearest_index = distances.idxmin()
            nearest_data = gdf.loc[nearest_index]

            print(f"  Data point closest to target:")
            print(f"    Actual Location (Lat, Lon): {nearest_data.geometry.y:.5f}, {nearest_data.geometry.x:.5f}")
            print(f"    Distance (approx degrees): {distances.min():.6f}") # Note: Degree distance != meters!
            # Display key values, checking if they exist and handling defaults
            for col, default in columns_to_fill.items():
                 if col in nearest_data:
                     val = nearest_data[col]
                     print(f"    {col}: {val}{' (Default)' if val == default else ''}")
            # Display timestamp if available
            if TIMESTAMP_COLUMN and TIMESTAMP_COLUMN in nearest_data and pd.notna(nearest_data[TIMESTAMP_COLUMN]):
                 print(f"    {TIMESTAMP_COLUMN}: {nearest_data[TIMESTAMP_COLUMN]}")
            # Display other configured columns
            # for col in OTHER_DATA_COLUMNS:
            #      if col in nearest_data and col not in columns_to_fill: # Avoid printing twice
            #           print(f"    {col}: {nearest_data[col]}")
        else:
            print(f"  No data points found to calculate nearest distance.")

except Exception as e:
    print(f"  Error: An error occurred finding the nearest point: {e}")


# Example 2: Find locations where RSRP is within a certain range
print(f"\n  --- Query 2: Locations where {RSRP_COLUMN} is between {MIN_RSRP_THRESHOLD} and {MAX_RSRP_THRESHOLD} dBm ---")
if RSRP_COLUMN not in gdf.columns:
     print(f"  Skipping query: RSRP column '{RSRP_COLUMN}' not found.")
else:
    try:
        # Apply the filter using boolean indexing
        filtered_gdf = gdf[
            (gdf[RSRP_COLUMN] >= MIN_RSRP_THRESHOLD) &
            (gdf[RSRP_COLUMN] <= MAX_RSRP_THRESHOLD)
        ]

        print(f"  Found {len(filtered_gdf)} locations matching the criteria.")

        if not filtered_gdf.empty:
            # Display first few matching locations
            display_cols = [LAT_COLUMN, LON_COLUMN, RSRP_COLUMN]
            if TIMESTAMP_COLUMN in filtered_gdf.columns: display_cols.append(TIMESTAMP_COLUMN)
            print("  First 5 matching locations:\n", filtered_gdf[display_cols].head())

            # Optional: Plot only the filtered points on a new map
            print(f"  Generating filtered static map ('{FILTERED_MAP_FILENAME}')...")
            try:
                fig_filtered, ax_filtered = plt.subplots(1, 1, figsize=(10, 10))
                # Plot all points faintly for context
                gdf.plot(ax=ax_filtered, color='grey', markersize=5, alpha=0.3, label='All Data')
                # Highlight filtered points
                filtered_gdf.plot(ax=ax_filtered, color='red', markersize=25, label=f'{RSRP_COLUMN} {MIN_RSRP_THRESHOLD} to {MAX_RSRP_THRESHOLD}')
                # Add Basemap
                try:
                    ctx.add_basemap(ax_filtered, crs=gdf.crs.to_string(), source=ctx.providers.OpenStreetMap.Mapnik, zoom='auto')
                except Exception as e:
                    print(f"    Warning: Could not add basemap to filtered plot. Error: {e}")

                ax_filtered.set_title(f'Locations with {RSRP_COLUMN} in range [{MIN_RSRP_THRESHOLD}, {MAX_RSRP_THRESHOLD}] dBm')
                ax_filtered.set_axis_off()
                plt.legend()
                plt.tight_layout()
                plt.savefig(FILTERED_MAP_FILENAME, dpi=300)
                print(f"  Filtered map saved as '{FILTERED_MAP_FILENAME}'.")
                # plt.show() # Optionally display
                plt.close(fig_filtered) # Close figure

            except Exception as e:
                print(f"  Error: An error occurred plotting the filtered map: {e}")
                if 'fig_filtered' in locals(): plt.close(fig_filtered) # Ensure cleanup if error occurred mid-plot

        else:
            print("  No locations matched the filtering criteria.")

    except Exception as e:
        print(f"  Error: An error occurred during RSRP filtering: {e}")


# --- 6. Plotting: Interactive Map (Folium) ---
print(f"\n[Step 6/6] Generating interactive map ('{INTERACTIVE_MAP_FILENAME}')...")
try:
    # Create map centered around the mean location
    map_center = [gdf[LAT_COLUMN].mean(), gdf[LON_COLUMN].mean()]
    interactive_map = folium.Map(location=map_center, zoom_start=DEFAULT_ZOOM_START)

    # Add points to the map
    for idx, row in gdf.iterrows():
        # Create popup text with relevant info
        popup_html = f"<b><u>Location {idx}</u></b><br>"
        popup_html += f"<b>Lat:</b> {row[LAT_COLUMN]:.5f}<br>"
        popup_html += f"<b>Lon:</b> {row[LON_COLUMN]:.5f}<br>"

        # Add signal values, indicating if it was a default fill
        all_signal_cols = list(columns_to_fill.keys()) # Includes RSRP, RSSI, SINR etc. if defined
        for col in all_signal_cols:
            if col in row:
                 val = row[col]
                 default_val = columns_to_fill.get(col) # Get the default for comparison
                 popup_html += f"<b>{col}:</b> {val}{' (Default)' if val == default_val else ''}<br>"

        # Add other data columns specified
        # for col in OTHER_DATA_COLUMNS:
        #     if col in row and col not in columns_to_fill: # Avoid adding signals twice
        #          popup_html += f"<b>{col}:</b> {row[col]}<br>"

        # Add timestamp if available
        if TIMESTAMP_COLUMN and TIMESTAMP_COLUMN in row and pd.notna(row[TIMESTAMP_COLUMN]):
             popup_html += f"<b>{TIMESTAMP_COLUMN}:</b> {row[TIMESTAMP_COLUMN]}"

        # Determine marker color based on RSRP (customize this logic if needed)
        rsrp_val = row.get(RSRP_COLUMN) # Use .get for safety if RSRP_COLUMN somehow missing
        rsrp_default = columns_to_fill.get(RSRP_COLUMN)
        color = 'grey' # Default/Unknown

        if rsrp_val is not None:
            if rsrp_val == rsrp_default:
                 color = 'darkgrey' # Specific color for default/missing RSRP
            elif rsrp_val > -95:
                 color = 'green'
            elif rsrp_val > -105:
                 color = 'orange'
            elif rsrp_val <= -105: # Catches values below -105
                 color = 'red'
            # Add more elif conditions for finer granularity if desired

        # Add marker to map
        folium.CircleMarker(
            location=[row[LAT_COLUMN], row[LON_COLUMN]],
            radius=5, # Adjust size
            popup=folium.Popup(popup_html, max_width=300),
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7
        ).add_to(interactive_map)

    # Save the map to an HTML file
    interactive_map.save(INTERACTIVE_MAP_FILENAME)
    print(f"  Interactive map saved as '{INTERACTIVE_MAP_FILENAME}'. Open this file in your web browser.")

except Exception as e:
    print(f"  Error: An error occurred generating the interactive map: {e}")


print("\n--- Script finished ---")

--- Starting Signal Data Processing ---

[Step 1/6] Loading data from 'dataset/5GDL.csv'...


  df = pd.read_csv(CSV_FILE_PATH)


  Successfully loaded CSV with 690188 rows and 56 columns.
  Columns found: ['Message', 'Time', 'Longitude', 'Latitude', 'Technology_Mode', 'NR_UE_PCI_0', 'NR_UE_RSRP_0', 'NR_UE_RSRQ_0', 'NR_UE_SINR_0', 'NR_UE_Nbr_PCI_0', 'NR_UE_Nbr_PCI_1', 'NR_UE_Nbr_PCI_2', 'NR_UE_Nbr_PCI_3', 'NR_UE_Nbr_PCI_4', 'NR_UE_Nbr_RSRP_0', 'NR_UE_Nbr_RSRP_1', 'NR_UE_Nbr_RSRP_2', 'NR_UE_Nbr_RSRP_3', 'NR_UE_Nbr_RSRP_4', 'NR_UE_Nbr_RSRQ_0', 'NR_UE_Nbr_RSRQ_1', 'NR_UE_Nbr_RSRQ_2', 'NR_UE_Nbr_RSRQ_3', 'NR_UE_Nbr_RSRQ_4', 'NR_UE_Timing_Advance', 'NR_UE_Pathloss_DL_0', 'NR_UE_Throughput_PDCP_DL', 'App_Throughput_DL', 'NR_UE_NACK_Rate_DL_0', 'NR_UE_Ack_As_Nack_DL_0', 'NR_UE_MCS_DL_0', 'NR_UE_RB_Num_DL_0', 'NR_UE_Modulation_Avg_DL_0', 'NR_UE_RI_DL_0', 'NR_UE_BLER_DL_0', 'NR_UE_CCE_AggregationLev_0', 'NR_UE_Power_Tx_PUSCH_0', 'NR_UE_Power_Tx_PRACH_0', 'NR_UE_NACK_Rate_UL_0', 'NR_UE_RACH_Attempt', 'NR_UE_RACH_OK', 'NR_UE_RACH_Fail', 'NR_UE_RACH_Procedure_Count', 'NR_UE_RRCReEstAttempt', 'NR_UE_RRCReEstFail', 'NR_UE_RRCR

  df[TIMESTAMP_COLUMN] = pd.to_datetime(df[TIMESTAMP_COLUMN])
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(default_val, inplace=True)


  Successfully created GeoDataFrame with 72478 points.

[Step 4/6] Generating static map ('signal_map_static.png')...
  Adding basemap...
  Basemap added successfully.
  Static map saved as 'signal_map_static.png'.

[Step 5/6] Performing Filtering and Querying...

  --- Query 1: Data nearest to (40.7132, -74.0055) ---
  Data point closest to target:
    Actual Location (Lat, Lon): 41.10568, 29.01534
    Distance (approx degrees): 103.021588
    NR_UE_RSRP_0: -140.0 (Default)
    NR_UE_RSRQ_0: -120.0 (Default)
    NR_UE_SINR_0: -20.0 (Default)
    Time: 2025-03-14 12:27:08

  --- Query 2: Locations where NR_UE_RSRP_0 is between -100 and -90 dBm ---
  Found 196 locations matching the criteria.
  First 5 matching locations:
      Latitude  Longitude  NR_UE_RSRP_0                Time
490  41.10723   29.02949         -90.4 2025-03-14 12:14:40
561  41.10722   29.02947         -90.1 2025-03-14 12:14:41
633  41.10724   29.02944         -90.7 2025-03-14 12:14:42
704  41.10725   29.02941        


  distances = gdf.geometry.distance(target_point)


  Filtered map saved as 'signal_map_filtered.png'.

[Step 6/6] Generating interactive map ('signal_map_interactive.html')...
  Interactive map saved as 'signal_map_interactive.html'. Open this file in your web browser.

--- Script finished ---
