In [1]:
import math
import geopandas as gpd
from shapely.geometry import Point, Polygon
import pandas as pd
import numpy as np
from fiona.drvsupport import supported_drivers
from simplekml import Kml, Color
supported_drivers['KML'] = 'rw'
supported_drivers['libkml'] = 'rw' # enable KML support which is disabled by default
supported_drivers['LIBKML'] = 'rw' # enable KML support which is disabled 
import numpy as np
import geopandas as gpd
from simplekml import Kml, Color
import os
import time
from geopy.distance import geodesic
from sklearn.cluster import DBSCAN
from shapely.ops import unary_union

In [2]:
def generate_bins(geom, bin_size=10, x_ref=193602, y_ref=2602072):
    """Tile the bounding box of ``geom`` with square bins snapped to a shared grid.

    Grid lines sit at ``x_ref + k * bin_size`` and ``y_ref + k * bin_size`` for
    integer ``k``. The bounding box of ``geom`` is expanded outward to the
    nearest grid lines so that it matches the whole grid, and one square
    polygon is produced per grid cell inside the expanded box. Because the
    origin is shared, bins generated for different geometries line up.

    The bins cover the full (expanded) bounding box; they are not clipped to,
    or filtered by, the outline of ``geom``.

    Parameters
    ----------
    geom : geometry or None
        Object exposing ``bounds`` as ``(xmin, ymin, xmax, ymax)`` and an
        ``is_empty`` flag (e.g. a shapely geometry).
    bin_size : int or float, default 10
        Edge length of each square bin, in the units of ``geom``'s coordinates.
    x_ref, y_ref : int or float
        Origin of the grid. Only its position modulo ``bin_size`` affects the
        result. The defaults are the values previously hard-coded here.
        NOTE(review): presumably chosen for the projected CRS the caller uses
        -- confirm before reusing with another CRS.

    Returns
    -------
    list of shapely.geometry.Polygon
        Bins ordered row by row (y ascending), x ascending within each row.
        Empty when ``geom`` is ``None`` or empty.

    Raises
    ------
    ValueError
        If ``bin_size`` is not strictly positive.
    """
    if bin_size <= 0:
        raise ValueError(f"bin_size must be positive, got {bin_size!r}")
    # Missing/empty geometries have no usable bounds; they simply yield no bins.
    if geom is None or geom.is_empty:
        return []

    xmin, ymin, xmax, ymax = geom.bounds

    # Integer indices of the grid lines enclosing the bounding box. Working
    # with indices (instead of dividing the snapped extent by bin_size again)
    # keeps the bin count exact even when bin_size is not an integer.
    col_start = math.floor((xmin - x_ref) / bin_size)
    col_stop = math.ceil((xmax - x_ref) / bin_size)
    row_start = math.floor((ymin - y_ref) / bin_size)
    row_stop = math.ceil((ymax - y_ref) / bin_size)

    # Lower-left corner coordinates, computed as origin + k * bin_size so the
    # same bin gets identical coordinates whichever geometry produced it.
    x_coords = x_ref + np.arange(col_start, col_stop) * bin_size
    y_coords = y_ref + np.arange(row_start, row_stop) * bin_size

    # All (x, y) combinations of lower-left corners, flattened row by row.
    x_grid, y_grid = np.meshgrid(x_coords, y_coords)
    return [
        Polygon([
            (x, y), (x + bin_size, y), (x + bin_size, y + bin_size), (x, y + bin_size)
        ])
        for x, y in zip(x_grid.ravel(), y_grid.ravel())
    ]

In [3]:
## Get border file and raw data file
# Run configuration: keep exactly one `area_name` active to choose the study area.
# area_name = "DN_KCN_AN_DON"
# area_name = "DN_KCN_HOA_KHANH"
# area_name = "QN_KCN_VSIP"
# area_name = "KH_QL1"
area_name = "GLCS_TEST"
date_str = '291023'  # suffix of the Pilot_<date_str> folder holding the raw CSV files
bin_size = 10  # bin edge length handed to generate_bins in a later cell

# Border polygon(s) of the selected area.
input_polygon_file = f"../Polygon/{area_name}.kml"
polygon_df = gpd.read_file(input_polygon_file)

# Alternative: read one pre-merged raw file instead of a folder of CSVs.
# input_raw_file = f"../Raw/{area_name}/{area_name}_{date_str}.csv"
# df_raw = pd.read_csv(input_raw_file)

folder_path = f"../Raw/{area_name}/Pilot_{date_str}"
print(f"Reading from multiple input raw files in {folder_path}")
df_list = []
# os.listdir returns entries in arbitrary order; sort them so the row order of
# df_raw is the same on every run and every machine.
for filename in sorted(os.listdir(folder_path)):
    if filename.endswith('.csv'):
        file_path = os.path.join(folder_path, filename)
        # low_memory=False: parse each file in one pass so column dtypes are
        # inferred from the whole column rather than chunk by chunk.
        df = pd.read_csv(file_path, low_memory=False)
        df_list.append(df)
# Fail with a clear message instead of pd.concat's "No objects to concatenate".
if not df_list:
    raise FileNotFoundError(f"No .csv files found in {folder_path}")
df_raw = pd.concat(df_list, ignore_index=True)

Reading from multiple input raw files in ../Raw/GLCS_TEST/Pilot_291023


In [5]:
## Divide bins and locate points
print(f"***===RUNNING:{area_name}, {bin_size}*{bin_size} grid ==***")

### Read a Polygon and divide
# Bins are generated in a projected CRS so bin_size is a length, not degrees.
# NOTE(review): the captured sample output shows longitudes around 108.2 E,
# which is at the eastern edge of UTM zone 48N (EPSG:32648) -- confirm this is
# the intended grid CRS for every study area.
polygon_df = polygon_df.to_crs('EPSG:32648')
bins_polygons = [
    bin_poly
    for geom in polygon_df['geometry']
    for bin_poly in generate_bins(geom, bin_size)
]
# Create the GeoDataFrame for bins
bins_df = gpd.GeoDataFrame({'geometry': bins_polygons}, crs=polygon_df.crs)
# Centroids are taken in the projected CRS and only then converted to lon/lat.
bins_df['centroid'] = bins_df['geometry'].centroid
bins_df_4326 = bins_df.to_crs("EPSG:4326")
# to_crs on the frame only reprojects the active geometry column, so the
# centroid column has to be reprojected explicitly.
bins_df_4326['centroid'] = bins_df_4326['centroid'].to_crs("EPSG:4326")
bins_df_4326['longitude'] = bins_df_4326['centroid'].x
bins_df_4326['latitude'] = bins_df_4326['centroid'].y
# bin_id encodes the centroid as "bin_<latitude>_<longitude>".
bins_df_4326['bin_id'] = "bin" + "_" + bins_df_4326['latitude'].astype(str) + "_" + bins_df_4326['longitude'].astype(str)
bins_df_4326 = bins_df_4326[['bin_id', 'geometry']]
print('==Complete dividing bins')

### Load raw data file
# Columns kept from the raw export, in this order.
df_point_columns = [
    'Start Time',
    'eNodeB',
    'EARFCN (DL)',
    'EARFCN (UL)',
    'Physical Cell ID',
    'Latitude',
    'Longitude',
    'UL Volume (kB)',
    'DL Volume (kB)',
    # 'CQI 0' ... 'CQI 15'
    *[f'CQI {cqi}' for cqi in range(16)],
    # '<rank> Cell Label' followed by '<rank> Cell RSRP' for each rank
    *[
        f'{rank} Cell {field}'
        for rank in ('Serving', 'Best', 'Second Best', 'Third Best',
                     'Fourth Best', 'Fifth Best', 'Sixth Best')
        for field in ('Label', 'RSRP')
    ],
]
# Copy so that df_raw itself is never modified by the steps below.
df_point = df_raw[df_point_columns].copy()

### Locate bins for each sample
# Non-inplace reset keeps the positional index as an 'index' column on
# bins_df_4326 (as before) without mutating a column slice in place.
bins_df_4326 = bins_df_4326.reset_index()
polygonDF = bins_df_4326[['bin_id', 'geometry']].copy()
# Create Point Dataframe (vectorised; x = longitude, y = latitude)
geometric_points = gpd.points_from_xy(df_point['Longitude'], df_point['Latitude'])
# "EPSG:4326" replaces the deprecated {'init': 'epsg:4326'} form, which emitted
# a pyproj FutureWarning on every run.
pointDF = gpd.GeoDataFrame(df_point, crs="EPSG:4326", geometry=geometric_points)
pointDF = pointDF.to_crs(polygonDF.crs)
# Join 2 DFs: keep only samples that fall inside some bin.
joinDF = gpd.sjoin(pointDF, polygonDF, how='inner', predicate='within')
# index_right holds the polygonDF row label of the matched bin; use it to
# attach that bin's polygon to each sample.
joinDF['polygon'] = joinDF['index_right'].map(polygonDF['geometry'])
print('==Complete Locating point')

### Preprocessing
# Drop samples without a matched polygon or without a best-cell label.
df_segment = (
    joinDF[joinDF['polygon'].notna()]
    .dropna(subset=['Best Cell Label'])
    .copy()
)
# String form of the polygon, used as a grouping key in later cells.
df_segment['polygon_str'] = df_segment['polygon'].astype(str)

***===RUNNING:DNHC_HAI_CHAU_2, 50*50 grid ==***
==Complete dividing bins


  in_crs_string = _prepare_from_proj_string(in_crs_string)


==Complete Locating point


## TEST

In [6]:
# Per-bin summary of 'Best Cell RSRP': median value and number of non-null samples.
rsrp_groups = (
    df_segment[["polygon_str", "bin_id", "Best Cell RSRP"]]
    .groupby(['polygon_str', 'bin_id'])
)
df_rsrp_median = rsrp_groups.median()
df_rsrp_count = rsrp_groups.count()
# One row per (polygon_str, bin_id) with columns 'Median RSRP' and 'No.Samples';
# the group keys are turned back into ordinary columns at the end.
df_rsrp = (
    df_rsrp_median
    .rename(columns={'Best Cell RSRP': 'Median RSRP'})
    .assign(**{'No.Samples': df_rsrp_count['Best Cell RSRP']})
    .reset_index()
)

In [7]:
# Print the bins whose median best-cell RSRP is strictly below the cutoff.
rsrp_cutoff = -108
low_rsrp_bins = df_rsrp.loc[df_rsrp['Median RSRP'].lt(rsrp_cutoff)]
print(low_rsrp_bins)

                                           polygon_str  \
143  POLYGON ((108.217172 16.065633, 108.217639 16....   

                                       bin_id  Median RSRP  No.Samples  
143  bin_16.065854789398973_108.2174094940039       -109.5        3454  


In [8]:
# Display the per-bin RSRP summary table (long frames are shown truncated).
df_rsrp

Unnamed: 0,polygon_str,bin_id,Median RSRP,No.Samples
0,"POLYGON ((108.213475 16.067945, 108.213942 16....",bin_16.068167438074557_108.2137118417687,-81.5,111
1,"POLYGON ((108.213482 16.068397, 108.213949 16....",bin_16.068618740864853_108.21371909566663,-96.0,578
2,"POLYGON ((108.213489 16.068848, 108.213956 16....",bin_16.06907004363297_108.21372634979565,-90.5,401
3,"POLYGON ((108.213497 16.069299, 108.213963 16....",bin_16.069521346378895_108.21373360415573,-91.5,1147
4,"POLYGON ((108.213504 16.069751, 108.213971 16....",bin_16.069972649102635_108.21374085874692,-89.5,1990
...,...,...,...,...
164,"POLYGON ((108.217668 16.067431, 108.218135 16....",bin_16.067652972038463_108.21790528286542,-88.5,1332
165,"POLYGON ((108.21807 16.063362, 108.218536 16.0...",bin_16.063584241190863_108.21830665856783,-83.5,7304
166,"POLYGON ((108.218099 16.065167, 108.218565 16....",bin_16.06538944480779_108.21833570825554,-88.5,2174
167,"POLYGON ((108.218106 16.065619, 108.218573 16....",bin_16.06584074565654_108.21834297125595,-91.5,5001
