# Datascience with Ericsson

In [1]:
# help notebook find src module
import sys

sys.path.append(".")


### Dokonaj konwersji dostępnych danych z drivetestów na DataFrame

In [2]:
from src.cell_pooler import CellPooler
from src.helper import read_all_data

raw_data = read_all_data("./data/krakow")
extr = CellPooler(raw_data)

data = extr.points
data.info()
data


<class 'pandas.core.frame.DataFrame'>
Int64Index: 1673 entries, 0 to 1881
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   time         1673 non-null   datetime64[ns]
 1   latitude     1673 non-null   float64       
 2   longitude    1673 non-null   float64       
 3   accuracy     1673 non-null   float64       
 4   pci          1673 non-null   object        
 5   bts_cell_id  1673 non-null   object        
 6   lte          1673 non-null   object        
dtypes: datetime64[ns](1), float64(3), object(3)
memory usage: 104.6+ KB


Unnamed: 0,time,latitude,longitude,accuracy,pci,bts_cell_id,lte
0,2022-10-20 08:08:52,50.109425,19.818191,1100.000,[397],{260-03},"[{'signal': {'rssi': -53, 'rsrp': -120, 'rsrq'..."
2,2022-10-20 08:09:16,50.102181,19.826999,1000.000,[397],{260-03},"[{'signal': {'rssi': -53, 'rsrp': -120, 'rsrq'..."
4,2022-10-20 08:09:41,50.098707,19.831510,1200.000,[397],{260-03},"[{'signal': {'rssi': -53, 'rsrp': -120, 'rsrq'..."
5,2022-10-20 08:09:51,50.098707,19.831510,1200.000,[397],{260-03},"[{'signal': {'rssi': -53, 'rsrp': -120, 'rsrq'..."
12,2022-10-20 08:11:03,50.081862,19.862988,197.784,[175],{260-03},"[{'signal': {'rssi': inf, 'rsrp': -112, 'rsrq'..."
...,...,...,...,...,...,...,...
1877,2022-10-20 08:19:20,50.018845,19.890238,13.697,"[483, 95]","{260-02, 260-06}","[{'signal': {'rssi': -51, 'rsrp': -74, 'rsrq':..."
1878,2022-10-20 08:19:30,50.018989,19.890363,12.740,"[483, 95]","{260-02, 260-06}","[{'signal': {'rssi': -51, 'rsrp': -70, 'rsrq':..."
1879,2022-10-20 08:19:40,50.019089,19.890472,12.436,"[483, 95]","{260-02, 260-06}","[{'signal': {'rssi': -51, 'rsrp': -69, 'rsrq':..."
1880,2022-10-20 08:19:50,50.019237,19.890488,13.128,"[483, 95]","{260-02, 260-06}","[{'signal': {'rssi': -51, 'rsrp': -77, 'rsrq':..."


### Zwizualizuj punkty pomiarowe na interaktywnej mapie

In [3]:
import plotly.express as px

# mapbox_token: str = os.environ.get("MAPBOX_TOKEN")

fig = px.scatter_mapbox(
    data,
    lat="latitude",
    lon="longitude",
)
fig.update_geos(fitbounds="locations")
fig.update_layout(
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    mapbox={
        "style": "open-street-map",
        # "accesstoken": mapbox_token, # open-street-map does not require a token
    },
)
fig.show()


### Kryteria doboru danych

Przyklady:
- Wyświetl wszystkie punkty dla PCI X
- Wyświetl wszystkie punkty dla MNC Y
- Wyświetl wszystkie punkty dla PCI X, pokolorowane wg RSRP/RSRQ/RSSI/TA
- Wyświetl wszystkie punkty dla EARFCN Z

In [4]:
# TODO: add filters in filters.py


### Mimika eventów measurementowych

- Zaimplementuj funkcjonalność, która wygeneruje DataFrame zawierającą potencjalne eventy measurementowe i zwizualizuje ją
- Zaimplementuj sterowalne parametry eventów
- Ogranicz się do eventów A1-A5

In [5]:
import pandas as pd

# params to detect events
rsrp_th = [-140, -44]
rsrq_th = [-19.5, -3]
offset = [-15, 15]

events_data = []
size = data.shape[0]
for i, row in data.iterrows():
    cells = row["lte"]
    if cells and 0 < i < size:
        # present cells - ta should be close to 0, ex. 0-10, mcc and mnc should exist
        scell = None
        for scell_index, cell_info in enumerate(cells):
            if cell_info["identity"]["mMnc"] is not None:
                scell = cell_info
                break

        # previous cells
        prev_scell = None
        prev_cells = data.iloc[i - 1]["lte"]
        for cell_info in prev_cells:
            if cell_info["identity"]["mMnc"] is not None:
                prev_scell = cell_info
                break

        assert scell is not None
        assert prev_scell is not None

        scell_rsrp = scell["signal"]["rsrp"]
        scell_rsrq = scell["signal"]["rsrq"]
        pscell_rsrp = prev_scell["signal"]["rsrp"]
        pscell_rsrq = prev_scell["signal"]["rsrq"]

        def is_a1_event():
            return ((scell_rsrp > rsrp_th[0]) and (pscell_rsrp < rsrp_th[0])) or (
                (scell_rsrq > rsrq_th[0]) and (pscell_rsrq < rsrq_th[0])
            )

        def is_a2_event():
            return ((scell_rsrp < rsrp_th[0]) and (pscell_rsrp > rsrp_th[0])) or (
                (scell_rsrq < rsrq_th[0]) and (pscell_rsrq > rsrq_th[0])
            )

        events_detected: list[str] = []
        if is_a1_event():
            events_detected.append("A1")

        if is_a2_event():
            events_detected.append("A2")

        for j, ncell in enumerate(cells):
            if j == scell_index:  # shouldn't check serving cell against itself
                continue

            ncell_rsrp = ncell["signal"]["rsrp"]
            ncell_rsrq = ncell["signal"]["rsrq"]

            def is_a3_event():
                return (ncell_rsrp >= (scell_rsrp + offset[1])) or (
                    ncell_rsrq >= (scell_rsrq + offset[1])
                )

            def is_a4_event():
                return (ncell_rsrp >= rsrp_th[0]) or (ncell_rsrq >= rsrq_th[0])

            def is_a5_event():
                return ((scell_rsrp < rsrp_th[0]) and (ncell_rsrp > rsrp_th[0])) or (
                    (scell_rsrq < rsrq_th[0]) and (ncell_rsrq > rsrq_th[0])
                )

            if is_a3_event():
                events_detected.append("A3")

            if is_a4_event():
                events_detected.append("A4")

            if is_a5_event():
                events_detected.append("A5")

        for ev in set(events_detected):
            events_data.append(
                [
                    row["longitude"],
                    row["latitude"],
                    ev,
                    row["lte"][scell_index]["signal"]["rsrp"],
                    row["lte"][scell_index]["signal"]["rsrq"],
                ]
            )

events_data = pd.DataFrame(
    events_data, columns=["longitude", "latitude", "event", "rsrp", "rsrq"]
)
events_data


Unnamed: 0,longitude,latitude,event,rsrp,rsrq
0,19.831510,50.098707,A2,-120.0,-21
1,19.862988,50.081862,A4,-112.0,-17
2,19.877384,50.083275,A4,-112.0,-17
3,19.884729,50.083418,A1,-112.0,-17
4,19.884729,50.083418,A4,-112.0,-17
...,...,...,...,...,...
1518,19.950884,50.047014,A4,-91.0,-13
1519,19.951149,50.047081,A4,-83.0,-13
1520,19.951161,50.046992,A4,-82.0,-13
1521,19.948222,50.045390,A4,-93.0,-14


In [6]:
event_types = ["A1", "A2", "A3", "A4", "A5"]
colors = ["black", "blue", "forestgreen", "red", "magenta"]
event_colors_mapper = dict(zip(event_types, colors))

fig = px.scatter_mapbox(
    data_frame=events_data,
    lat="latitude",
    lon="longitude",
    title="A1-A5 Events",
    category_orders={"event": event_types},  # sort labels in legend
    color="event",  # set the color based on string in 'event' column
    color_discrete_map=event_colors_mapper,  # because 'event' column does not store colors directly, specify a mapper event->color
    hover_name="event",
    hover_data={
        "longitude": True,
        "latitude": True,
        "rsrp": True,
        "rsrq": True,
        "event": False,
    },
    width=1200,
    height=600,
)

fig.update_geos(fitbounds="locations")
fig.update_layout(
    margin={"r": 30, "t": 30, "l": 0, "b": 0},
    mapbox={
        "style": "open-street-map",
    },
)
fig.show()


### Kategoryzacja danych

- Grupowanie zarejestrowanych pomiarów przy pomocy współrzędnych geograficznych nie jest ‘wygodne’ - powoduje to utrudnienia implementacyjne podczas aplikowania modeli do analizy danych
- Zaimplementuj funkcjonalność, która wygeneruje siatkę kwadratów o zadanej wielkości, w warunkach brzegowych wybranej mapy.
- Każdy kwadrat ma być ograniczony współrzędnymi {x1, y1}, {x2, y2}
- Każdy kwadrat ma mieć kolejny unikatowy identyfikator
- Identyfikator ten ma być kategorią (label) danego punktu pomiarowego leżącego w granicach wygenerowanego kwadratu
- Podczas implementacji uwzględnij zarówno kategoryzację danych surowych, jak i mimiki eventów.

In [28]:
# GRID SETTINGS
PADDING: float = 0.01  # space between edges of a grid and the most distant points from the center of the grid, measured in longitude/latitude degrees
CHUNKS_COUNT: int = (
    10  # number of chunks per axis so total number of chunks is CHUNKS_COUNT^2
)


In [29]:
min_longitude, max_longitude = (
    events_data["longitude"].min() - PADDING,
    events_data["longitude"].max() + PADDING,
)
min_latitude, max_latitude = (
    events_data["latitude"].min() - PADDING,
    events_data["latitude"].max() + PADDING,
)


def pattern(min_long, min_lat, max_long, max_lat):
    return [
        [min_long, min_lat],
        [max_long, min_lat],
        [max_long, max_lat],
        [min_long, max_lat],
        [min_long, min_lat],
    ]


width: float = max_longitude - min_longitude
height: float = max_latitude - min_latitude

chunk_width = width / CHUNKS_COUNT
chunk_height = height / CHUNKS_COUNT

gridlines = []  # storage for all them chonks

# columns
for i in range(CHUNKS_COUNT):
    min_long = min_longitude + i * chunk_width
    max_long = min_longitude + (i + 1) * chunk_width
    chunks = [pattern(min_long, min_latitude, max_long, max_latitude)]
    gridlines.extend(chunks)

# rows
for i in range(CHUNKS_COUNT):
    min_lat = min_latitude + i * chunk_height
    max_lat = min_latitude + (i + 1) * chunk_height
    chunks = [pattern(min_longitude, min_lat, max_longitude, max_lat)]
    gridlines.extend(chunks)

sources = [
    {
        "id": "classification grid",
        "type": "Feature",
        "geometry": {
            "type": "Polygon",
            "coordinates": gridlines,
        },
    }
]

layers = [
    {
        "type": "line",
        "line": {"width": 1},
        "sourcetype": "geojson",
        "source": {
            "type": "Feature",
            "geometry": {
                "type": "Polygon",
                "coordinates": gridlines,
            },
        },
        "color": "black",
    }
]

fig.update_layout(
    margin={"r": 30, "t": 30, "l": 0, "b": 0},
    mapbox={"layers": layers},
)

fig.show()


In [32]:
# append label column to events_data
def coordinates_to_chunk_label(longitude: float, latitude: float) -> tuple[int, int]:
    label_x = int((longitude - min_longitude) // chunk_width)
    label_y = int((latitude - min_latitude) // chunk_height)
    return label_x, label_y


events_data["chunk"] = events_data.apply(
    lambda event: coordinates_to_chunk_label(event["longitude"], event["latitude"]),
    axis=1,
)

# for raw data, some points may actually lie outside of the grid
data["chunk"] = data.apply(
    lambda point: coordinates_to_chunk_label(point["longitude"], point["latitude"]),
    axis=1,
)

print(events_data)
print(data)


      longitude   latitude event   rsrp  rsrq   chunk
0     19.831510  50.098707    A2 -120.0   -21  (0, 8)
1     19.862988  50.081862    A4 -112.0   -17  (2, 7)
2     19.877384  50.083275    A4 -112.0   -17  (2, 7)
3     19.884729  50.083418    A1 -112.0   -17  (3, 7)
4     19.884729  50.083418    A4 -112.0   -17  (3, 7)
...         ...        ...   ...    ...   ...     ...
1518  19.950884  50.047014    A4  -91.0   -13  (6, 3)
1519  19.951149  50.047081    A4  -83.0   -13  (6, 3)
1520  19.951161  50.046992    A4  -82.0   -13  (6, 3)
1521  19.948222  50.045390    A4  -93.0   -14  (6, 3)
1522  19.949544  50.046069    A4  -88.0   -15  (6, 3)

[1523 rows x 6 columns]
                    time   latitude  longitude  accuracy        pci  \
0    2022-10-20 08:08:52  50.109425  19.818191  1100.000      [397]   
2    2022-10-20 08:09:16  50.102181  19.826999  1000.000      [397]   
4    2022-10-20 08:09:41  50.098707  19.831510  1200.000      [397]   
5    2022-10-20 08:09:51  50.098707  19.831

In [35]:
# TODO: copied from event mimicing, pls extract to some func
event_types = ["A1", "A2", "A3", "A4", "A5"]
colors = ["black", "blue", "forestgreen", "red", "magenta"]
event_colors_mapper = dict(zip(event_types, colors))

fig = px.scatter_mapbox(
    data_frame=events_data,
    lat="latitude",
    lon="longitude",
    title="A1-A5 Events",
    category_orders={"event": event_types},  # sort labels in legend
    color="event",  # set the color based on string in 'event' column
    color_discrete_map=event_colors_mapper,  # because 'event' column does not store colors directly, specify a mapper event->color
    hover_name="event",
    hover_data={
        "longitude": True,
        "latitude": True,
        "rsrp": True,
        "rsrq": True,
        "event": False,
        "chunk": True,
    },
    width=1200,
    height=600,
)

fig.update_geos(fitbounds="locations")
fig.update_layout(
    margin={"r": 30, "t": 30, "l": 0, "b": 0},
    mapbox={"style": "open-street-map", "layers": layers},
)
fig.show()


### Określanie histerezy zasięgu

- Na podstawie pomiarów, wyznacz fizyczne warunki brzegowe histerezy zachodzącej na pokrywającego się zasięgu cellek.
- Wyrysuj (zmień kolory punktów pomiarowych) na mapie obszary pokrycia oraz wyznacz threshold dostępnych cellek. Użyj osobnych warstw (osobny trace na obszarze rysowania) dla każdej cellki.
- Wykonaj zadanie w dwóch wariantach - dla danych surowych oraz dla wygenerowanych wcześniej eventów
- Cel zadania - optymalizacja thresholdów eventowych per eNodeB - jaki threshold ustawić w eNodeB, żeby uzyskać największą liczbę eventów pomiarowych wśród dostępnych cellek?

In [8]:
# TODO

# jak zrobic:
# dynamicznie w kodzie wybrac dwie cellki po pci i pokolorowac punkty w ich histerezie
# histereza to obszar w ktorym punkt pomiarowy moze sie zaliczyc do obu cellek, czyli obszar w ktorym cellki sie pokrywaja
