In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports all loaded modules
# before each cell runs, so edits to imported project code take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import sys

import numpy as np
import pandas as pd  # used as `pd` throughout; previously only available via the star import

# Put the parent directory on the import path so the local `cider` package resolves.
sys.path.insert(0,'..')
# NOTE(review): this star import supplies DataStore and HomeLocator, and presumably also the
# `gpd` alias that later cells use without importing it — confirm and switch to explicit imports.
from cider.home_location import *

# Path of the YAML configuration handed to DataStore below.
config_file = '../configs/config_new.yml'

### Load from datastore

In [None]:
# Build the project DataStore from the configuration file path defined above.
datastore = DataStore(config_file)

In [None]:
# Expose the configured working directory on the datastore as a string `outputs` attribute.
working_dir = datastore.cfg.path.working.directory_path
datastore.outputs = str(working_dir)

In [None]:
# Spark session owned by the datastore; passed to get_home_locations further down.
spark = datastore.spark
# HomeLocator built on the same datastore; its results are compared with the
# function-based API later in the notebook.
home_locator = HomeLocator(datastore)


In [None]:
# Reference tables: antenna metadata and ground-truth home locations.
antennas = pd.read_csv('../synthetic_data/antennas.csv')
home_locations_gt = pd.read_csv('../synthetic_data/home_locations.csv')

# Raw CDR has one row per transaction with caller and recipient side by side.
cdr = pd.read_csv('../synthetic_data/cdr.csv')

# Reshape to one row per (subscriber, antenna, timestamp) for each side of the transaction.
outgoing = cdr[['caller_id', 'caller_antenna', 'timestamp']].rename(
    columns={'caller_id': 'name', 'caller_antenna': 'antenna_id'}
)
incoming = cdr[['recipient_id', 'recipient_antenna', 'timestamp']].rename(
    columns={'recipient_id': 'name', 'recipient_antenna': 'antenna_id'}
)

# Stack both sides and attach antenna attributes; the inner join drops rows with unknown antennas.
cdr = pd.concat([outgoing, incoming]).merge(antennas, on='antenna_id', how='inner')


In [None]:
# Region geometries from the synthetic GeoJSON; used as the boundary layer in later cells.
shapefile = gpd.read_file('../synthetic_data/regions.geojson')

In [None]:
# Display the loaded region geometries for a visual sanity check.
shapefile

### Modal Tower

In [None]:
# Antenna-level home locations from HomeLocator, with the subscriber key renamed to 'name'
# so it matches the key column of the CDR frame built above.
homes = (
    home_locator
    .get_home_locations(geo="antenna_id", algo='count_transactions')
    .rename(columns={'subscriber_id': 'name'})
)

In [None]:
from cider.homelocation.inference import get_home_locations
from cider.homelocation.schemas import GeographicUnit, GetHomeLocationAlgorithm

In [None]:
# Rename the CDR key columns before the frame is passed to get_home_locations below.
cdr = cdr.rename(columns={"name": "caller_id", "antenna_id": "caller_antenna_id"})

# Keep the original tower ids under `old_tower_id`, but only once: re-running this cell would
# otherwise rename the antenna-derived `tower_id` as well and leave two `old_tower_id` columns.
if "old_tower_id" not in antennas.columns:
    antennas = antennas.rename(columns={"tower_id": "old_tower_id"})

# Use the antenna id as the tower id, i.e. treat every antenna as its own tower.
antennas["tower_id"] = antennas["antenna_id"]

In [None]:
# Home locations from the function-based API, run on the in-memory CDR, antenna and
# boundary frames prepared above, at antenna granularity.
# NOTE(review): the HomeLocator call above uses algo='count_transactions' while this call uses
# COUNT_MODAL_DAYS — confirm the two are meant to be comparable before diffing the results.
homes_2 = get_home_locations(
    spark_session=spark,
    validated_cdr_data=cdr,
    validated_antenna_data=antennas,
    shapefile_data=shapefile,
    geographic_unit=GeographicUnit.ANTENNA_ID,
    algorithm=GetHomeLocationAlgorithm.COUNT_MODAL_DAYS,
    # additional_columns_to_keep=["region"],
)

In [None]:
# Bring the ground-truth column names in line with the renamed CDR / inferred-home columns.
gt_column_map = {
    "subscriber_id": "caller_id",
    "antenna_id": "caller_antenna_id",
    "regions": "region",
}
home_locations_gt = home_locations_gt.rename(columns=gt_column_map)

In [None]:
# Preview the inferred home locations.
homes_2.head()

In [None]:
# Preview the ground truth after the column rename above.
home_locations_gt.head()

In [None]:
from cider.homelocation.inference import get_accuracy

# Compare inferred homes with the ground truth: joined on subscriber, measured on region.
table = get_accuracy(
    homes_2,
    home_locations_gt,
    column_to_merge_on="caller_id",
    column_to_measure_on="region",
)

In [None]:
# Mean of the `is_correct` column of the accuracy table.
table["is_correct"].mean()

In [None]:
# Give the HomeLocator result the same column names as `homes_2` so the two can be diffed.
homes_column_map = {
    "name": "caller_id",
    "count_modal_days": "transaction_modal_days_count",
}
homes = homes.rename(columns=homes_column_map)

In [None]:
# Show homes_2 without the antenna column. `columns` must be passed as a keyword; the
# original `columns[...]` subscripted an undefined name and raised NameError.
# The result is displayed only — homes_2 itself keeps the column, which later cells merge on.
homes_2.drop(columns=["caller_antenna_id"])

In [None]:
# Display the inferred home locations (the frame is named `homes_2`; `homes2` was never defined).
homes_2

In [None]:
import deepdiff

# Order both results by subscriber and reset the index so the comparison is row-aligned.
homes_sorted = homes.sort_values(by="caller_id").reset_index(drop=True)
homes_2_sorted = homes_2.sort_values(by="caller_id").reset_index(drop=True)

# An empty diff means the two implementations agree.
deepdiff.DeepDiff(homes_sorted, homes_2_sorted)

In [None]:
# Independent check of the modal-tower result, computed directly from the CDR.
# Earlier cells rename the subscriber key to 'caller_id' in both `cdr` and `homes`; work on
# views keyed by 'name' so this cell runs whether or not those renames have been applied.
cdr_by_name = cdr.rename(columns={'caller_id': 'name'})
homes_by_name = homes.rename(columns={'caller_id': 'name'})

grouped = pd.DataFrame(cdr_by_name.groupby('name')['tower_id'].agg(pd.Series.mode))
# agg(pd.Series.mode) yields a scalar for a unique mode and an array of values when several
# towers tie. Keep only unambiguous subscribers; test dimensionality directly rather than
# inspecting characters of a joined string, which breaks on single-character ids.
grouped['tower_id'] = grouped['tower_id'].apply(lambda x: x if np.ndim(x) == 0 else np.nan)
grouped = grouped.dropna()

# NOTE(review): the _x/_y suffixes only exist if `homes` also carries a 'tower_id' column —
# confirm against the HomeLocator output for geo='antenna_id'.
merged = grouped.merge(homes_by_name, on='name')
assert len(merged[merged['tower_id_x'] != merged['tower_id_y']]) == 0

### Tower with Most Unique Days

In [None]:
# Antenna-level home locations using the count_days algorithm, keyed by 'name'.
homes = (
    home_locator
    .get_home_locations('antenna_id', 'count_days')
    .rename(columns={'subscriber_id': 'name'})
)

In [None]:
# Independent check of the "most unique days" result, computed directly from the CDR.
# Calendar day = first 10 characters of the timestamp string.
cdr['day'] = cdr['timestamp'].str[:10]

# Earlier cells may have renamed the subscriber key to 'caller_id'; use views keyed by 'name'.
cdr_by_name = cdr.rename(columns={'caller_id': 'name'})
homes_by_name = homes.rename(columns={'caller_id': 'name'})

# Distinct days per (subscriber, tower). The original counted distinct timestamps and never
# used the `day` column it had just computed.
day_counts = (
    cdr_by_name.groupby(['name', 'tower_id'])['day']
    .nunique()
    .rename('n_days')
    .reset_index()
)

# Keep each subscriber's best tower(s), then drop subscribers whose maximum is tied,
# since their home tower is ambiguous.
is_best = day_counts['n_days'] == day_counts.groupby('name')['n_days'].transform('max')
best = day_counts[is_best]
best = best[~best['name'].duplicated(keep=False)]
grouped = best[['name', 'tower_id']].reset_index(drop=True)

# Compare against HomeLocator. The original asserted on the stale `merged` left over from the
# modal-tower section, so it never tested this result.
# NOTE(review): the _x/_y suffixes only exist if `homes` also carries a 'tower_id' column —
# confirm against the HomeLocator output for geo='antenna_id'.
merged = grouped.merge(homes_by_name, on='name')
assert len(merged[merged['tower_id_x'] != merged['tower_id_y']]) == 0

### Mode of Modes

In [None]:
# This algorithm is nearly impossible to test with the synthetic data: there are not enough
# unique modes, so the result is essentially random.

### Maps

In [None]:
# Recompute the count_days home locations, keyed by 'name'.
homes = (
    home_locator
    .get_home_locations('antenna_id', 'count_days')
    .rename(columns={'subscriber_id': 'name'})
)

# Population map of the inferred homes per antenna, without Voronoi cells.
home_locator.map(geo='antenna_id', algo='count_days', kind='population', voronoi=False)

In [None]:
# Accuracy reported by HomeLocator for the count_days algorithm at antenna granularity.
home_locator.accuracy(geo='antenna_id',algo='count_days')
# The precision and recall maps below are currently disabled.
# home_locator.map(algo='count_days', kind='precision', voronoi=False)
# home_locator.map(algo='count_days', kind='recall', voronoi=False)

In [None]:
from cider.homelocation.dependencies import get_voronoi_tessellation

In [None]:
# Display the antenna table before it is de-duplicated for the tessellation.
antennas

In [None]:
# Assigning the de-duplicated coordinates back is index-aligned, so any row whose
# (latitude, longitude) pair repeats an earlier row ends up with NaN coordinates.
# This mutates `antennas` in place; the NaN rows are removed from the GeoDataFrame by the
# dropna() in the next cell.
antennas[['latitude', 'longitude']] = antennas[['latitude', 'longitude']].drop_duplicates()

# One point geometry per antenna (x = longitude, y = latitude), declared as EPSG:4326.
antennas_gdf = gpd.GeoDataFrame(antennas, geometry=gpd.points_from_xy(antennas.longitude, antennas.latitude), crs="EPSG:4326")

In [None]:
# Discard antennas with any missing value, including those whose coordinates were blanked
# out as duplicates in the previous cell.
antennas_gdf = antennas_gdf.dropna()

In [None]:
# `within` works row-by-row (index-aligned) when given another GeoSeries/GeoDataFrame, so
# testing against `shapefile` itself pairs antenna i with region i. Dissolve all regions into
# one boundary geometry and count the antennas that fall inside it.
# union_all() exists from geopandas 1.0; older versions expose the same result as unary_union.
boundary = shapefile.union_all() if hasattr(shapefile, "union_all") else shapefile.unary_union
antennas_gdf.within(boundary).sum()

In [None]:
# Voronoi tessellation of the de-duplicated antenna points from the new implementation,
# using the region shapefile as boundary and `antenna_id` as the point identifier column.
new_vornoi = get_voronoi_tessellation(
    xy_points=antennas_gdf,
    boundary_shapefile=shapefile,
    points_id_col='antenna_id'
)

In [None]:
# Display the tessellation produced by the new implementation.
new_vornoi

In [None]:
from deprecated.helpers.plot_utils import voronoi_tessellation

In [None]:
# Tessellation from the deprecated helper, for comparison with `new_vornoi`.
# NOTE(review): this receives `antennas`, whose duplicate coordinates were set to NaN by an
# earlier cell, not the cleaned `antennas_gdf` — confirm the helper handles missing coordinates.
old_voronoi = voronoi_tessellation(
    points=antennas,
    shapefile=shapefile,
    key='antenna_id',
)

In [None]:
# True when both tessellations cover exactly the same set of antenna ids
# (this compares ids only, not cell geometry).
set(old_voronoi['antenna_id']) == set(new_vornoi['antenna_id'])

In [None]:
# Mapping
from cider.homelocation.plotting import make_location_map

In [None]:
homes_2_gdf.columns

In [None]:
merged = homes_2.merge(antennas, left_on='caller_antenna_id', right_on='antenna_id', how='left')
homes_2_gdf = gpd.GeoDataFrame(
    merged,
    geometry=gpd.points_from_xy(merged.longitude, merged.latitude),
    crs="EPSG:4326"
)
fig = make_location_map(
    inferred_home_locations=homes_2_gdf,
    boundaries_shapefile=shapefile,
    column_to_plot_label='caller_antenna_id',
    column_to_plot_markersize='transaction_modal_days_count',
    color='grey'
)
