# Mosquito Sinks and Sources Detection

# Setup

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Transition matrix between sites: the CSV has no header row, so pandas
# numbers the columns positionally.
transitions = pd.read_csv(
    'data/kernel_regular_5000.csv',
    header=None,
)
# Site table: column names come from the first row of the CSV.
locations = pd.read_csv('data/stp_all_sites_v3.csv')

# A Peek at the Data

In [4]:
# Show the matrix dimensions, then its first five rows.
display(
    transitions.shape,
    transitions.head(),
)

(509, 509)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,499,500,501,502,503,504,505,506,507,508
0,0.798,0.006,0.005,0.0035,0.0075,0.001,0.003,0.003,0.001,0.001,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.006,0.7465,0.0145,0.012,0.007,0.0035,0.0035,0.0055,0.003,0.005,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.005,0.0145,0.754,0.016,0.007,0.004,0.0045,0.007,0.0015,0.0035,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0035,0.012,0.016,0.7705,0.0035,0.006,0.0035,0.0025,0.0045,0.0035,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0075,0.007,0.007,0.0035,0.7535,0.0035,0.004,0.0015,0.0035,0.006,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Summarise index range, column dtypes and memory use of the transition matrix.
transitions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 509 entries, 0 to 508
Columns: 509 entries, 0 to 508
dtypes: float64(506), int64(3)
memory usage: 2.0 MB


In [6]:
# Show the site-table dimensions, then its first five rows.
display(
    locations.shape,
    locations.head(),
)

(509, 3)

Unnamed: 0,lon,lat,pop
0,7.42917,1.6193,222
1,7.41421,1.63409,12
2,7.4139,1.63602,8
3,7.41219,1.63492,16
4,7.42591,1.64041,23


In [7]:
# Summarise per-column non-null counts and dtypes of the site table.
locations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 509 entries, 0 to 508
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   lon     509 non-null    float64
 1   lat     509 non-null    float64
 2   pop     509 non-null    int64  
dtypes: float64(2), int64(1)
memory usage: 12.1 KB


# The Detector Class - Example Usage

In [8]:
# Execute the companion notebook in this kernel's namespace.
# NOTE(review): `Detector` (and presumably `plot_data`, used in the plotting
# cells below) are expected to be defined there — confirm.
%run Detector.ipynb

In [9]:
# Build the detector from the transition matrix and the site table.
# The keyword options are interpreted by Detector (defined in Detector.ipynb).
d = Detector(
    transitions,
    locations,
    as_df=True,
    ss_vals=10000,
    n_clusters=4,
    random_state=0,
)

In [10]:
# At what step does the system reach steady state?
d.ss_step

5974

In [None]:
# Starting from time step 0, what does the system look like at steady state?
d.run()
# Preview the first rows of both detector outputs side by side.
display(
    d.clabels().head(),
    d.results().head(),
)
print("Steady-state step:", d.ss_step)

In [None]:
# Running 1000 steps beyond the steady-state step should give results that
# are not much different from the previous run.
end_step = d.ss_step + 1000
d.run(0, end_step)
d.results().head()

In [None]:
# What happens if we stop before steady state?
# Step 100 is well short of the steady-state step reported above (5974).
d.run(0, 100)
d.results().head()

In [None]:
# What happens when we start at time step 1000 and run to steady state?
d.run(1000)
# Preview the first rows of both detector outputs side by side.
display(
    d.clabels().head(),
    d.results().head(),
)

# Plots

### First, let's see where the data is geographically:

In [None]:
import folium
import folium.plugins

In [None]:
# Map centre as (latitude, longitude), the order folium expects. These are the
# coordinates of the first row of `locations` (lat=1.61930, lon=7.42917); the
# previous value had the two components swapped.
COORDS = (1.61930, 7.42917)
# The keyword is `location` (singular). `locations=` is not a Map parameter,
# so the centre was silently ignored.
folimp = folium.Map(location=COORDS, zoom_start=13)
# HeatMap takes a list of [lat, lon] pairs.
points = locations[['lat', 'lon']].astype('float').values
heatmp = folium.plugins.HeatMap(points.tolist(), radius=10)
# add_child returns the map, so the cell renders it as its output.
folimp.add_child(heatmp)

In [None]:
# Site coordinates in the (lat, lon) order folium expects.
site_coords = locations[['lat', 'lon']].astype('float')

# One marker per site, grouped into a cluster layer.
cluster = folium.plugins.MarkerCluster()
for lat, lon in site_coords.itertuples(index=False, name=None):
    cluster.add_child(folium.Marker([lat, lon]))

# Centre the map on the first site. The keyword is `location` (singular);
# `locations=` is not a Map parameter, so the centre was silently ignored.
# The centre is taken from the site table so it is guaranteed to be in
# (lat, lon) order.
folimp = folium.Map(location=site_coords.iloc[0].tolist(), zoom_start=13)
folimp.add_child(cluster)
folimp

In [None]:
# The maps suggest two independent islands, so split the sites by latitude
# and analyse each island on its own.
d.run()
cids = d.clabels()
prps = d.results()


def _island_subset(site_mask):
    """Return (sites, transition sub-matrix, result rows) for the masked sites.

    `site_mask` is a boolean mask over the rows of `cids`. The transition
    matrix is sliced positionally with the selected index values, and the
    results are restricted to the 'cid' values present among those sites.
    """
    locs = cids[site_mask]
    tmtx = transitions.iloc[locs.index, locs.index]
    coms = prps[prps.index.isin(locs['cid'].unique())]
    return locs, tmtx, coms


# Sites above latitude 1.25 are treated as Principe, sites below 0.50 as Sao Tome.
principe_locs, principe_tmtx, principe_coms = _island_subset(cids['lat'] > 1.25)
sao_tome_locs, sao_tome_tmtx, sao_tome_coms = _island_subset(cids['lat'] < 0.50)

### Now let's use a graph to visualize the network:

In [None]:
import geojson
from descartes import PolygonPatch

# The background outline is read from a GeoJSON file. To produce one from
# SHP file(s), a converter such as
# https://mygeodata.cloud/converter/shp-to-geojson can be used.
with open("STP.geojson") as geojson_fh:
    json_data = geojson.load(geojson_fh)

# Keep only the list of features; the cells below index into it.
poly = json_data['features']

In [None]:
# Pull the coordinate lists of the first two features for the background.
principe_crds = poly[0]['geometry']['coordinates']
sao_tome_crds = poly[1]['geometry']['coordinates']

principe = {'type': 'MultiPolygon', 'coordinates': principe_crds}
sao_tome = {'type': 'MultiPolygon', 'coordinates': sao_tome_crds}
# Combined outline: a new list, so the per-island coordinate lists are not mutated.
all_data = {'type': 'MultiPolygon', 'coordinates': principe_crds[:] + sao_tome_crds[:]}

In [None]:
# Network plot for the Principe subset, with all three optional bars enabled.
# Option meanings are defined by plot_data, which is not defined in this notebook.
plot_data(
    principe_tmtx,
    principe_locs,
    principe_coms,
    nodes_fn=lambda x: x**(1/3),
    bordr_mu=3,
    min_prob=0.007,
    edges_mu=100,
    bgrd_crd=principe,
    bgbd_lwd=5,
    fig_size=(10, 9),
    plt_bbar=True,
    plt_pbar=True,
    plt_sbar=True,
)

In [None]:
# Network plot for the Sao Tome subset, with all three optional bars enabled.
# Option meanings are defined by plot_data, which is not defined in this notebook.
plot_data(
    sao_tome_tmtx,
    sao_tome_locs,
    sao_tome_coms,
    nodes_fn=lambda x: x**(1/6),
    bordr_mu=5,
    edges_fn=np.log10,
    min_prob=0.009,
    edges_mu=1,
    bgrd_crd=sao_tome,
    bgbd_lwd=2,
    fig_size=(13, 9),
    plt_bbar=True,
    plt_pbar=True,
    plt_sbar=True,
)

In [None]:
# Second Sao Tome plot: cube-root node scaling, min_prob=1, no optional bars.
# NOTE(review): min_prob=1 presumably filters out every edge — confirm against plot_data.
plot_data(
    sao_tome_tmtx,
    sao_tome_locs,
    sao_tome_coms,
    nodes_fn=lambda x: x**(1/3),
    bordr_mu=5,
    edges_fn=np.log10,
    min_prob=1,
    edges_mu=1,
    bgrd_crd=sao_tome,
    bgbd_lwd=2,
    fig_size=(12, 12),
)

In [None]:
# Plot over the full transition matrix with the combined outline as background.
# Option meanings are defined by plot_data, which is not defined in this notebook.
plot_data(
    transitions,
    d.clabels(),
    d.results(),
    nodes_fn=np.sqrt,
    bordr_mu=2,
    edges_fn=np.log10,
    min_prob=1,
    edges_mu=2,
    bgrd_crd=all_data,
    bgbd_lwd=2,
    fig_size=(12, 13),
)

# References

1. Polygon Plotting: https://gis.stackexchange.com/questions/93136/how-to-plot-geo-data-using-matplotlib-python/93201#93201

2. SHP file: https://data.humdata.org/dataset/sao-tome-and-principe-administrative-boundaries

3. Colormaps: https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html