In [1]:
# Load the FSA polygon layer from a GeoJSON file (path is relative to the
# notebook's working directory) into `gdf`.
# NOTE(review): "FSA" presumably means Canadian Forward Sortation Area, i.e. the
# three-character `id` values seen in later previews — confirm.
import geopandas as gpd

gdf = gpd.read_file('gdf_poly.geojson')

In [2]:
# Keep only the rows whose `id` starts with one of the listed letters, and take
# an independent copy so later cells can modify `ont` without touching `gdf`
# (and without pandas' SettingWithCopyWarning).
# NOTE(review): the variable name suggests these prefixes select Ontario — confirm
# the letter list (in particular whether "O" is intentional).
#
# `.str[0]` is the vectorised first-character lookup: it needs no scratch column
# and yields NaN (hence "not selected") for missing or empty ids instead of raising.
ont = gdf[gdf['id'].str[0].isin(["K", "L", "M", "N", "O", "P"])].copy()

In [31]:
from recipes.categorical_anomaly import CategoricalAnomalyDetector
# Detector configured for the 'IsolationForest' model type on the single
# 'Win Rate' metric, with rows keyed by the `id` column.
detector = CategoricalAnomalyDetector(
    ont,
    metrics_column='Win Rate',
    category_column='id',
    model_type='IsolationForest',
    geotif_path='earth_pop/GHS_POP_E2015_GLOBE_R2019A_4326_30ss_V1_0.tif',
    ScalerClass="StandardScaler",
)

In [32]:
# Run the detector's raster-to-vector step and keep the returned frame.
# The preview in the next cell shows a `pop_count` column per `id` — presumably
# derived from the GeoTIFF passed as `geotif_path`; confirm in recipes.categorical_anomaly.
pop = detector.raster_to_vector()

In [33]:
# Preview the first rows of the raster-to-vector result.
pop.head()

Unnamed: 0,id,bids,impressions,clicks,spend,Win Rate,CTR,geometry,pop_count
2,K1B,249,237,0,2.759268,0.951807,0.0,"MULTIPOLYGON (((-75.54520 45.43477, -75.55599 ...",17911.61
6,K0J,45535,35989,50,417.302705,0.790359,0.001389,"MULTIPOLYGON (((-77.93027 45.37394, -78.17846 ...",35934.75
7,K0M,109857,84399,111,979.728851,0.768262,0.001315,"MULTIPOLYGON (((-78.91076 44.21080, -78.91731 ...",48236.25
8,K0G,62002,47898,89,556.46528,0.772523,0.001858,"MULTIPOLYGON (((-76.05783 44.89132, -76.05972 ...",35465.14
15,K1Z,799,698,1,8.11539,0.873592,0.001433,"MULTIPOLYGON (((-75.74219 45.39723, -75.75374 ...",23618.08


In [34]:
# Scale the data and keep the returned frame.
# NOTE(review): the preview in the next cell shows every numeric column rescaled,
# not only the configured metrics_column ('Win Rate') — confirm that is intended.
# Presumably uses the ScalerClass chosen at construction ("StandardScaler").
scaled = detector.scale()

In [35]:
# Preview the first rows of the scaled frame.
scaled.head()

Unnamed: 0,id,bids,impressions,clicks,spend,Win Rate,CTR,geometry,pop_count
2,K1B,-0.383247,-0.384259,-0.389365,-0.384493,1.494445,-0.47101,"MULTIPOLYGON (((-75.54520 45.43477, -75.55599 ...",-0.501546
6,K0J,1.455018,1.481477,1.473339,1.481083,-0.631492,0.04297,"MULTIPOLYGON (((-77.93027 45.37394, -78.17846 ...",0.446636
7,K0M,4.065998,4.007778,3.745839,4.012177,-0.922464,0.015587,"MULTIPOLYGON (((-78.91076 44.21080, -78.91731 ...",1.093808
8,K0G,2.123452,2.102955,2.926249,2.107358,-0.866355,0.216516,"MULTIPOLYGON (((-76.05783 44.89132, -76.05972 ...",0.421931
15,K1Z,-0.360921,-0.360201,-0.352111,-0.360389,0.464515,0.059251,"MULTIPOLYGON (((-75.74219 45.39723, -75.75374 ...",-0.201333


In [36]:
# Build the model with contamination=0.1; the recorded repr is
# IsolationForest(contamination=0.1).
# NOTE(review): the parameters listed in the next cell show random_state=None, so
# flagged rows may differ between runs. Consider passing a fixed seed if
# create_model forwards extra keyword arguments to the estimator — confirm.
detector.create_model(contamination = 0.1)

IsolationForest(contamination=0.1)

In [37]:
# Show the hyperparameters of the underlying estimator held in `detector.model`.
detector.model.get_params()

{'bootstrap': False,
 'contamination': 0.1,
 'max_features': 1.0,
 'max_samples': 'auto',
 'n_estimators': 100,
 'n_jobs': None,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [38]:
# Run prediction and keep the returned frame; the preview in the next cell shows
# two added columns, `score` and `anomaly`.
# NOTE(review): `score` appears to hold a 1 / -1 label rather than a continuous
# score (all previewed rows are 1 with anomaly == False) — confirm the naming.
predictions = detector.predict()

In [39]:
# Preview the first rows of the prediction frame.
predictions.head()

Unnamed: 0,id,bids,impressions,clicks,spend,Win Rate,CTR,geometry,pop_count,score,anomaly
2,K1B,-0.383247,-0.384259,-0.389365,-0.384493,1.494445,-0.47101,"MULTIPOLYGON (((-75.54520 45.43477, -75.55599 ...",-0.501546,1,False
6,K0J,1.455018,1.481477,1.473339,1.481083,-0.631492,0.04297,"MULTIPOLYGON (((-77.93027 45.37394, -78.17846 ...",0.446636,1,False
7,K0M,4.065998,4.007778,3.745839,4.012177,-0.922464,0.015587,"MULTIPOLYGON (((-78.91076 44.21080, -78.91731 ...",1.093808,1,False
8,K0G,2.123452,2.102955,2.926249,2.107358,-0.866355,0.216516,"MULTIPOLYGON (((-76.05783 44.89132, -76.05972 ...",0.421931,1,False
15,K1Z,-0.360921,-0.360201,-0.352111,-0.360389,0.464515,0.059251,"MULTIPOLYGON (((-75.74219 45.39723, -75.75374 ...",-0.201333,1,False


In [40]:
# Render the detector's scatter plot (the figure is not captured in this text export).
detector.graph_scatter()

In [41]:
# Render the detector's geographic plot — presumably a map of the `geometry`
# polygons with anomalies marked; confirm. (Figure not captured in this export.)
detector.graph_geo()

In [42]:
from recipes.categorical_anomaly import CategoricalAnomalyDetector
# Second detector: 'LocalOutlierFactor' model type over four metrics,
# again keyed by the `id` column and using the same GeoTIFF and scaler.
lof = CategoricalAnomalyDetector(
    ont,
    metrics_column=['impressions', 'clicks', 'CTR', 'pop_count'],
    category_column='id',
    model_type='LocalOutlierFactor',
    geotif_path='earth_pop/GHS_POP_E2015_GLOBE_R2019A_4326_30ss_V1_0.tif',
    ScalerClass="StandardScaler",
)

In [43]:
# Raster-to-vector step for the LOF detector; left as the cell's last expression
# so the returned frame (with its `pop_count` column) is displayed.
lof.raster_to_vector()

Unnamed: 0,id,bids,impressions,clicks,spend,Win Rate,CTR,geometry,pop_count
2,K1B,249,237,0,2.759268,0.951807,0.000000,"MULTIPOLYGON (((-75.54520 45.43477, -75.55599 ...",17911.61
6,K0J,45535,35989,50,417.302705,0.790359,0.001389,"MULTIPOLYGON (((-77.93027 45.37394, -78.17846 ...",35934.75
7,K0M,109857,84399,111,979.728851,0.768262,0.001315,"MULTIPOLYGON (((-78.91076 44.21080, -78.91731 ...",48236.25
8,K0G,62002,47898,89,556.465280,0.772523,0.001858,"MULTIPOLYGON (((-76.05783 44.89132, -76.05972 ...",35465.14
15,K1Z,799,698,1,8.115390,0.873592,0.001433,"MULTIPOLYGON (((-75.74219 45.39723, -75.75374 ...",23618.08
...,...,...,...,...,...,...,...,...,...
582,P3C,1178,955,0,11.126938,0.810696,0.000000,"MULTIPOLYGON (((-80.99643 46.49092, -80.99821 ...",16216.31
584,P0V,3497,2730,4,31.574477,0.780669,0.001465,"MULTIPOLYGON (((-94.37484 49.72693, -94.37316 ...",17216.64
585,P0T,36093,28817,48,334.297858,0.798410,0.001666,"MULTIPOLYGON (((-90.84559 48.24667, -90.87558 ...",29009.14
587,P0E,3841,2754,1,32.041069,0.717001,0.000363,"MULTIPOLYGON (((-79.34665 44.91020, -79.33064 ...",2741.76


In [44]:
# Scale the LOF detector's data and display the returned frame.
lof.scale()

Unnamed: 0,id,bids,impressions,clicks,spend,Win Rate,CTR,geometry,pop_count
2,K1B,-0.383247,-0.384259,-0.389365,-0.384493,1.494445,-0.471010,"MULTIPOLYGON (((-75.54520 45.43477, -75.55599 ...",-0.501546
6,K0J,1.455018,1.481477,1.473339,1.481083,-0.631492,0.042970,"MULTIPOLYGON (((-77.93027 45.37394, -78.17846 ...",0.446636
7,K0M,4.065998,4.007778,3.745839,4.012177,-0.922464,0.015587,"MULTIPOLYGON (((-78.91076 44.21080, -78.91731 ...",1.093808
8,K0G,2.123452,2.102955,2.926249,2.107358,-0.866355,0.216516,"MULTIPOLYGON (((-76.05783 44.89132, -76.05972 ...",0.421931
15,K1Z,-0.360921,-0.360201,-0.352111,-0.360389,0.464515,0.059251,"MULTIPOLYGON (((-75.74219 45.39723, -75.75374 ...",-0.201333
...,...,...,...,...,...,...,...,...,...
582,P3C,-0.345537,-0.346790,-0.389365,-0.346836,-0.363696,-0.471010,"MULTIPOLYGON (((-80.99643 46.49092, -80.99821 ...",-0.590734
584,P0V,-0.251403,-0.254160,-0.240349,-0.254815,-0.759089,0.071092,"MULTIPOLYGON (((-94.37484 49.72693, -94.37316 ...",-0.538108
585,P0T,1.071745,1.107203,1.398831,1.107535,-0.525477,0.145469,"MULTIPOLYGON (((-90.84559 48.24667, -90.87558 ...",0.082286
587,P0E,-0.237440,-0.252908,-0.352111,-0.252716,-1.597465,-0.336687,"MULTIPOLYGON (((-79.34665 44.91020, -79.33064 ...",-1.299619


In [45]:
# Build the model; the recorded repr is
# LocalOutlierFactor(contamination=0.08, n_neighbors=48).
# NOTE(review): 48 and 0.08 are unexplained tuning values — record how they were chosen.
lof.create_model(n_neighbors = 48, contamination = 0.08)

LocalOutlierFactor(contamination=0.08, n_neighbors=48)

In [46]:
# Run prediction and display the returned frame. In the recorded output the
# `score` column is 1 or -1, and `anomaly` is True on the rows where it is -1.
lof.predict()

Unnamed: 0,id,bids,impressions,clicks,spend,Win Rate,CTR,geometry,pop_count,score,anomaly
2,K1B,-0.383247,-0.384259,-0.389365,-0.384493,1.494445,-0.471010,"MULTIPOLYGON (((-75.54520 45.43477, -75.55599 ...",-0.501546,1,False
6,K0J,1.455018,1.481477,1.473339,1.481083,-0.631492,0.042970,"MULTIPOLYGON (((-77.93027 45.37394, -78.17846 ...",0.446636,1,False
7,K0M,4.065998,4.007778,3.745839,4.012177,-0.922464,0.015587,"MULTIPOLYGON (((-78.91076 44.21080, -78.91731 ...",1.093808,-1,True
8,K0G,2.123452,2.102955,2.926249,2.107358,-0.866355,0.216516,"MULTIPOLYGON (((-76.05783 44.89132, -76.05972 ...",0.421931,1,False
15,K1Z,-0.360921,-0.360201,-0.352111,-0.360389,0.464515,0.059251,"MULTIPOLYGON (((-75.74219 45.39723, -75.75374 ...",-0.201333,1,False
...,...,...,...,...,...,...,...,...,...,...,...
582,P3C,-0.345537,-0.346790,-0.389365,-0.346836,-0.363696,-0.471010,"MULTIPOLYGON (((-80.99643 46.49092, -80.99821 ...",-0.590734,1,False
584,P0V,-0.251403,-0.254160,-0.240349,-0.254815,-0.759089,0.071092,"MULTIPOLYGON (((-94.37484 49.72693, -94.37316 ...",-0.538108,1,False
585,P0T,1.071745,1.107203,1.398831,1.107535,-0.525477,0.145469,"MULTIPOLYGON (((-90.84559 48.24667, -90.87558 ...",0.082286,1,False
587,P0E,-0.237440,-0.252908,-0.352111,-0.252716,-1.597465,-0.336687,"MULTIPOLYGON (((-79.34665 44.91020, -79.33064 ...",-1.299619,1,False


In [47]:
# Render the LOF detector's scatter plot (figure not captured in this text export).
lof.graph_scatter()

In [48]:
# Render the high-dimensional plot with default settings — presumably a
# reduced-dimension view of the four configured metrics; confirm.
lof.graph_highd()

In [49]:
# Same plot with three_d=True — presumably switches to a 3-D rendering; confirm.
lof.graph_highd(three_d = True)

In [50]:
from recipes.categorical_anomaly import CategoricalAnomalyDetector
# Third detector: 'LODA' model type over the same four metrics as the LOF run,
# keyed by the `id` column and using the same GeoTIFF and scaler.
loda = CategoricalAnomalyDetector(
    ont,
    metrics_column=['impressions', 'clicks', 'CTR', 'pop_count'],
    category_column='id',
    model_type='LODA',
    geotif_path='earth_pop/GHS_POP_E2015_GLOBE_R2019A_4326_30ss_V1_0.tif',
    ScalerClass="StandardScaler",
)

In [51]:
# Raster-to-vector step for the LODA detector; the returned frame (with its
# `pop_count` column) is displayed.
loda.raster_to_vector()

Unnamed: 0,id,bids,impressions,clicks,spend,Win Rate,CTR,geometry,pop_count
2,K1B,249,237,0,2.759268,0.951807,0.000000,"MULTIPOLYGON (((-75.54520 45.43477, -75.55599 ...",17911.61
6,K0J,45535,35989,50,417.302705,0.790359,0.001389,"MULTIPOLYGON (((-77.93027 45.37394, -78.17846 ...",35934.75
7,K0M,109857,84399,111,979.728851,0.768262,0.001315,"MULTIPOLYGON (((-78.91076 44.21080, -78.91731 ...",48236.25
8,K0G,62002,47898,89,556.465280,0.772523,0.001858,"MULTIPOLYGON (((-76.05783 44.89132, -76.05972 ...",35465.14
15,K1Z,799,698,1,8.115390,0.873592,0.001433,"MULTIPOLYGON (((-75.74219 45.39723, -75.75374 ...",23618.08
...,...,...,...,...,...,...,...,...,...
582,P3C,1178,955,0,11.126938,0.810696,0.000000,"MULTIPOLYGON (((-80.99643 46.49092, -80.99821 ...",16216.31
584,P0V,3497,2730,4,31.574477,0.780669,0.001465,"MULTIPOLYGON (((-94.37484 49.72693, -94.37316 ...",17216.64
585,P0T,36093,28817,48,334.297858,0.798410,0.001666,"MULTIPOLYGON (((-90.84559 48.24667, -90.87558 ...",29009.14
587,P0E,3841,2754,1,32.041069,0.717001,0.000363,"MULTIPOLYGON (((-79.34665 44.91020, -79.33064 ...",2741.76


In [52]:
# Scale the LODA detector's data and display the returned frame.
loda.scale()

Unnamed: 0,id,bids,impressions,clicks,spend,Win Rate,CTR,geometry,pop_count
2,K1B,-0.383247,-0.384259,-0.389365,-0.384493,1.494445,-0.471010,"MULTIPOLYGON (((-75.54520 45.43477, -75.55599 ...",-0.501546
6,K0J,1.455018,1.481477,1.473339,1.481083,-0.631492,0.042970,"MULTIPOLYGON (((-77.93027 45.37394, -78.17846 ...",0.446636
7,K0M,4.065998,4.007778,3.745839,4.012177,-0.922464,0.015587,"MULTIPOLYGON (((-78.91076 44.21080, -78.91731 ...",1.093808
8,K0G,2.123452,2.102955,2.926249,2.107358,-0.866355,0.216516,"MULTIPOLYGON (((-76.05783 44.89132, -76.05972 ...",0.421931
15,K1Z,-0.360921,-0.360201,-0.352111,-0.360389,0.464515,0.059251,"MULTIPOLYGON (((-75.74219 45.39723, -75.75374 ...",-0.201333
...,...,...,...,...,...,...,...,...,...
582,P3C,-0.345537,-0.346790,-0.389365,-0.346836,-0.363696,-0.471010,"MULTIPOLYGON (((-80.99643 46.49092, -80.99821 ...",-0.590734
584,P0V,-0.251403,-0.254160,-0.240349,-0.254815,-0.759089,0.071092,"MULTIPOLYGON (((-94.37484 49.72693, -94.37316 ...",-0.538108
585,P0T,1.071745,1.107203,1.398831,1.107535,-0.525477,0.145469,"MULTIPOLYGON (((-90.84559 48.24667, -90.87558 ...",0.082286
587,P0E,-0.237440,-0.252908,-0.352111,-0.252716,-1.597465,-0.336687,"MULTIPOLYGON (((-79.34665 44.91020, -79.33064 ...",-1.299619


In [53]:
# Build the model with default settings; the recorded repr is LODA().
# NOTE(review): unlike the other two detectors, no contamination is passed here
# — confirm the default is what is wanted.
loda.create_model()

LODA()

In [54]:
# Run prediction and display the returned frame, which carries the `score`
# (1 / -1 in the recorded output) and `anomaly` (True / False) columns.
loda.predict()

Unnamed: 0,id,bids,impressions,clicks,spend,Win Rate,CTR,geometry,pop_count,score,anomaly
2,K1B,-0.383247,-0.384259,-0.389365,-0.384493,1.494445,-0.471010,"MULTIPOLYGON (((-75.54520 45.43477, -75.55599 ...",-0.501546,1,False
6,K0J,1.455018,1.481477,1.473339,1.481083,-0.631492,0.042970,"MULTIPOLYGON (((-77.93027 45.37394, -78.17846 ...",0.446636,1,False
7,K0M,4.065998,4.007778,3.745839,4.012177,-0.922464,0.015587,"MULTIPOLYGON (((-78.91076 44.21080, -78.91731 ...",1.093808,-1,True
8,K0G,2.123452,2.102955,2.926249,2.107358,-0.866355,0.216516,"MULTIPOLYGON (((-76.05783 44.89132, -76.05972 ...",0.421931,1,False
15,K1Z,-0.360921,-0.360201,-0.352111,-0.360389,0.464515,0.059251,"MULTIPOLYGON (((-75.74219 45.39723, -75.75374 ...",-0.201333,1,False
...,...,...,...,...,...,...,...,...,...,...,...
582,P3C,-0.345537,-0.346790,-0.389365,-0.346836,-0.363696,-0.471010,"MULTIPOLYGON (((-80.99643 46.49092, -80.99821 ...",-0.590734,1,False
584,P0V,-0.251403,-0.254160,-0.240349,-0.254815,-0.759089,0.071092,"MULTIPOLYGON (((-94.37484 49.72693, -94.37316 ...",-0.538108,1,False
585,P0T,1.071745,1.107203,1.398831,1.107535,-0.525477,0.145469,"MULTIPOLYGON (((-90.84559 48.24667, -90.87558 ...",0.082286,1,False
587,P0E,-0.237440,-0.252908,-0.352111,-0.252716,-1.597465,-0.336687,"MULTIPOLYGON (((-79.34665 44.91020, -79.33064 ...",-1.299619,1,False


In [55]:
# Display the rows flagged as anomalous; every row in the recorded output has
# score == -1 and anomaly == True.
loda.get_anomalies()

Unnamed: 0,id,bids,impressions,clicks,spend,Win Rate,CTR,geometry,pop_count,score,anomaly
7,K0M,4.065998,4.007778,3.745839,4.012177,-0.922464,0.015587,"MULTIPOLYGON (((-78.91076 44.21080, -78.91731 ...",1.093808,-1,True
28,K0A,7.93526,7.852127,7.508502,7.865449,-0.894469,0.025208,"MULTIPOLYGON (((-75.38381 45.34085, -75.39819 ...",4.701013,-1,True
29,K0H,3.331196,3.294297,3.373298,3.293713,-0.888728,0.057401,"MULTIPOLYGON (((-76.95872 44.07585, -76.96484 ...",1.093589,-1,True
30,K0L,6.560438,6.588351,6.092847,6.592152,-0.750319,0.010036,"MULTIPOLYGON (((-78.64111 44.31767, -78.64686 ...",2.49282,-1,True
46,K0K,4.8295,4.83283,5.534035,4.834254,-0.783305,0.116237,"MULTIPOLYGON (((-76.61657 43.92247, -76.60763 ...",4.353308,-1,True
71,K0C,2.544232,2.611868,2.702724,2.613516,-0.549008,0.061841,"MULTIPOLYGON (((-75.44425 45.04771, -75.40857 ...",1.339687,-1,True
121,L0R,2.542568,2.487718,2.143913,2.499059,-0.976189,-0.015866,"MULTIPOLYGON (((-79.90484 43.37897, -79.98498 ...",3.483156,-1,True
146,L4C,-0.255787,-0.239079,-0.389365,-0.238549,0.691412,-0.47101,"MULTIPOLYGON (((-79.44286 43.89595, -79.44360 ...",2.774258,-1,True
149,K9J,4.150633,4.22607,3.894855,4.21521,-0.618824,0.009296,"MULTIPOLYGON (((-78.39220 44.37315, -78.38709 ...",0.889049,-1,True
181,K6K,-0.393192,-0.396522,-0.389365,-0.396806,-4.454921,-0.47101,"MULTIPOLYGON (((-74.70840 45.08554, -74.76944 ...",-1.309924,-1,True


In [56]:
# Render the LODA detector's scatter plot (figure not captured in this text export).
loda.graph_scatter()

In [57]:
# Count the flagged rows (25 in the recorded run).
len(loda.get_anomalies())

25

In [58]:
# Render the high-dimensional plot for the LODA detector with default settings
# (figure not captured in this text export).
loda.graph_highd()