# Converting the Belgium cells from the original CSV file into an importable circus actor

## Sites

In [20]:
import pandas as pd

In [24]:
# Load the raw cell table and normalise the site names:
# upper case, with spaces replaced by underscores.
cells = pd.read_csv("source_data/geography/cell.csv")
cells["site_name"] = cells["site_name"].str.upper().str.replace(" ", "_")

# Drop the random sites, i.e. every row whose site name starts with "SITE_RANDOM".
# na=False keeps rows with a missing site name, as a plain string comparison would.
is_random_site = cells["site_name"].str.startswith("SITE_RANDOM", na=False)
cells = cells[~is_random_site]

# One row per distinct combination of the site-level columns.
site_columns = ["site_name", "site_longitude", "site_latitude", "geo_level1_id", "site_urban"]
sites = cells[site_columns].drop_duplicates()

sites.shape

(4208, 5)

In [25]:
# Eyeball 8 randomly chosen sites (no random_state is passed, so the rows change on every run).
sites.sample(8)

Unnamed: 0,site_name,site_longitude,site_latitude,geo_level1_id,site_urban
7608,SITE_003909,4.371938,50.933528,LVL1_Grimbergen,True
18906,SITE_003317,4.450927,51.156137,LVL1_Edegem,True
26820,SITE_001275,4.432213,51.247759,LVL1_Anvers,True
26442,SITE_006896,4.83073,51.235034,LVL1_Lille,True
49146,SITE_003222,6.108981,50.479074,LVL1_Waimes,False
46746,SITE_019726,2.668988,51.131909,LVL1_Koksijde,False
30462,SITE_009289,3.8366,50.475264,LVL1_Saint-Ghislain,False
38958,SITE_021024,3.262578,51.16207,LVL1_Oostkamp,False


In [60]:
from datagenerator.core import actor
from datagenerator.core import circus

# NOTE: `circus` is rebound here from the imported module to the Circus
# instance; the cells below use `circus` as the instance.
circus = circus.Circus(
    name="belgium",
    master_seed=1234,
    start=pd.Timestamp("22 July 2012"),
    step_duration=pd.Timedelta("12h"),
)

# Actor whose member ids are the site names.
sites_actor = circus.create_actor(name="sites", ids=sites.site_name)

# (attribute name, column of `sites` supplying its initial values),
# created in the order listed.
site_attributes = [
    ("LATITUDE", sites.site_latitude),
    ("LONGITUDE", sites.site_longitude),
    ("GEO_LEVEL_1", sites.geo_level1_id),
    ("URBAN", sites.site_urban),
]
for attribute_name, initial_values in site_attributes:
    sites_actor.create_attribute(name=attribute_name, init_values=initial_values)

# "CELLS" relationship: one relation per row of `cells`, from site name to cell id.
cell_rel = sites_actor.create_relationship(name="CELLS")
cell_rel.add_relations(from_ids=cells.site_name, to_ids=cells.cell_id)




In [61]:
# Sanity check: render the "sites" actor as a dataframe and show 4 random rows.
circus.actors["sites"].to_dataframe().sample(4)

Unnamed: 0_level_0,LATITUDE,URBAN,GEO_LEVEL_1,LONGITUDE
site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SITE_001581,50.891744,True,LVL1_Geetbets,5.162842
SITE_021274,50.600061,True,LVL1_Seraing,5.506293
SITE_007343,51.037676,False,LVL1_Houthalen-Helchteren,5.372035
SITE_001442,51.157298,True,LVL1_Geel,4.9949


## Dealers

In [62]:
# Load the distributors, discard the rows of type "origin", and drop the
# agent_class and distributor_type columns.
distributors = pd.read_csv("source_data/agent/distributor.csv").drop("agent_class", axis=1)
is_not_origin = distributors["distributor_type"] != "origin"
distributors = distributors[is_not_origin].drop("distributor_type", axis=1)

# Split by agent id: the four ids listed here form `dist_l2`,
# every other distributor goes to `dist_l1`.
l2_names = ["DIST1", "DIST2", "DIST3", "DIST4"]
is_l2 = distributors["agent_id"].isin(l2_names)
dist_l2 = distributors[is_l2]
dist_l1 = distributors[~is_l2]

dist_l2.head()

Unnamed: 0,agent_id,agent_name,agent_contact_name,agent_contact_phone
1,DIST1,HEINZ DISTRIB 6848,Cristopher Coles,32433162415
2,DIST2,AMORA DISTRIB 6189,Darron Craver,32497416595
3,DIST3,DEVOS & LEMMENS DISTRIB 3093,Cristopher Coles,32434046530
4,DIST4,DEVOS & LEMMENS DISTRIB 190,Adina Pacheo,32495097646


In [63]:
# Preview the first 4 rows of dist_l1 (the distributors whose id is not in l2_names).
dist_l1.head(4)

Unnamed: 0,agent_id,agent_name,agent_contact_name,agent_contact_phone
5,DIST5,LA WILLIAMS DISTRIB 7331,Coreen Hoxie,32410717340
6,DIST6,LA WILLIAMS DISTRIB 4101,Danika Gierlach,32410526407
7,DIST7,CALVE DISTRIB 730,Scarlet Champine,32414549221
8,DIST8,AMORA DISTRIB 76,Adolfo Fuselier,32416587328


In [64]:
# One circus actor per distributor group, with the agent ids as member ids.
# The generator is consumed in order, so "dist_l1" is created before "dist_l2".
dist_l1_ac, dist_l2_ac = (
    circus.create_actor(name=actor_name, ids=frame.agent_id)
    for actor_name, frame in [("dist_l1", dist_l1), ("dist_l2", dist_l2)]
)

In [65]:
# (attribute name, source column in the distributor tables), in creation order.
distributor_attributes = [
    ("NAME", "agent_name"),
    ("CONTACT_NAME", "agent_contact_name"),
    ("CONTACT_PHONE", "agent_contact_phone"),
]

# Both distributor actors get the same attributes, each initialised from its own table.
for dist_actor, dist_frame in [(dist_l1_ac, dist_l1), (dist_l2_ac, dist_l2)]:
    for attribute_name, source_column in distributor_attributes:
        dist_actor.create_attribute(name=attribute_name, init_values=dist_frame[source_column])




In [15]:
dealers_pos = pd.read_csv("source_data/distributor_pos_product.csv")

# NOTE(review): `dealers_l1` is not defined in any cell visible in this notebook,
# so this cell raises NameError on a fresh kernel. It needs to be (re)defined
# before this point; confirm where it originally came from.
#
# Join on distributor_id. Columns present in both inputs keep their name for the
# left-hand copy and get a "_y" suffix for the right-hand copy; the right-hand
# product_type_id is then dropped.
distributor_rels = dealers_l1.merge(
    dealers_pos,
    on="distributor_id",
    suffixes=("", "_y"),
).drop("product_type_id_y", axis=1)

In [16]:
# Preview the first rows of the merged table.
distributor_rels.head()

Unnamed: 0,distributor_id,geo_level1_id,product_type_id,agent_id
0,DIST1,LVL1_Affligem,sim,FIPO6
1,DIST1,LVL1_Affligem,sim,FIPO20
2,DIST1,LVL1_Affligem,sim,FIPO21
3,DIST1,LVL1_Affligem,sim,FIPO22
4,DIST1,LVL1_Affligem,sim,FIPO25


In [17]:
# Distinct (distributor, geo level 1, product type) combinations.
# NOTE(review): the output recorded under this cell shows only distributor_id and
# geo_level1_id, so it looks stale relative to this three-column selection;
# re-run the cell or confirm which column set is intended.
geo_rel_columns = ["distributor_id", "geo_level1_id", "product_type_id"]
distributor_geo_l1_rel = distributor_rels[geo_rel_columns].drop_duplicates()
distributor_geo_l1_rel.head(3)

Unnamed: 0,distributor_id,geo_level1_id
0,DIST1,LVL1_Affligem
8970,DIST1,LVL1_Asse
17940,DIST1,LVL1_Beersel


In [None]:
# Distinct (distributor, geo level 1) pairs.
# NOTE(review): this rebinds `distributor_geo_l1_rel`, which an earlier cell builds
# with an extra product_type_id column; keep only the version that is intended.
geo_rel_columns = ["distributor_id", "geo_level1_id"]
distributor_geo_l1_rel = distributor_rels[geo_rel_columns].drop_duplicates()
distributor_geo_l1_rel.head(3)

## Saving the circus

In [66]:
# Persist the circus and the actors created in this notebook.
# NOTE(review): overwrite=True presumably replaces a previously saved circus of
# the same name; confirm against the datagenerator documentation.
circus.save_to_db(overwrite=True)

