# Converting the Belgium cells from the original CSV file into an importable circus actor

## Parsing the raw file

In [83]:
# Preview the first rows of the parsed `cells` frame.
# NOTE(review): `cells` is only defined by the cell that follows in this file (In [82]);
# on a fresh kernel that cell has to run before this one.
cells.head()

Unnamed: 0,cell_id,cell_name,cell_network_id,site_name,site_longitude,site_latitude,geo_level1_id,site_urban,site_id
0,CE000000,Cell 000000,45cf6c6b54a5e6de,SITE_001718,4.088464,50.742858,LVL1_Herne,True,SI001718
1,CE000001,Cell 000001,39e7e11e273842b2,SITE_001718,4.088464,50.742858,LVL1_Herne,True,SI001718
2,CE000002,Cell 000002,78d6545cb56b0a02,SITE_001718,4.088464,50.742858,LVL1_Herne,True,SI001718
3,CE000003,Cell 000003,00e4b57703d15676,SITE_001718,4.088464,50.742858,LVL1_Herne,True,SI001718
4,CE000004,Cell 000004,ba9f2e47aa3655c0,SITE_001718,4.088464,50.742858,LVL1_Herne,True,SI001718


In [82]:
import pandas as pd

# Load the raw cell inventory and normalise the site names:
# upper-case, with spaces replaced by underscores.
raw_cells = pd.read_csv("cells.csv")
normalised_names = raw_cells["site_name"].str.upper().str.replace(" ", "_")
raw_cells["site_name"] = normalised_names

# Filter out the "SITE_RANDOM..." sites (three of them, according to the original note).
# na=False keeps rows with a missing site name, exactly as the slice comparison did.
is_random_site = raw_cells["site_name"].str.startswith("SITE_RANDOM", na=False)
cells = raw_cells[~is_random_site]

# One row per distinct (name, longitude, latitude, level-1 area) combination.
site_columns = ["site_name", "site_longitude", "site_latitude", "geo_level1_id"]
sites = cells[site_columns].drop_duplicates()

# Displayed as the cell output: (number of sites, number of columns).
sites.shape

(4208, 4)

In [84]:
# Display 8 randomly drawn rows of `sites` as a spot check.
# No random_state is passed, so the rows shown may differ from run to run.
sites.sample(8)

Unnamed: 0,site_name,site_longitude,site_latitude,geo_level1_id
50388,SITE_001367,5.277547,50.866045,LVL1_Alken
25908,SITE_005135,5.104118,51.296623,LVL1_Arendonk
27726,SITE_005691,5.717928,49.740395,LVL1_Attert
47634,SITE_002294,5.504317,51.130652,LVL1_Peer
1746,SITE_021727,5.664332,51.062493,LVL1_Maaseik
42426,SITE_001504,4.307167,50.870083,LVL1_Ganshoren
27216,SITE_004224,3.800453,50.61714,LVL1_Ath
21618,SITE_002399,4.349989,50.603777,LVL1_Nivelles


## Converting to Actor

In [85]:
from datagenerator.core import actor
# Import the module under an alias so that the `circus` instance created below
# does not shadow it (the original rebound the module name to the instance).
from datagenerator.core import circus as circus_module

# The circus that will own the "sites" actor. The name `circus` is kept for
# the instance because the following cells refer to it.
circus = circus_module.Circus(
    name="belgium",
    master_seed=1234,
    start=pd.Timestamp("22 July 2012"),
    step_duration=pd.Timedelta("12h"),
)

# One actor member per site, identified by the normalised site name.
sites_actor = circus.create_actor(name="sites", ids=sites.site_name)

# Static per-site attributes taken from the de-duplicated `sites` frame.
sites_actor.create_attribute(name="LATITUDE", init_values=sites.site_latitude)
sites_actor.create_attribute(name="LONGITUDE", init_values=sites.site_longitude)
sites_actor.create_attribute(name="GEO_LEVEL_1", init_values=sites.geo_level1_id)

# Relationship from each site to its cells: one relation per row of `cells`,
# going from that row's site_name to its cell_id.
cell_rel = sites_actor.create_relationship(name="CELLS")
cell_rel.add_relations(from_ids=cells.site_name, to_ids=cells.cell_id)




In [86]:
# Export the "sites" actor to a DataFrame and show 4 random rows as a sanity check.
sites_frame = circus.actors["sites"].to_dataframe()
sites_frame.sample(n=4)

Unnamed: 0_level_0,LATITUDE,GEO_LEVEL_1,LONGITUDE
site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SITE_001403,50.488032,LVL1_Mons,4.020131
SITE_006910,51.129826,LVL1_Lochristi,3.850763
SITE_001651,50.87046,LVL1_Liedekerke,4.082206
SITE_010654,51.19084,LVL1_Anvers,4.398593


## Saving

In [87]:
# Persist the circus built above by calling save_to_db with overwrite=True
# (presumably this replaces any previously saved "belgium" circus — confirm
# against the datagenerator implementation).
circus.save_to_db(overwrite=True)

