# Data Formatting for the Story
In this notebook we focus on formatting the data that will be displayed on the Data Story website: [tweet-awareness.eu](http://tweet-awareness.eu). Because the website renders its maps directly in JavaScript, we export the data in JavaScript's native data format: `.json`.

In [291]:
import pandas as pd
import numpy as np 
import os
import pickle

In [179]:
# Local checkouts: the project data folder and the GitHub Pages repository of the data story.
data_path = '../../../Project Data'
story_path = '../../../Data-Story/'
# Root folder that is scanned for the per-event tweet data.
tweet_path = os.path.join(data_path, 'Tweets')


## 1. Displaying the twitter results : 
We are going to save the Twitter results as `.json` files:
- the raw number of tweets
- the normalized tweet counts

Loading the Summary Data : 

In [274]:
# Home country (ISO2) of each event, keyed by the event's folder name.
# The mapping is explicit rather than zipped against the column order, because
# os.listdir() returns entries in arbitrary order and a positional list of codes
# would silently attach the wrong country to an event on another machine.
event_home_country = {
    'Charlie-Hebdo': 'FR',
    'Istanbul': 'TR',
    'Nigeria_2015': 'NG',
    'Orlando': 'US',
    'Bruxelles': 'BE',
    'Pakistan': 'PK',
    'Lebanon': 'LB',
    'Nigeria_2016': 'NG',
}

# One entry per event under tweet_path; the macOS '.DS_Store' entry is skipped.
event_list = [ev for ev in os.listdir(tweet_path) if ('.DS_Store' not in ev)]
if not event_list:
    raise ValueError('No event folder found in {!r}'.format(tweet_path))

# Load every event's summary and label its single column with the event name.
# NOTE: pd.read_pickle can execute arbitrary code - only load pickles produced by this project.
event_frames = []
for event in event_list:
    event_df = pd.read_pickle(os.path.join(tweet_path, event, 'Geocoded/summary.pickle'))
    event_df.columns = [event]
    event_frames.append(event_df)

# Align all events on the row index with an OUTER join: a left join on whichever
# event happens to be listed first would drop every country missing from that event.
# Countries without a value for an event get 0.0.
summary_df = event_frames[0].join(event_frames[1:], how='outer').fillna(0.0)

# Fail loudly instead of mislabelling when a new event folder has no known home country.
unknown_events = [event for event in summary_df.columns if event not in event_home_country]
if unknown_events:
    raise ValueError('No home country defined for events: {}'.format(unknown_events))
event_country = {event: event_home_country[event] for event in summary_df.columns}

# Copy of the raw values in which each event's home country is masked with NaN, so
# that the per-country mean below ignores the event that took place in that country.
all_tweets = summary_df.copy()
for event, country in event_country.items():
    # Guard: assigning through .loc with a missing label would append a new all-NaN row.
    if country in all_tweets.index:
        all_tweets.loc[country, event] = np.nan

# Normalise every event column by the row-wise mean of the masked table
# (NaN entries are skipped); rows are aligned on the index.
country_mean = all_tweets.mean(axis=1, skipna=True)
summary_norm_df = summary_df.div(country_mean, axis=0)

summary_norm_df.head()

Unnamed: 0_level_0,Charlie-Hebdo,Istanbul,Nigeria_2015,Orlando,Bruxelles,Pakistan,Lebanon,Nigeria_2016
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AD,4.915663,0.082616,0.0,2.203098,0.784854,0.013769,0.0,0.0
AE,3.310254,0.464227,0.170568,1.586108,0.669964,1.34872,0.436971,0.013188
AF,3.971223,0.141269,0.109876,2.192283,0.627861,0.81622,0.068018,0.07325
AG,3.356098,0.039024,0.039024,4.019512,0.429268,0.117073,0.0,0.0
AI,0.188764,0.0,0.026966,7.640449,0.107865,0.026966,0.0,0.008989


Loading additional information:

In [279]:
# Read the pickled country table and re-key it by its 'ISO3' column.
country_data_path = os.path.join(data_path, 'country_data.pickle')
c_data_df = (
    pd.read_pickle(country_data_path)
    .reset_index()
    .set_index('ISO3')
)

In [280]:
# Country attributes kept for the website; in c_data_df they are addressed with
# two-level column keys whose second level is the empty string.
attribute_names = ['name', 'ISO2', 'Internet users', 'POP', 'latlng']
measurement_df = c_data_df[[(attribute, '') for attribute in attribute_names]]

# Flatten the column index by discarding the second level.
measurement_df.columns = measurement_df.columns.droplevel(level=1)

# Attach the per-event columns of summary_df, matching each row's ISO2 value
# against the index of summary_df.
measurement_df = measurement_df.join(summary_df, on='ISO2')
measurement_df.head()

Unnamed: 0_level_0,name,ISO2,Internet users,POP,latlng,Charlie-Hebdo,Istanbul,Nigeria_2015,Orlando,Bruxelles,Pakistan,Lebanon,Nigeria_2016
ISO3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
ABW,Aruba,AW,99000.0,113648.0,"[12.5, -69.96666666]",57.0,2.0,1.0,46.0,10.0,0.0,0.0,0.0
AFG,Afghanistan,AF,2690000.0,33332025.0,"[33, 65]",759.0,27.0,21.0,419.0,120.0,156.0,13.0,14.0
AGO,Angola,AO,2434000.0,20172332.0,"[-12.5, 18.5]",198.0,5.0,15.0,264.0,45.0,10.0,0.0,1.0
AIA,Anguilla,AI,12000.0,16752.0,"[18.25, -63.16666666]",21.0,0.0,3.0,850.0,12.0,3.0,0.0,1.0
ALA,Åland Islands,AX,,,"[60.116667, 19.9]",10.0,0.0,1.0,29.0,7.0,4.0,0.0,0.0


In [250]:
# Write the raw-count table to the story repository as JSON.
# Because a target path is supplied, to_json writes the file and returns None,
# so `measurement_json` ends up holding None rather than the JSON text.
measurement_path = os.path.join(story_path, 'I-Measurement', 'measurement.json')
measurement_json = measurement_df.to_json(
    path_or_buf=measurement_path,
    orient='columns',
    force_ascii=False,
)

In [191]:
# NOTE(review): `measurement_json` comes from a to_json(path_or_buf=...) call, which
# returns None when it writes to a path - so this cell displays nothing.
measurement_json

In [277]:

# NOTE(review): placeholder only - the name is rebound to a DataFrame in the next cell.
measurement_norm_df = []

In [281]:
# Same country attributes as for the raw table, selected through their
# two-level column keys (second level is the empty string).
attribute_names = ['name', 'ISO2', 'Internet users', 'POP', 'latlng']
measurement_norm_df = c_data_df[[(attribute, '') for attribute in attribute_names]]

# Flatten the column index by discarding the second level.
measurement_norm_df.columns = measurement_norm_df.columns.droplevel(level=1)

# Attach the normalised per-event columns, matching each row's ISO2 value
# against the index of summary_norm_df.
measurement_norm_df = measurement_norm_df.join(summary_norm_df, on='ISO2')
measurement_norm_df.head()

Unnamed: 0_level_0,name,ISO2,Internet users,POP,latlng,Charlie-Hebdo,Istanbul,Nigeria_2015,Orlando,Bruxelles,Pakistan,Lebanon,Nigeria_2016
ISO3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
ABW,Aruba,AW,99000.0,113648.0,"[12.5, -69.96666666]",3.931034,0.137931,0.068966,3.172414,0.689655,0.0,0.0,0.0
AFG,Afghanistan,AF,2690000.0,33332025.0,"[33, 65]",3.971223,0.141269,0.109876,2.192283,0.627861,0.81622,0.068018,0.07325
AGO,Angola,AO,2434000.0,20172332.0,"[-12.5, 18.5]",2.944238,0.074349,0.223048,3.925651,0.669145,0.148699,0.0,0.01487
AIA,Anguilla,AI,12000.0,16752.0,"[18.25, -63.16666666]",0.188764,0.0,0.026966,7.640449,0.107865,0.026966,0.0,0.008989
ALA,Åland Islands,AX,,,"[60.116667, 19.9]",1.568627,0.0,0.156863,4.54902,1.098039,0.627451,0.0,0.0


In [288]:
# Write the normalised table to the story repository as JSON.
# Because a target path is supplied, to_json writes the file and returns None,
# so `measurement_json` is (re)bound to None here.
measurement_path = os.path.join(story_path, 'I-Measurement', 'measurement_norm.json')
measurement_json = measurement_norm_df.to_json(
    path_or_buf=measurement_path,
    orient='columns',
    force_ascii=False,
)

## 2. The Metrics

In [331]:
# Pickled table produced by the LinkingLanguages step
# (presumably a country-by-country language distance matrix - confirm against that notebook).
language_df = pd.read_pickle("./../LinkingLanguages/country_dist_languages.pkl")

# Flat-column copy of the country attributes (second column level dropped).
attribute_keys = [(attribute, '') for attribute in ('name', 'ISO2', 'Internet users', 'POP', 'latlng')]
country_data = c_data_df[attribute_keys]
country_data.columns = country_data.columns.droplevel(level=1)

# Lookup indexed by ISO2: the former index of country_data becomes a regular column.
iso2_column = country_data[['ISO2']]
country_codes = iso2_column.reset_index().set_index('ISO2')

In [332]:
# Re-key the language table by ISO3: attach the ISO3 column through the ISO2-indexed
# `country_codes` lookup, promote it to the index and drop the leftover 'index' column
# created by reset_index().
# NOTE(review): `language_df` is rebound from itself, so this cell is not idempotent -
# reload `language_df` before running it a second time.
language_df = (
    language_df
    .join(country_codes)
    .reset_index()
    .set_index("ISO3")
    .drop("index", axis=1)
)
# An unfinished `col_replace = ` statement used to sit here; it was a SyntaxError that
# prevented the whole cell from running and has been removed.
# TODO: define the intended column renaming if the columns must also be re-keyed.
language_df.head()

Unnamed: 0_level_0,AW,AF,AO,AI,AX,AL,AD,AE,AR,AM,...,VG,VI,VN,VU,WF,WS,YE,ZA,ZM,ZW
ISO3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ABW,0.0,17.5,4.0,13.0,12.0,9.0,4.0,5.0,4.5,10.25,...,13.0,13.0,inf,13.333333,7.5,9.666667,5.0,11.0,13.0,13.285714
AFG,17.5,0.0,20.0,15.0,13.0,7.0,16.0,10.0,18.0,9.5,...,15.0,15.0,inf,17.666667,21.0,15.0,10.0,14.0,15.0,13.0
AGO,4.0,20.0,0.0,3.0,1.0,3.0,4.0,5.0,3.0,3.0,...,3.0,3.0,inf,10.666667,4.0,3.0,5.0,8.666667,3.0,12.0
AIA,13.0,15.0,3.0,0.0,3.0,10.0,19.0,4.0,3.0,7.5,...,0.0,0.0,inf,0.0,3.0,0.0,4.0,0.0,0.0,0.0
ALA,12.0,13.0,1.0,3.0,0.0,2.0,3.0,4.0,2.0,2.0,...,3.0,3.0,inf,6.666667,3.0,3.0,4.0,5.0,3.0,7.25


In [None]:
# NOTE(review): this cell originally held an unfinished `country_data.loc[]`, whose empty
# subscript is a SyntaxError and aborts Restart & Run All. Until the intended selection
# is written, simply preview the frame.
country_data.head()

In [303]:
import json

# Scratch container holding the country table serialised as a JSON string
# (to_json without a path returns the text).
# NOTE(review): placeholder identifiers - rename before publishing the notebook.
LOL = {'FUCK': country_data.to_json()}

In [322]:
# Preview the ISO2-indexed lookup built from `country_data`.
country_codes.head()

AttributeError: 'NoneType' object has no attribute 'head'