# Human Trafficking Indicators (HTI) 

### Overview

In [1]:
import pandas as pd
import numpy as np
import unicodecsv
import seaborn as sns
import matplotlib.pyplot as plt
import pprint 
%matplotlib inline

# Load the raw HTI table; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='HTI.csv')

In [2]:
# List every column label in the raw table.
data.columns

Index([u'country', u'ccode', u'year', u'tier', u'minstand', u'efforts',
       u'TRAFFICKING_FLOWS', u'source', u'source_order', u'transit',
       u'transit_order', u'destination', u'destination_order', u'internal',
       u'TRAFFICKING_TYPES', u'psource', u'ptransit', u'pdest', u'pinternal',
       u'lsource', u'ltransit', u'ldest', u'linternal', u'dsource',
       u'dtransit', u'ddest', u'dinternal', u'dssource', u'dstransit',
       u'dsdest', u'dsinternal', u'cpsource', u'cptransit', u'cpdest',
       u'cpinternal', u'clsource', u'cltransit', u'cldest', u'clinternal',
       u'cssource', u'cstransit', u'csdest', u'csinternal', u'PROSECUTION',
       u'domesticlaws', u'enforcement', u'convictinfo', u'prosnum',
       u'PROTECTION', u'protectprogress', u'victimid', u'victimservices',
       u'victimpunish', u'PREVENTION', u'preventprogress',
       u'INTERNATIONAL_LAWS', u'UNP_sign', u'UNP_rat', u'ILO182', u'ILO29',
       u'ILO105', u'CEDAW_sign', u'CEDAW_rat', u'UNCRC_sign', u'UNC

In [3]:
# Count missing cells per column (axis=0 collapses the rows).
data.isnull().sum(axis=0)

country                  0
ccode                    0
year                     0
tier                     0
minstand                 0
efforts                  0
TRAFFICKING_FLOWS     1587
source                   0
source_order             0
transit                  0
transit_order            0
destination              0
destination_order        0
internal                 0
TRAFFICKING_TYPES     1587
psource                  0
ptransit                 0
pdest                    0
pinternal                0
lsource                  0
ltransit                 0
ldest                    0
linternal                0
dsource                  0
dtransit                 0
ddest                    0
dinternal                0
dssource                 0
dstransit                0
dsdest                   0
                      ... 
cldest                   0
clinternal               0
cssource                 0
cstransit                0
csdest                   0
csinternal               0
P

### Filter to country, year, and the destination trafficking-type columns

In [51]:
# Keep only the identifying columns plus the seven `*dest` indicator columns.
# .copy() makes `df` an independent frame, so later transformations cannot
# trigger chained-assignment warnings against `data`.
df = data[['country', 'year', "pdest", "ldest", "ddest", "dsdest", "cpdest", "cldest", "csdest"]].copy()
df.head()

Unnamed: 0,country,year,pdest,ldest,ddest,dsdest,cpdest,cldest,csdest
0,United States,2001,No mention,No mention,No mention,No mention,No mention,No mention,No mention
1,United States,2002,No mention,No mention,No mention,No mention,No mention,No mention,No mention
2,United States,2003,No mention,No mention,No mention,No mention,No mention,No mention,No mention
3,United States,2004,No mention,No mention,No mention,No mention,No mention,No mention,No mention
4,United States,2005,No mention,No mention,No mention,No mention,No mention,No mention,No mention


### Convert the trafficking-type indicators to booleans

In [50]:
# Recode the textual indicator values as booleans: only 'Yes' counts as True,
# while both 'No mention' and 'No' collapse to False.

d = dict([('Yes', True), ('No mention', False), ('No', False)])
df = df.replace(to_replace=d)
df.head(5)

Unnamed: 0,cldest,country,cpdest,csdest,ddest,dsdest,ldest,pdest,year
0,False,United States,False,False,False,False,False,False,2001
1,False,United States,False,False,False,False,False,False,2002
2,False,United States,False,False,False,False,False,False,2003
3,False,United States,False,False,False,False,False,False,2004
4,False,United States,False,False,False,False,False,False,2005


### Fill in missing country-year rows


In [13]:
def country_years_missing(dataframe, years=None):
    """Add a placeholder row for every (country, year) pair absent from ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with at least ``country`` and ``year`` columns.
    years : iterable of int, optional
        Years that every country should have a row for. Defaults to
        2001 through 2011 inclusive.

    Returns
    -------
    pandas.DataFrame
        The original rows followed by one new row per missing pair, ordered
        by first appearance of the country and then by year, with a fresh
        0..n-1 index. New rows carry only ``country`` and ``year``; all other
        columns are NaN. When nothing is missing the input frame is returned
        unchanged.
    """
    if years is None:
        years = range(2001, 2012)
    years = list(years)

    # Take the countries from the frame that was passed in (not from a
    # notebook-level global). Rows without a country are skipped because a
    # placeholder for "no country" would carry no information.
    countries = dataframe['country'].dropna().unique()

    # One pass over the frame instead of one .query() per (country, year).
    present = set(zip(dataframe['country'], dataframe['year']))
    missing = [
        {'country': country, 'year': year}
        for country in countries
        for year in years
        if (country, year) not in present
    ]

    if not missing:
        return dataframe

    # Single concat: DataFrame.append no longer exists in pandas >= 2.0 and
    # appending row by row copies the whole frame on every iteration.
    return pd.concat([dataframe, pd.DataFrame(missing)], ignore_index=True)

In [16]:
# Pad the table so each country has a row for every year the helper checks.
df = country_years_missing(dataframe=df)

In [24]:
# Spot-check a single country after the fill step.
df.loc[df['country'] == "United States"]

Unnamed: 0,cldest,country,cpdest,csdest,ddest,dsdest,ldest,pdest,year
0,False,United States,False,False,False,False,False,False,2001
1,False,United States,False,False,False,False,False,False,2002
2,False,United States,False,False,False,False,False,False,2003
3,False,United States,False,False,False,False,False,False,2004
4,False,United States,False,False,False,False,False,False,2005
5,True,United States,True,False,False,False,True,True,2007
6,True,United States,True,False,True,False,True,True,2008
7,True,United States,True,False,True,False,True,True,2009
8,True,United States,True,False,True,False,True,True,2010
9,True,United States,True,False,True,True,True,True,2011


### Write the data to a .json file

In [26]:
# One dict per row ({column: value}), as a real list. The previous
# `df.T.to_dict().values()` yields a dict view on Python 3, which the JSON
# encoder cannot serialize, and it transposed the whole frame to get there.
final_object = df.to_dict(orient='records')

In [48]:
import simplejson

# Serialize the row records to disk; ignore_nan=True makes simplejson emit
# null for NaN cells instead of the non-standard NaN token.
with open('data.json', 'w') as json_file:
    simplejson.dump(final_object, json_file, ignore_nan=True)