# Migration of Chinese Laborers

Chinese migration has been theorized to be a major source of soft influence - in the form of exposing cultures to the Chinese culture - and direct influence - through using Chinese laborers in the BRI for infrastructure development and firm management. 

Visualizing where Chinese Immigration is occuring provides valuable information into how China is increasing their global presence. 

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd

import os
import json

# for visualizations 
from scipy.stats import linregress

__Function definitions__

In [2]:
def load_dict(path): 
    file = open(path, "r")
    contents = file.read()
    dictionary = json.loads(contents)
    file.close()
    return dictionary

In [3]:
# Choose YOUR data focus 

#interest = "chinese"
interest = "us"

### Bring in needed data, geocode and process

In [4]:
git_repo_loc = os.path.dirname(os.path.realpath(interest + "_migration.ipynb"))
df = pd.read_excel(git_repo_loc + "/undesa_pd_2020_ims_stock_by_sex_destination_and_origin.xlsx", sheet_name='Table 1 - Transformed', header=10, index_col=0)

In [5]:
# restrict to only x country immigrants 

x = {
    "chinese" : df.loc[df['Region, development group, country or area of origin'].str.contains("China") & ~df['Region, development group, country or area of origin'].str.contains("excluding China")], 
    "us" : df.loc[df['Region, development group, country or area of origin'].str.contains("United States of America*")]
}

ci = x[interest]

ci['Region, development group, country or area of destination'] = ci['Region, development group, country or area of destination'].str.strip()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ci['Region, development group, country or area of destination'] = ci['Region, development group, country or area of destination'].str.strip()


In [6]:
ci.head()

Unnamed: 0_level_0,"Region, development group, country or area of destination",Notes of destination,Location code of destination,Type of data of destination,"Region, development group, country or area of origin",Location code of origin,1990,1995,2000,2005,...,2010males,2015males,2020males,1990females,1995females,2000females,2005females,2010females,2015females,2020females
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
260,WORLD,,900,,United States of America*,840,1726213,1827431,1979327,2296999,...,1331560,1365833,1504237,864107,914161,991011,1141623,1339092,1351928,1491986
543,Sub-Saharan Africa,,947,,United States of America*,840,20299,18222,18629,21069,...,9893,9751,10795,10483,9176,9457,10687,9367,8604,9432
826,Northern Africa and Western Asia,,1833,,United States of America*,840,111333,112277,112085,122171,...,77751,77630,83928,52747,53811,53637,57858,67219,68492,73259
1109,Central and Southern Asia,,921,,United States of America*,840,34730,35094,35490,61099,...,43361,43107,30787,16355,16382,16464,26639,38851,39101,48268
1392,Eastern and South-Eastern Asia,,1832,,United States of America*,840,94979,115423,143699,159106,...,103649,126102,142704,43113,53561,68506,69256,80886,86565,95431


In [7]:
# we will consider all regions of China as migrant origins 

ci_s = ci.groupby('Region, development group, country or area of destination')[[
                                                                         2000, 
                                                                         2005, 
                                                                         2010, 
                                                                         2015, 
                                                                         2020, 
                                                                         '2000males',
                                                                         '2005males',
                                                                         '2010males',
                                                                         '2015males',
                                                                         '2020males',
                                                                         '2000females',
                                                                         '2005females',
                                                                         '2010females',
                                                                         '2015females',
                                                                         '2020females']].sum().reset_index()

In [8]:
ci_s['growth'] = [linregress([2000, 2005, 2010, 2015, 2020], np.log(x.values))[0] * 100 for x in ci_s[[2000, 2005, 2010, 2015, 2020]].iloc]
ci_s['average'] = [np.mean(x.values) for x in ci_s[[2000, 2005, 2010, 2015, 2020]].iloc]

  ci_s['growth'] = [linregress([2000, 2005, 2010, 2015, 2020], np.log(x.values))[0] * 100 for x in ci_s[[2000, 2005, 2010, 2015, 2020]].iloc]
  X -= avg[:, None]


### Bring in country level data to showcase geospatial migration patterns

In [9]:
# add in localities to the spreadsheet 
countries = pd.read_csv("../../data_final/countries.csv")

countries.head()

Unnamed: 0,name,country_id,country,geometry,iso,shape_length,shape_area,bri_partner
0,American Samoa,1,American Samoa,MULTIPOLYGON (((-170.743900004 -14.37555495199...,AS,0.600124,0.01372,2018.0
1,United States Minor Outlying Islands,2,United States Minor Outlying Islands,MULTIPOLYGON (((-160.021144981 -0.398054947999...,UM,0.480216,0.003216,
2,Cook Islands,3,Cook Islands,MULTIPOLYGON (((-159.746981979 -21.25667301899...,CK,0.980664,0.013073,2018.0
3,French Polynesia,4,French Polynesia,MULTIPOLYGON (((-149.179199964 -17.87083591099...,PF,3.930211,0.175332,
4,Niue,5,Niue,MULTIPOLYGON (((-169.893891033 -19.14555507899...,NU,0.541413,0.021414,2018.0


In [10]:
# prepare to merge data 
# drop all * in dataset
ci_s['Region, development group, country or area of destination'] = ci_s['Region, development group, country or area of destination'].str.replace("*", "")

# move to common naming conventions
recipient_mapping = load_dict("../country_config.txt")
ci_s['Region, development group, country or area of destination'] = ci_s['Region, development group, country or area of destination'].replace(recipient_mapping)

  ci_s['Region, development group, country or area of destination'] = ci_s['Region, development group, country or area of destination'].str.replace("*", "")


In [11]:
# merge in migration data to the country level information we have 
ci_s = ci_s.merge(countries, left_on='Region, development group, country or area of destination', right_on='country')

## Export Data
Based on your use, choose which __export type__ you would like to use: 

In [12]:
# export geocoded immigration data for use in modelling and to pass to high side 

# drop unneeded columns 
for_export = ci_s.drop(columns=['name', 'iso', 'shape_area', 'shape_length', 'geometry', 'country', 'bri_partner'])
for_export = for_export.rename(columns={'Region, development group, country or area of destination': 'destination_area'})
for_export.to_csv("../../data_final/" + interest + "_immigration.csv", index=False)