In [2]:
import pandas as pd
import numpy as np
from google.cloud import bigquery

In [None]:
%load_ext google.cloud.bigquery

from IPython.display import clear_output
from tqdm import tqdm
import time

import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns

# Notebook-wide plot styling: seaborn's dark grid theme with fonts scaled up
# by 1.5x, plus a handle on the 'deep' colour palette.
sns.set(font_scale=1.5, style='darkgrid')
current_palette = sns.color_palette('deep')


### Preparing the CSV file with GDP per capita for all countries (year 2012)

***Source:*** The original CSV file containing GDP per capita data was downloaded from [World Bank data](https://data.worldbank.org/indicator/ny.gdp.pcap.pp.cd?view=map) (indicator `NY.GDP.PCAP.PP.CD`); only the values for the year 2012 are used.


In [5]:
data_folder = './data/'

# Load the World Bank table, keeping only the country name, the country code
# and the 2012 value, and rename the columns to the names used downstream.
# NOTE(review): skiprows=2 with header=1 is presumably tuned to the metadata
# lines that precede the header row in the World Bank export -- confirm if the
# file is ever re-downloaded.
df_gdp = (
    pd.read_csv(data_folder+'country_gdpPerCapita.csv', skiprows=2,
                header=1, usecols=['Country Name', 'Country Code', '2012'])
      .rename(columns={'Country Name':'country_name', 'Country Code':'country_code', '2012':'gdppc'})
)

# Build the "ISO3166-1-Alpha-3" -> "ISO3166-1-Alpha-2" lookup table.
# By default pandas parses the literal string "NA" as a missing value, which
# would wipe out Namibia's alpha-2 code ("NA"). Restrict missing-value parsing
# to empty fields so that "NA" survives as a real code.
df_map = (
    pd.read_csv(data_folder+"country_codes.csv",
                usecols=['ISO3166-1-Alpha-3', 'ISO3166-1-Alpha-2'],
                keep_default_na=False, na_values=[''])
      .rename(columns={'ISO3166-1-Alpha-3':'iso3', 'ISO3166-1-Alpha-2':'iso2'})
)
# Drop rows whose two codes are identical, de-duplicate, and index by alpha-3.
df_map = (df_map[df_map.iso3!=df_map.iso2]).drop_duplicates().set_index('iso3')

# Replace the alpha-3 codes with alpha-2 codes; codes without a match in the
# lookup become NaN.
df_gdp['country_code'] = df_gdp.country_code.map(df_map['iso2'].to_dict())
print(df_gdp.shape[0])
display(df_gdp.head(2))

# Saving the resulting dataframe
df_gdp.to_csv(data_folder+'country_gdpPerCapita_2012.csv', index=False, encoding='utf-8')

264


Unnamed: 0,country_name,country_code,gdppc
0,Aruba,AW,35498.982089
1,Afghanistan,AF,1806.76393


## Creating the BigQuery table

In [9]:
# Before running this cell, make sure the environment variable
# "GOOGLE_APPLICATION_CREDENTIALS" points to the JSON key file of an account
# with valid access: the client below is created without explicit credentials.
client = bigquery.Client()

# Schema of the destination table (all columns nullable).
# `mode` is passed by keyword and description/fields are left at their
# defaults: newer google-cloud-bigquery releases added parameters between
# `mode` and `description`, so passing them positionally can bind the values
# to the wrong arguments.
schema_Name_org = [
    bigquery.SchemaField('country_name', 'STRING', mode='NULLABLE'),
    bigquery.SchemaField('country_code', 'STRING', mode='NULLABLE'),
    bigquery.SchemaField('gdppc', 'FLOAT', mode='NULLABLE')
]

dataset_id = 'data_preparation'

# CSV load configuration: skip the header row and apply the explicit schema.
job_config = bigquery.LoadJobConfig()
job_config.skip_leading_rows = 1
# Up to 10 unparsable rows are tolerated (and dropped) before the job fails.
job_config.max_bad_records = 10
job_config.source_format = bigquery.SourceFormat.CSV
job_config.schema = schema_Name_org

# Build the dataset reference directly; Client.dataset() is deprecated.
dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
table_ref = dataset_ref.table('6_gdppc_country')

# NOTE(review): the source is a Cloud Storage object, not the local CSV; it is
# presumably an uploaded copy of a locally prepared GDP-per-capita file --
# confirm that the upload step has been done before running this cell.
load_job = client.load_table_from_uri(
        source_uris='gs://uspto-data/data_preparation/6_country_gdpPerCapita_2012.csv',
        destination=table_ref,
        job_id_prefix='lgs-',
        job_config=job_config,
    )
# Block until the load job finishes; raises if the job failed.
load_job.result()

<google.cloud.bigquery.job.LoadJob at 0x25e76cd3a58>