In [1]:
#importing the libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from __future__ import unicode_literals
from IPython.core.display import HTML
# import d3_lib

In [2]:
# Load the GDP workbook (an Excel .xls file, not a CSV); the first two
# sheet rows are skipped before the header row is read.
gdp_data = pd.read_excel(
    'datasets/gdp_Download-GDPcurrent-USD-countries.xls',
    skiprows=2,
)

In [3]:
# Gather every column label whose type is exactly `int`
# (presumably the year headers of the sheet -- confirm against the workbook).
time_series = [column for column in gdp_data.columns if type(column) is int]

In [4]:
# Distinct values of the 'Country' column, used later as a reminder of the valid choices.
country_set = set(gdp_data['Country'])

In [5]:
# Long indicator name -> short code. Only the indicators listed here are abbreviated.
meanings = {
    'Exports of goods and services': 'Ex',
    'General government final consumption expenditure': 'G',
    'Gross Domestic Product (GDP)': 'GDP',
    'Imports of goods and services': 'Im',
    'Household consumption expenditure (including Non-profit institutions serving households)': 'C',
    'Gross capital formation': 'I',
}

In [6]:
def convertname(x, mapping=None):
    '''Return the shortened form of an indicator name.

    x       -- the long-form indicator name to look up.
    mapping -- optional dict of long name -> short name; when omitted, the
               notebook-level `meanings` dict is used.

    Returns the mapped short name, or `x` itself when it has no entry.
    '''
    if mapping is None:
        # Resolved at call time so a re-defined `meanings` is picked up.
        mapping = meanings
    # Single dictionary lookup with the input as the fallback value.
    return mapping.get(x, x)

In [7]:
# Map every entry of the IndicatorName column through convertname, so names
# listed in the meanings dict are replaced by their shortened version.
converted_name = gdp_data['IndicatorName'].map(convertname)

In [8]:
# Add a 'Concat' column holding "<country> <indicator>", built from the
# country name, a single space and the (possibly shortened) indicator name.
gdp_data['Concat'] = gdp_data.Country + ' ' + converted_name

In [9]:
# Use the 'Concat' labels as the row index (in place) so rows can be selected by label.
gdp_data.set_index(keys='Concat', inplace=True)

In [10]:
# Replace every missing (NaN) cell with 0, modifying the frame in place.
gdp_data.fillna(value=0, inplace=True)

In [11]:
# Cast every float64 column to a 64-bit integer (fractional parts are truncated).
# The width is spelled out on purpose: a plain `int` target maps to the platform
# C long in NumPy releases before 2.0, which is only 32 bits wide on Windows and
# would silently overflow on large values such as those expected in a GDP-in-USD
# table. NaNs must already have been replaced, because the cast fails on NaN.
for column in gdp_data.columns:
    if gdp_data[column].dtype == float:
        gdp_data[column] = gdp_data[column].astype('int64')

In [12]:
# Clear the index name so it is no longer shown as a header above the row
# labels when the frame is displayed.
gdp_data.index.name = None

In [13]:
# Drop every row whose label does not end in an abbreviated indicator name.
# Labels have the form "<country> <indicator>" and a country name may itself
# contain spaces, so the indicator is read as the LAST whitespace-separated
# token. Taking the second token would discard every multi-word country.
# NOTE(review): assumes no unabbreviated indicator name ends in a bare
# abbreviation such as "C" or "I" -- confirm against the workbook.
abbv_list = ['C', 'I', 'G', 'Ex', 'Im', 'GDP']
# Collect each offending label once, then remove them all in a single drop
# instead of rebuilding the frame once per row.
rows_to_drop = [indicator for indicator in gdp_data.index.unique()
                if indicator.rsplit(None, 1)[-1] not in abbv_list]
gdp_data.drop(rows_to_drop, inplace=True)

In [14]:
# Discard the two descriptive columns in place; their content is already
# carried by the "<country> <indicator>" row labels.
remove_cols = ['IndicatorName', 'Country']
gdp_data.drop(labels=remove_cols, axis=1, inplace=True)

In [18]:
#declaring chart variables

def _split_user_list(raw_text):
    '''Split comma-separated user input into a list of trimmed, non-empty entries.

    Accepts "a, b", "a,b" and " a ,  b " alike, so a missing or extra space
    after a comma no longer produces labels that match nothing.
    NOTE(review): a name that itself contains a comma cannot be entered this way.
    '''
    return [entry.strip() for entry in raw_text.split(',') if entry.strip()]

#reminder of which countries are available
print(country_set)
print('\n')

#user selects countries to investigate
user_country_select = raw_input('Please type in the name of the country or countries you want to investigate (separate with ", "): ')
user_country_list = _split_user_list(user_country_select)
print(user_country_list)
print('\n')

#menu of the indicator abbreviations the user may choose from
indicator_menu = [
    'For Consumer/Personal consumption, type C',
    'For Investment/Business consumption, type I',
    'For Government consumption, type G',
    'For Total Exports, type Ex',
    'For Total Imports, type Im',
    'For Aggregate consumption, type GDP',
]
# One entry per line followed by a blank line, as a single print call.
print('\n'.join(indicator_menu) + '\n')

#user selects the indicators to investigate
user_indicator_select = raw_input('Please type in the abbrieviate name of the GDP Indicator.(separate with ", "): ')
user_indicator_list = _split_user_list(user_indicator_select)
print(user_indicator_list)
print('\n')

set([u'Canada', u'Sao Tome and Principe', u'Turkmenistan', u'Yugoslavia (Former)', u'Lithuania', u'Cambodia', u'Switzerland', u'Ethiopia', u'Aruba', u'Swaziland', u'Argentina', u'Cameroon', u'Burkina Faso', u'Ghana', u'Saudi Arabia', u'Japan', u'State of Palestine', u'Slovenia', u'Guatemala', u'Bosnia and Herzegovina', u'Kuwait', u'Russian Federation', u'Jordan', u'Dominica', u'Liberia', u'Maldives', u'Jamaica', u'Oman', u'Cabo Verde', u'Albania', u'Gabon', u'Monaco', u'Samoa', u'New Zealand', u'Yemen', u'Andorra', u'Greenland', u"Lao People's DR", u'United Arab Emirates', u'Czechoslovakia (Former)', u'India', u'Azerbaijan', u'Lesotho', u'Kenya', u'Tajikistan', u'Turkey', u'Afghanistan', u'Micronesia (FS of)', u'Bangladesh', u'Mauritania', u'Iran (Islamic Republic of)', u'Sudan (Former)', u'Saint Lucia', u'San Marino', u'French Polynesia', u'France', u'Syrian Arab Republic', u'Bermuda', u'Slovakia', u'Somalia', u'Peru', u'Vanuatu', u'Nauru', u'Seychelles', u'Norway', u'Malawi', u'Cook 

In [79]:
# Build one list of "<country> <indicator>" row labels per requested indicator.
# An unrecognised indicator prints 'oops' once per country and contributes an
# empty list.
valid_indicators = ('C', 'I', 'G', 'Ex', 'Im', 'GDP')
gdp_request_grouping = []

for indicator in user_indicator_list:
    inner_grouping = []
    for country in user_country_list:
        indicator_country = country + " " + indicator
        if indicator in valid_indicators:
            inner_grouping.append(indicator_country)
        else:
            print('oops')
    gdp_request_grouping.append(inner_grouping)

print(gdp_request_grouping)

[[u'Congo C', u'Greece C', u'Paraguay C'], [u'Congo G', u'Greece G', u'Paraguay G'], [u'Congo GDP', u'Greece GDP', u'Paraguay GDP']]
