In [2]:
"""
load/import all the required packages. give the packages an alias to make code blocks more compact.
we're loading the pandas package and giving it an alias 'pd'. so in the rest of the code we can just use 'pd' as a
short form to access all the functions available in the pandas library. we're doing the same with the
plotly.graph_objects package, which gets the alias 'go'.

pandas: pandas is a popular python library used for data extraction, manipulation, wrangling, and analysis.
plotly: plotly is a python graphing library (built on top of the plotly.js javascript library) that allows users to
visualize data by creating interactive plots.

"""
import pandas as pd
import plotly.graph_objects as go

In [3]:
"""
let's use the 'read_csv()' function from pandas to load the 'gdp_csv.csv' file into a data frame called 'dd'
"""
# the path is relative, so 'gdp_csv.csv' is looked up in the notebook's current working directory
dd = pd.read_csv('gdp_csv.csv')

In [4]:
"""
let's check the shape of the data frame using the 'shape' attribute (it is an attribute rather than a function, so
there are no parentheses). we can see our 'dd' data frame has 11507 rows and 4 columns.
"""
# the result is a (number of rows, number of columns) tuple
dd.shape

(11507, 4)

In [5]:
"""
let's view the first 20 rows of the data using the 'head()' function for data frames
"""
# head(n) returns the first n rows; when n is omitted it defaults to 5
dd.head(20)

Unnamed: 0,Country Name,Country Code,Year,Value
0,Arab World,ARB,1968,25760680000.0
1,Arab World,ARB,1969,28434200000.0
2,Arab World,ARB,1970,31385500000.0
3,Arab World,ARB,1971,36426910000.0
4,Arab World,ARB,1972,43316060000.0
5,Arab World,ARB,1973,55018390000.0
6,Arab World,ARB,1974,105145800000.0
7,Arab World,ARB,1975,116337000000.0
8,Arab World,ARB,1976,144846200000.0
9,Arab World,ARB,1977,167308300000.0


In [8]:
"""
we can use the 'nunique()' function to count the number of unique values in a column. let's check how many unique
countries we have in our data frame
"""
# nunique() returns a single integer; missing values (NaN) are not counted by default
dd['Country Name'].nunique()

256

In [9]:
"""
now let's say you want to see all the unique values of a column instead of just counting them. In that case, we use
the 'unique()' function with the specific column
"""
# unique() returns an array of the distinct values, in the order they first appear in the column
dd['Country Name'].unique()

array(['Arab World', 'Caribbean small states',
       'Central Europe and the Baltics', 'Early-demographic dividend',
       'East Asia & Pacific',
       'East Asia & Pacific (excluding high income)',
       'East Asia & Pacific (IDA & IBRD countries)', 'Euro area',
       'Europe & Central Asia',
       'Europe & Central Asia (excluding high income)',
       'Europe & Central Asia (IDA & IBRD countries)', 'European Union',
       'Fragile and conflict affected situations',
       'Heavily indebted poor countries (HIPC)', 'High income',
       'IBRD only', 'IDA & IBRD total', 'IDA blend', 'IDA only',
       'IDA total', 'Late-demographic dividend',
       'Latin America & Caribbean',
       'Latin America & Caribbean (excluding high income)',
       'Latin America & the Caribbean (IDA & IBRD countries)',
       'Least developed countries: UN classification',
       'Low & middle income', 'Low income', 'Lower middle income',
       'Middle East & North Africa',
       'Middle East & No

In [10]:
"""
we're going to reorder our data frame based on the year and country. so, we want both the year and the country in
ascending order. we can use the 'sort_values()' function to do that and set ascending = True for both columns.
"""
# sort_values() returns a new, sorted data frame and leaves 'dd' itself untouched
ndd = dd.sort_values(
    by=['Year', 'Country Name'],
    ascending=True,  # a single True applies to every column listed in 'by'
)

In [11]:
"""
now if we look at the first few rows of the data frame, we can see that it is ordered by year and country. But now
the row indices are no longer in ascending order from 0. each row is actually still showing the index it had before we
ordered the data. we should fix that
"""
# head() with no argument shows the first 5 rows
ndd.head()

Unnamed: 0,Country Name,Country Code,Year,Value
2306,Afghanistan,AFG,1960,537777800.0
2377,Algeria,DZA,1960,2723649000.0
2668,Australia,AUS,1960,18593350000.0
2725,Austria,AUT,1960,6592694000.0
2809,"Bahamas, The",BHS,1960,169803900.0


In [12]:
"""
to make the index start from 0 again and increase in ascending order, we use a function called
'reset_index()'. we also set the parameter drop=True so that a column with the previous indices is not added to
the data frame.
"""
# reset_index() returns a new data frame rather than changing 'ndd' in place, so we assign the result back to 'ndd'
ndd=ndd.reset_index(drop=True)

In [20]:
"""
now if we look at the data frame, we see that the indices are what we expect them to be.
"""
# the row labels on the left should now run 0, 1, 2, ... in order
ndd.head()

Unnamed: 0,Country Name,Country Code,Year,Value
0,Afghanistan,AFG,1960,537777800.0
1,Algeria,DZA,1960,2723649000.0
2,Australia,AUS,1960,18593350000.0
3,Austria,AUT,1960,6592694000.0
4,"Bahamas, The",BHS,1960,169803900.0


In [21]:
"""
let's check the shape of the data frame again after the reordering.
"""
# sorting and re-indexing only rearrange the rows, so the shape should match the original 'dd'
ndd.shape

(11507, 4)

In [36]:
"""
let's define a dictionary called 'themes', where we will define all the colors for different components of the plot.
"""
# one entry per named theme; each theme maps a plot component to a hex colour code
themes = {
    'theme1': {
        'plot_title': '#333',       # font colour of the figure title
        'axis_title': '#000000',    # font colour of the axis title
        'grid_color': '#e8edea',    # colour of the grid lines on both axes
        'plot_ticks': '#000000',    # font colour of the tick labels
        'background': '#ffffff',    # used for both the plot area and the surrounding paper
        'legend': '#000000',        # font colour of the legend entries
    },
}

In [37]:
"""
let's create a list called 'values', which will consist of the countries that we will visualize.
"""
# each name is compared against the 'Country Name' column when plotting, so the spelling must match exactly
values = [
    'United States',
    'China',
    'Germany',
    'Japan',
]

In [38]:
"""
here, we create a graph object with 'go.Figure()'. think of this as a blank canvas. then we use a for loop to loop
through the selected values and add traces to the graph object. think of traces as layers on the blank canvas.
then finally we add some layout to make the plot look prettier and display it.
"""
# start from an empty figure; one trace (line) per selected country is added below
fig = go.Figure()

# shorthand for the colour palette, so the layout below does not repeat the lookup
theme = themes['theme1']

for i in values:
    # keep only the rows for the current country. 'ndd' was sorted by year earlier,
    # so the points of each line are joined in chronological order
    dd_by_key = ndd.loc[ndd['Country Name'] == i]
    fig.add_trace(
        go.Scatter(
            x=dd_by_key['Year'],
            y=dd_by_key['Value'],
            # 'text' supplies the value for the %{text} placeholder in the hover template
            text=dd_by_key['Country Name'],
            hovertemplate="Country: %{text}<br>Year: %{x}<br>GDP: %{y}",
            name="%s's GDP" % i,
            opacity=0.8,
            mode='lines+markers',
        )
    )

fig.update_layout(
    title={'text': "GDP Over Time", 'font': {'color': theme['plot_title'], 'size': 18}},
    xaxis={
        'title': None,
        'tickfont': {'color': theme['plot_ticks']},
        'gridcolor': theme['grid_color'],
    },
    yaxis={
        # the axis title font is nested under 'title'. the old flat 'titlefont' key is
        # deprecated and is no longer accepted by recent plotly releases
        'title': {'text': 'GDP', 'font': {'color': theme['axis_title']}},
        'tickfont': {'color': theme['plot_ticks']},
        'gridcolor': theme['grid_color'],
    },
    margin={'l': 40, 'b': 40, 't': 30, 'r': 10},
    hovermode='closest',
    # the same colour is used for the plotting area and the paper around it
    plot_bgcolor=theme['background'],
    paper_bgcolor=theme['background'],
    legend={'font': {'color': theme['legend']}},
)

fig.show()