## Analysing The DataSet

In [28]:
import pandas as pd
import numpy as np 
from pandas_profiling import ProfileReport
import matplotlib.pyplot as plt


csv = pd.read_csv("USA.csv")
df = pd.DataFrame(csv)
df.columns = ['Country Name', 'Country Code', 'Series Name', 'Series Code',
       '1990', '2000', '2011', '2012',
       '2013', '2014', '2015', '2016',
       '2017', '2018', '2019', '2020']
df.head().columns

Index(['Country Name', 'Country Code', 'Series Name', 'Series Code', '1990',
       '2000', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018',
       '2019', '2020'],
      dtype='object')

In [29]:
df.iloc[[55,56,57]]
df.drop([55,56,57], inplace=True)

## Cleaning the DataSet

In [30]:
df.index
df.reset_index(drop=True,inplace=True)

In [31]:
df.iloc[[55,56]]
df.drop([55,56], inplace=True)

In [32]:
df.isnull().sum()

Country Name    0
Country Code    0
Series Name     0
Series Code     0
1990            0
2000            0
2011            0
2012            0
2013            0
2014            0
2015            0
2016            0
2017            0
2018            0
2019            0
2020            0
dtype: int64

In [33]:
df.duplicated().sum()

0

In [34]:
df.columns

Index(['Country Name', 'Country Code', 'Series Name', 'Series Code', '1990',
       '2000', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018',
       '2019', '2020'],
      dtype='object')

In [35]:
df.drop(['2020'], axis=1, inplace=True)
df.head(3)

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,1990,2000,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,United States,USA,"Population, total",SP.POP.TOTL,249623000.0,282162411.0,311556874.0,313830990.0,315993715.0,318301008.0,320635163.0,322941311.0,324985539.0,326687501.0,328239523
1,United States,USA,Population growth (annual %),SP.POP.GROW,1.12965052045579,1.11276899679534,0.720017688665691,0.727268997193117,0.686773155567634,0.727517695817155,0.730641178178307,0.716669413429853,0.631007893230758,0.522337357899676,0.473953539373292
2,United States,USA,Surface area (sq. km),AG.SRF.TOTL.K2,9629090.0,9632030.0,9831510.0,9831510.0,9831510.0,9831510.0,9831510.0,9831510.0,9831510.0,9831510.0,..


## Overview 


In [36]:
profile = ProfileReport(df)
profile.to_file(output_file = "file.html")

Summarize dataset:   0%|          | 0/28 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

## Selecting Economy row


In [37]:
df.replace('..',0, inplace =True)

In [38]:
GDP_df = df.loc[df['Series Name'] == "GDP (current US$)"]
GDP_df

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,1990,2000,2011,2012,2013,2014,2015,2016,2017,2018,2019
31,United States,USA,GDP (current US$),NY.GDP.MKTP.CD,5963144000000,10252345464000,15542581104000,16197007349000,16784849196000,17527163695000,18224704440000,18714960538000,19519353692000,20580159776000,21433226000000


In [39]:
records_1 = []
records_1 = GDP_df[['1990','2000','2011','2012','2013','2014','2015','2016','2017','2018','2019']]
records_1

Unnamed: 0,1990,2000,2011,2012,2013,2014,2015,2016,2017,2018,2019
31,5963144000000,10252345464000,15542581104000,16197007349000,16784849196000,17527163695000,18224704440000,18714960538000,19519353692000,20580159776000,21433226000000


In [40]:
arr = np.array(records_1)
arr = arr.astype('int')
ind = ['1990','2000','2011','2012','2013','2014','2015','2016','2017','2018','2019']
arr_ind = np.array(ind)
arr_ind.astype('int')
arr


array([[ 5963144000000, 10252345464000, 15542581104000, 16197007349000,
        16784849196000, 17527163695000, 18224704440000, 18714960538000,
        19519353692000, 20580159776000, 21433226000000]])

## Selecting Population row


In [41]:
df.head(3)

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,1990,2000,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,United States,USA,"Population, total",SP.POP.TOTL,249623000.0,282162411.0,311556874.0,313830990.0,315993715.0,318301008.0,320635163.0,322941311.0,324985539.0,326687501.0,328239523.0
1,United States,USA,Population growth (annual %),SP.POP.GROW,1.12965052045579,1.11276899679534,0.720017688665691,0.727268997193117,0.686773155567634,0.727517695817155,0.730641178178307,0.716669413429853,0.631007893230758,0.522337357899676,0.473953539373292
2,United States,USA,Surface area (sq. km),AG.SRF.TOTL.K2,9629090.0,9632030.0,9831510.0,9831510.0,9831510.0,9831510.0,9831510.0,9831510.0,9831510.0,9831510.0,0.0


In [42]:
population_df = df.loc[df['Series Code'] == 'SP.POP.TOTL']
population_df

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,1990,2000,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,United States,USA,"Population, total",SP.POP.TOTL,249623000,282162411,311556874,313830990,315993715,318301008,320635163,322941311,324985539,326687501,328239523


In [49]:
population = population_df[['1990','2000','2011','2012','2013','2014','2015','2016','2017','2018','2019']]
population = np.array(population).astype('int')

## Selecting Urban population row


In [50]:
urb_pop_df = df.loc[df['Series Code'] == 'SP.URB.GROW']
urb_pop = urb_pop_df[['1990','2000','2011','2012','2013','2014','2015','2016','2017','2018','2019']]
urb_pop = np.array(urb_pop).astype('float')

## New Dataframe

In [51]:
arr

array([[ 5963144000000, 10252345464000, 15542581104000, 16197007349000,
        16784849196000, 17527163695000, 18224704440000, 18714960538000,
        19519353692000, 20580159776000, 21433226000000]])

In [52]:
population

array([[249623000, 282162411, 311556874, 313830990, 315993715, 318301008,
        320635163, 322941311, 324985539, 326687501, 328239523]])

In [53]:
urb_pop

array([[1.41025629, 1.51201139, 0.93273635, 0.9432346 , 0.90842333,
        0.95358702, 0.96109854, 0.9502615 , 0.87014921, 0.76333937,
        0.72043985]])

In [57]:
new_set  = [
    {Eco:[ 5963144000000, 10252345464000, 15542581104000, 16197007349000,16784849196000, 17527163695000, 18224704440000, 18714960538000,19519353692000, 20580159776000, 21433226000000]},
    {Pop:[249623000, 282162411, 311556874, 313830990, 315993715, 318301008,320635163, 322941311, 324985539, 326687501, 328239523]},
    {Urb_pop:[1.41025629, 1.51201139, 0.93273635, 0.9432346 , 0.90842333,0.95358702, 0.96109854, 0.9502615 , 0.87014921, 0.76333937,0.72043985]
    }
]
new_set

NameError: name 'Eco' is not defined