In [1]:
import numpy as np
import pandas as pd

In [2]:
population = [35.467, 63.951, 80.940, 60.665, 127.061, 64.511, 318.523]
countries = ['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom',
       'United States']

In [3]:
g7_dict = {
    'Population': [35.467, 63.951, 80.94 , 60.665, 127.061, 64.511, 318.523],
    'GDP': [
        1785387,
        2833687,
        3874437,
        2167744,
        4602367,
        2950039,
        17348075
    ],
    'Surface Area': [
        9984670,
        640679,
        357114,
        301336,
        377930,
        242495,
        9525067
    ],
    'HDI': [
        0.913,
        0.888,
        0.916,
        0.873,
        0.891,
        0.907,
        0.915
    ],
    'Continent': [
        'America',
        'Europe',
        'Europe',
        'Europe',
        'Asia',
        'Europe',
        'America'
    ]
}

# Series

In [4]:
g7 = pd.Series(population)
g7

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
dtype: float64

In [5]:
g7.name = 'G7 Population in Millions'

In [6]:
g7

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in Millions, dtype: float64

In [7]:
g7.values

array([ 35.467,  63.951,  80.94 ,  60.665, 127.061,  64.511, 318.523])

### Indexing

In [8]:
g7.index

RangeIndex(start=0, stop=7, step=1)

In [9]:
g7[0]

35.467

In [10]:
g7[1:5]

1     63.951
2     80.940
3     60.665
4    127.061
Name: G7 Population in Millions, dtype: float64

In [11]:
g7[[1, 3, 6]]

1     63.951
3     60.665
6    318.523
Name: G7 Population in Millions, dtype: float64

In [12]:
g7.index = countries

In [13]:
g7

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in Millions, dtype: float64

In [14]:
g7['France']

63.951

In [15]:
g7['Canada': 'Japan']

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
Name: G7 Population in Millions, dtype: float64

In [16]:
g7['Canada': 'United Kingdom': 2]

Canada      35.467
Germany     80.940
Japan      127.061
Name: G7 Population in Millions, dtype: float64

In [17]:
g7[['Italy', 'Canada']]

Italy     60.665
Canada    35.467
Name: G7 Population in Millions, dtype: float64

In [18]:
g7.iloc[3]

60.665

In [19]:
g7.iloc[:3]

Canada     35.467
France     63.951
Germany    80.940
Name: G7 Population in Millions, dtype: float64

In [20]:
g7.iloc[[3, 2, -1]]

Italy             60.665
Germany           80.940
United States    318.523
Name: G7 Population in Millions, dtype: float64

In [21]:
g7[0]

  g7[0]


35.467

In [22]:
g7.iloc[0]

35.467

In [23]:
d = dict(zip(countries, population))

In [24]:
g7 = pd.Series(d, name='G7 Population in millions')
g7

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

# DataFrame

In [25]:
df = pd.DataFrame(g7_dict)

In [26]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
0,35.467,1785387,9984670,0.913,America
1,63.951,2833687,640679,0.888,Europe
2,80.94,3874437,357114,0.916,Europe
3,60.665,2167744,301336,0.873,Europe
4,127.061,4602367,377930,0.891,Asia
5,64.511,2950039,242495,0.907,Europe
6,318.523,17348075,9525067,0.915,America


In [27]:
df.index = countries

In [28]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [29]:
df.columns

Index(['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'], dtype='object')

In [30]:
df.index

Index(['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom',
       'United States'],
      dtype='object')

In [31]:
df.size

35

In [32]:
df.shape

(7, 5)

In [33]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Canada to United States
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Population    7 non-null      float64
 1   GDP           7 non-null      int64  
 2   Surface Area  7 non-null      int64  
 3   HDI           7 non-null      float64
 4   Continent     7 non-null      object 
dtypes: float64(2), int64(2), object(1)
memory usage: 336.0+ bytes


In [34]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [35]:
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI
count,7.0,7.0,7.0,7.0
mean,107.302571,5080248.0,3061327.0,0.900429
std,97.24997,5494020.0,4576187.0,0.016592
min,35.467,1785387.0,242495.0,0.873
25%,62.308,2500716.0,329225.0,0.8895
50%,64.511,2950039.0,377930.0,0.907
75%,104.0005,4238402.0,5082873.0,0.914
max,318.523,17348080.0,9984670.0,0.916


In [36]:
df.describe(percentiles=[.2, .4, .6, .8])

Unnamed: 0,Population,GDP,Surface Area,HDI
count,7.0,7.0,7.0,7.0
mean,107.302571,5080248.0,3061327.0,0.900429
std,97.24997,5494020.0,4576187.0,0.016592
min,35.467,1785387.0,242495.0,0.873
20%,61.3222,2300933.0,312491.6,0.8886
40%,64.175,2880228.0,365440.4,0.8974
50%,64.511,2950039.0,377930.0,0.907
60%,74.3684,3504678.0,535579.4,0.9106
80%,117.8368,4456781.0,7748189.0,0.9146
max,318.523,17348080.0,9984670.0,0.916


In [37]:
df.describe(include='object')

Unnamed: 0,Continent
count,7
unique,3
top,Europe
freq,4


### Indexing

In [38]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [39]:
df.loc['Canada']

Population       35.467
GDP             1785387
Surface Area    9984670
HDI               0.913
Continent       America
Name: Canada, dtype: object

In [40]:
df.loc['Canada', 'GDP']

1785387

In [41]:
df.loc['Canada']['GDP']

1785387

In [42]:
df.loc['Germany': 'United States']

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [43]:
df.loc['Germany': 'United States', 'GDP': 'HDI']

Unnamed: 0,GDP,Surface Area,HDI
Germany,3874437,357114,0.916
Italy,2167744,301336,0.873
Japan,4602367,377930,0.891
United Kingdom,2950039,242495,0.907
United States,17348075,9525067,0.915


In [44]:
df.loc['Germany': 'United States', 'Surface Area']

Germany            357114
Italy              301336
Japan              377930
United Kingdom     242495
United States     9525067
Name: Surface Area, dtype: int64

In [45]:
df.loc['Germany': 'United States', 'Surface Area']['Japan']

377930

In [46]:
df.iloc[2]

Population        80.94
GDP             3874437
Surface Area     357114
HDI               0.916
Continent        Europe
Name: Germany, dtype: object

In [47]:
df.iloc[:5]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia


In [48]:
df.iloc[:, 2: 4]

Unnamed: 0,Surface Area,HDI
Canada,9984670,0.913
France,640679,0.888
Germany,357114,0.916
Italy,301336,0.873
Japan,377930,0.891
United Kingdom,242495,0.907
United States,9525067,0.915


In [49]:
df.iloc[2:4, [1, 3, 4]].loc['Italy']['HDI']

0.873

In [50]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [51]:
df['GDP']

Canada             1785387
France             2833687
Germany            3874437
Italy              2167744
Japan              4602367
United Kingdom     2950039
United States     17348075
Name: GDP, dtype: int64

In [52]:
df[['GDP', 'HDI']].loc['France': 'Japan']['HDI']['Germany']

0.916

In [53]:
df['Population': 'HDI']

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent


In [54]:
df.loc[:, 'Population': 'HDI']

Unnamed: 0,Population,GDP,Surface Area,HDI
Canada,35.467,1785387,9984670,0.913
France,63.951,2833687,640679,0.888
Germany,80.94,3874437,357114,0.916
Italy,60.665,2167744,301336,0.873
Japan,127.061,4602367,377930,0.891
United Kingdom,64.511,2950039,242495,0.907
United States,318.523,17348075,9525067,0.915


In [55]:
df.Population.Japan

127.061

In [56]:
df.GDP

Canada             1785387
France             2833687
Germany            3874437
Italy              2167744
Japan              4602367
United Kingdom     2950039
United States     17348075
Name: GDP, dtype: int64

In [58]:
df.Surface Area

SyntaxError: invalid syntax (2423474398.py, line 1)

In [59]:
df.loc['Canada', 'Population'] = 35.7

In [60]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.7,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


### Drop

In [61]:
df.drop('Canada')

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [62]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.7,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [63]:
df.drop(['Canada', 'Japan'])

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [64]:
df.drop('GDP', axis=1)

Unnamed: 0,Population,Surface Area,HDI,Continent
Canada,35.7,9984670,0.913,America
France,63.951,640679,0.888,Europe
Germany,80.94,357114,0.916,Europe
Italy,60.665,301336,0.873,Europe
Japan,127.061,377930,0.891,Asia
United Kingdom,64.511,242495,0.907,Europe
United States,318.523,9525067,0.915,America


In [65]:
df.drop(index=['Italy', 'United Kingdom'], columns='Continent')

Unnamed: 0,Population,GDP,Surface Area,HDI
Canada,35.7,1785387,9984670,0.913
France,63.951,2833687,640679,0.888
Germany,80.94,3874437,357114,0.916
Japan,127.061,4602367,377930,0.891
United States,318.523,17348075,9525067,0.915


In [66]:
lst_of_idx = list(df.loc['Canada': 'Japan'].index)

In [67]:
lst_of_idx.append('United States')

In [68]:
lst_of_idx

['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United States']

In [69]:
df.drop(lst_of_idx)

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
United Kingdom,64.511,2950039,242495,0.907,Europe


### Operations

In [70]:
df[['Population', 'GDP']] / 100

Unnamed: 0,Population,GDP
Canada,0.357,17853.87
France,0.63951,28336.87
Germany,0.8094,38744.37
Italy,0.60665,21677.44
Japan,1.27061,46023.67
United Kingdom,0.64511,29500.39
United States,3.18523,173480.75


In [71]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.7,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [72]:
crisis = pd.Series([-500000, -0.3], index=['GDP', 'HDI'])
crisis

GDP   -500000.0
HDI        -0.3
dtype: float64

In [73]:
df[['GDP', 'HDI']]

Unnamed: 0,GDP,HDI
Canada,1785387,0.913
France,2833687,0.888
Germany,3874437,0.916
Italy,2167744,0.873
Japan,4602367,0.891
United Kingdom,2950039,0.907
United States,17348075,0.915


In [74]:
df[['GDP', 'HDI']] + crisis

Unnamed: 0,GDP,HDI
Canada,1285387.0,0.613
France,2333687.0,0.588
Germany,3374437.0,0.616
Italy,1667744.0,0.573
Japan,4102367.0,0.591
United Kingdom,2450039.0,0.607
United States,16848075.0,0.615


# Modyfying DataFrames

In [75]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.7,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [76]:
langs = pd.Series(['French', 'German', 'Italian'], index=['France', 'Germany', 'Italy'])

In [77]:
langs

France      French
Germany     German
Italy      Italian
dtype: object

In [78]:
df['Language'] = langs

In [79]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.7,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,French
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.665,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


In [80]:
np.nan

nan

In [81]:
df['Language'] = 'English'

In [82]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.7,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
United Kingdom,64.511,2950039,242495,0.907,Europe,English
United States,318.523,17348075,9525067,0.915,America,English


**rename**

In [83]:
df.rename(index={'United Kingdom': 'UK', 'United States': 'USA'},
          columns={'GDP': 'Gross Domestic Product', 'HDI': 'Human Development Index'},
          inplace=True
         )

In [84]:
df

Unnamed: 0,Population,Gross Domestic Product,Surface Area,Human Development Index,Continent,Language
Canada,35.7,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
UK,64.511,2950039,242495,0.907,Europe,English
USA,318.523,17348075,9525067,0.915,America,English


In [85]:
df.rename(index=str.upper)

Unnamed: 0,Population,Gross Domestic Product,Surface Area,Human Development Index,Continent,Language
CANADA,35.7,1785387,9984670,0.913,America,English
FRANCE,63.951,2833687,640679,0.888,Europe,English
GERMANY,80.94,3874437,357114,0.916,Europe,English
ITALY,60.665,2167744,301336,0.873,Europe,English
JAPAN,127.061,4602367,377930,0.891,Asia,English
UK,64.511,2950039,242495,0.907,Europe,English
USA,318.523,17348075,9525067,0.915,America,English


In [86]:
df.rename(columns=lambda x: x.replace(' ', '_'), inplace=True)

In [87]:
df

Unnamed: 0,Population,Gross_Domestic_Product,Surface_Area,Human_Development_Index,Continent,Language
Canada,35.7,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
UK,64.511,2950039,242495,0.907,Europe,English
USA,318.523,17348075,9525067,0.915,America,English


In [88]:
df.rename_axis(index='Countries', columns='Parameters')

Parameters,Population,Gross_Domestic_Product,Surface_Area,Human_Development_Index,Continent,Language
Countries,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Canada,35.7,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
UK,64.511,2950039,242495,0.907,Europe,English
USA,318.523,17348075,9525067,0.915,America,English


In [89]:
df.loc['China'] = pd.Series({'Population': 1400, 'Continent': 'Asia'})

In [90]:
df

Unnamed: 0,Population,Gross_Domestic_Product,Surface_Area,Human_Development_Index,Continent,Language
Canada,35.7,1785387.0,9984670.0,0.913,America,English
France,63.951,2833687.0,640679.0,0.888,Europe,English
Germany,80.94,3874437.0,357114.0,0.916,Europe,English
Italy,60.665,2167744.0,301336.0,0.873,Europe,English
Japan,127.061,4602367.0,377930.0,0.891,Asia,English
UK,64.511,2950039.0,242495.0,0.907,Europe,English
USA,318.523,17348075.0,9525067.0,0.915,America,English
China,1400.0,,,,Asia,


**Index changing**

In [91]:
df.reset_index()

Unnamed: 0,index,Population,Gross_Domestic_Product,Surface_Area,Human_Development_Index,Continent,Language
0,Canada,35.7,1785387.0,9984670.0,0.913,America,English
1,France,63.951,2833687.0,640679.0,0.888,Europe,English
2,Germany,80.94,3874437.0,357114.0,0.916,Europe,English
3,Italy,60.665,2167744.0,301336.0,0.873,Europe,English
4,Japan,127.061,4602367.0,377930.0,0.891,Asia,English
5,UK,64.511,2950039.0,242495.0,0.907,Europe,English
6,USA,318.523,17348075.0,9525067.0,0.915,America,English
7,China,1400.0,,,,Asia,


In [92]:
df

Unnamed: 0,Population,Gross_Domestic_Product,Surface_Area,Human_Development_Index,Continent,Language
Canada,35.7,1785387.0,9984670.0,0.913,America,English
France,63.951,2833687.0,640679.0,0.888,Europe,English
Germany,80.94,3874437.0,357114.0,0.916,Europe,English
Italy,60.665,2167744.0,301336.0,0.873,Europe,English
Japan,127.061,4602367.0,377930.0,0.891,Asia,English
UK,64.511,2950039.0,242495.0,0.907,Europe,English
USA,318.523,17348075.0,9525067.0,0.915,America,English
China,1400.0,,,,Asia,


In [93]:
df.set_index('Population').loc[35.700]

Gross_Domestic_Product     1785387.0
Surface_Area               9984670.0
Human_Development_Index        0.913
Continent                    America
Language                     English
Name: 35.7, dtype: object

In [96]:
df.set_index('Continent').loc['Europe']

Unnamed: 0_level_0,Population,Gross_Domestic_Product,Surface_Area,Human_Development_Index,Language
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Europe,63.951,2833687.0,640679.0,0.888,English
Europe,80.94,3874437.0,357114.0,0.916,English
Europe,60.665,2167744.0,301336.0,0.873,English
Europe,64.511,2950039.0,242495.0,0.907,English


### Create a new column using existing

In [97]:
df

Unnamed: 0,Population,Gross_Domestic_Product,Surface_Area,Human_Development_Index,Continent,Language
Canada,35.7,1785387.0,9984670.0,0.913,America,English
France,63.951,2833687.0,640679.0,0.888,Europe,English
Germany,80.94,3874437.0,357114.0,0.916,Europe,English
Italy,60.665,2167744.0,301336.0,0.873,Europe,English
Japan,127.061,4602367.0,377930.0,0.891,Asia,English
UK,64.511,2950039.0,242495.0,0.907,Europe,English
USA,318.523,17348075.0,9525067.0,0.915,America,English
China,1400.0,,,,Asia,


In [98]:
df['GDP per Capita'] = df.Gross_Domestic_Product / df.Population

In [99]:
df

Unnamed: 0,Population,Gross_Domestic_Product,Surface_Area,Human_Development_Index,Continent,Language,GDP per Capita
Canada,35.7,1785387.0,9984670.0,0.913,America,English,50010.840336
France,63.951,2833687.0,640679.0,0.888,Europe,English,44310.284437
Germany,80.94,3874437.0,357114.0,0.916,Europe,English,47868.013343
Italy,60.665,2167744.0,301336.0,0.873,Europe,English,35733.025633
Japan,127.061,4602367.0,377930.0,0.891,Asia,English,36221.712406
UK,64.511,2950039.0,242495.0,0.907,Europe,English,45729.239975
USA,318.523,17348075.0,9525067.0,0.915,America,English,54464.12033
China,1400.0,,,,Asia,,


In [100]:
df.insert(2, 'GDP_per_Capita', df.pop('GDP per Capita'))

In [101]:
df

Unnamed: 0,Population,Gross_Domestic_Product,GDP_per_Capita,Surface_Area,Human_Development_Index,Continent,Language
Canada,35.7,1785387.0,50010.840336,9984670.0,0.913,America,English
France,63.951,2833687.0,44310.284437,640679.0,0.888,Europe,English
Germany,80.94,3874437.0,47868.013343,357114.0,0.916,Europe,English
Italy,60.665,2167744.0,35733.025633,301336.0,0.873,Europe,English
Japan,127.061,4602367.0,36221.712406,377930.0,0.891,Asia,English
UK,64.511,2950039.0,45729.239975,242495.0,0.907,Europe,English
USA,318.523,17348075.0,54464.12033,9525067.0,0.915,America,English
China,1400.0,,,,,Asia,


### Statistics methods

In [104]:
df.min(numeric_only=True).round(3)

Population                      35.700
Gross_Domestic_Product     1785387.000
GDP_per_Capita               35733.026
Surface_Area                242495.000
Human_Development_Index          0.873
dtype: float64

In [105]:
df.max(numeric_only=True)

Population                 1.400000e+03
Gross_Domestic_Product     1.734808e+07
GDP_per_Capita             5.446412e+04
Surface_Area               9.984670e+06
Human_Development_Index    9.160000e-01
dtype: float64

In [106]:
df.sum(numeric_only=True)

Population                 2.151351e+03
Gross_Domestic_Product     3.556174e+07
GDP_per_Capita             3.143372e+05
Surface_Area               2.142929e+07
Human_Development_Index    6.303000e+00
dtype: float64

In [107]:
df.mean(numeric_only=True)

Population                 2.689189e+02
Gross_Domestic_Product     5.080248e+06
GDP_per_Capita             4.490532e+04
Surface_Area               3.061327e+06
Human_Development_Index    9.004286e-01
dtype: float64

In [108]:
df.median(numeric_only=True)

Population                 7.272550e+01
Gross_Domestic_Product     2.950039e+06
GDP_per_Capita             4.572924e+04
Surface_Area               3.779300e+05
Human_Development_Index    9.070000e-01
dtype: float64

In [110]:
df

Unnamed: 0,Population,Gross_Domestic_Product,GDP_per_Capita,Surface_Area,Human_Development_Index,Continent,Language
Canada,35.7,1785387.0,50010.840336,9984670.0,0.913,America,English
France,63.951,2833687.0,44310.284437,640679.0,0.888,Europe,English
Germany,80.94,3874437.0,47868.013343,357114.0,0.916,Europe,English
Italy,60.665,2167744.0,35733.025633,301336.0,0.873,Europe,English
Japan,127.061,4602367.0,36221.712406,377930.0,0.891,Asia,English
UK,64.511,2950039.0,45729.239975,242495.0,0.907,Europe,English
USA,318.523,17348075.0,54464.12033,9525067.0,0.915,America,English
China,1400.0,,,,,Asia,


In [109]:
df.mode()

Unnamed: 0,Population,Gross_Domestic_Product,GDP_per_Capita,Surface_Area,Human_Development_Index,Continent,Language
0,35.7,1785387.0,35733.025633,242495.0,0.873,Europe,English
1,60.665,2167744.0,36221.712406,301336.0,0.888,,
2,63.951,2833687.0,44310.284437,357114.0,0.891,,
3,64.511,2950039.0,45729.239975,377930.0,0.907,,
4,80.94,3874437.0,47868.013343,640679.0,0.913,,
5,127.061,4602367.0,50010.840336,9525067.0,0.915,,
6,318.523,17348075.0,54464.12033,9984670.0,0.916,,
7,1400.0,,,,,,


In [111]:
df.std(numeric_only=True)

Population                 4.658050e+02
Gross_Domestic_Product     5.494020e+06
GDP_per_Capita             6.913555e+03
Surface_Area               4.576187e+06
Human_Development_Index    1.659174e-02
dtype: float64

In [115]:
df.quantile(.05, numeric_only=True).round(3)

Population                      44.438
Gross_Domestic_Product     1900094.100
GDP_per_Capita               35879.632
Surface_Area                260147.300
Human_Development_Index          0.878
Name: 0.05, dtype: float64

In [116]:
df.quantile([.2, .4, .6, .9], numeric_only=True)

Unnamed: 0,Population,Gross_Domestic_Product,GDP_per_Capita,Surface_Area,Human_Development_Index
0.2,61.9794,2300932.6,37839.426812,312491.6,0.8886
0.4,64.399,2880227.8,44877.866652,365440.4,0.8974
0.6,90.1642,3504677.8,47012.503996,535579.4,0.9106
0.9,642.9661,9700650.2,51792.152334,9708908.2,0.9154


### Correlation

In [117]:
df.corr(numeric_only=True)

Unnamed: 0,Population,Gross_Domestic_Product,GDP_per_Capita,Surface_Area,Human_Development_Index
Population,1.0,0.990341,0.451807,0.465306,0.322123
Gross_Domestic_Product,0.990341,1.0,0.558655,0.533703,0.396534
GDP_per_Capita,0.451807,0.558655,1.0,0.720954,0.865778
Surface_Area,0.465306,0.533703,0.720954,1.0,0.552458
Human_Development_Index,0.322123,0.396534,0.865778,0.552458,1.0


- Very strong: >0.9 
- Strong: 0.7 - 0.9 
- Moderate: 0.3 - 0.7 
- Weak: 0.1 - 0.3 
- Very weak: <0.1