In [37]:
import pandas as pd

# 1. Series

## 1.1 Create Series

In [38]:
# The seven G7 member states.
countries = [
    'Canada',
    'France',
    'Germany',
    'Italy',
    'Japan',
    'United Kingdom',
    'United States',
]

# One population figure per country, listed in the same order as `countries`.
population = [
    35.467,
    63.951,
    80.940,
    60.665,
    127.061,
    64.511,
    318.523,
]

In [39]:
# Build a Series from the population list; no index is passed, so the entries
# are labelled with the default integers 0..6.
g7 = pd.Series(population)
g7

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
dtype: float64

In [40]:
# Give the Series a descriptive name; it is shown in the repr next to the dtype.
g7.name = 'G7 Population in millions'
g7

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in millions, dtype: float64

## 1.2 Indexing

In [41]:
# Look up the entry labelled 0 (the first one while the index is still the default integers).
g7[0]

35.467

In [42]:
# Slice selecting entries 1 through 4; the stop value 5 is excluded.
g7[1:5]

1     63.951
2     80.940
3     60.665
4    127.061
Name: G7 Population in millions, dtype: float64

In [43]:
# Pass a list to pick several entries at once (here the ones labelled 1, 3 and 6).
g7[[1, 3, 6]]

1     63.951
3     60.665
6    318.523
Name: G7 Population in millions, dtype: float64

In [44]:
# Replace the default integer index with the country names.
g7.index = countries
g7

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [45]:
# With the country index in place, a value can be looked up by its label.
g7['Italy']

60.665

In [46]:
# Label slice with a step of 2, starting at 'France': picks France, Italy and United Kingdom.
g7['France':'United States':2]

France            63.951
Italy             60.665
United Kingdom    64.511
Name: G7 Population in millions, dtype: float64

In [47]:
# .iloc selects by integer position instead of by label (position 1 is France).
g7.iloc[1]

63.951

In [48]:
# Positional slice with a step: positions 1, 3 and 5.
g7.iloc[1:6:2]

France            63.951
Italy             60.665
United Kingdom    64.511
Name: G7 Population in millions, dtype: float64

In [49]:
# Positional selection with an explicit list of positions.
g7.iloc[[2, 3, 5, 6]]

Germany            80.940
Italy              60.665
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

In [50]:
# Assign a new value to an existing label; the Series is modified in place.
g7['Canada'] = 40.5
g7

Canada             40.500
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in millions, dtype: float64

# 2. DataFrame

## 2.1 Create & overview

In [51]:
# Column-oriented data for the G7 countries: every key is a column name and
# every list holds seven values, one per country.
g7_dict = {
    'Population': [35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
    'GDP': [1785387, 2833687, 3874437, 2167744, 4602367, 2950039, 17348075],
    'Surface Area': [9984670, 640679, 357114, 301336, 377930, 242495, 9525067],
    'HDI': [0.913, 0.888, 0.916, 0.873, 0.891, 0.907, 0.915],
    'Continent': ['America', 'Europe', 'Europe', 'Europe', 'Asia', 'Europe', 'America'],
}

In [52]:
# Build a DataFrame from the dict: each key becomes a column, each list that column's values.
df = pd.DataFrame(g7_dict)

In [53]:
# Display the frame; the rows carry the default 0..6 integer index.
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
0,35.467,1785387,9984670,0.913,America
1,63.951,2833687,640679,0.888,Europe
2,80.94,3874437,357114,0.916,Europe
3,60.665,2167744,301336,0.873,Europe
4,127.061,4602367,377930,0.891,Asia
5,64.511,2950039,242495,0.907,Europe
6,318.523,17348075,9525067,0.915,America


In [54]:
# Label the rows with the country names instead of the default integer index.
df.index = countries
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [55]:
# Column labels of the frame.
df.columns

Index(['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'], dtype='object')

In [56]:
# Total number of cells (rows x columns): 7 * 5 = 35.
df.size

35

In [57]:
# (number of rows, number of columns).
df.shape

(7, 5)

In [58]:
# First element of the shape tuple: the number of rows.
df.shape[0]

7

In [59]:
# Per-column overview: non-null counts and dtypes, plus the index range and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Canada to United States
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Population    7 non-null      float64
 1   GDP           7 non-null      int64  
 2   Surface Area  7 non-null      int64  
 3   HDI           7 non-null      float64
 4   Continent     7 non-null      object 
dtypes: float64(2), int64(2), object(1)
memory usage: 336.0+ bytes


In [60]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text column 'Continent' is left out.
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI
count,7.0,7.0,7.0,7.0
mean,107.302571,5080248.0,3061327.0,0.900429
std,97.24997,5494020.0,4576187.0,0.016592
min,35.467,1785387.0,242495.0,0.873
25%,62.308,2500716.0,329225.0,0.8895
50%,64.511,2950039.0,377930.0,0.907
75%,104.0005,4238402.0,5082873.0,0.914
max,318.523,17348080.0,9984670.0,0.916


## 2.2 Indexing, Selection and Slicing

In [61]:
# Show the full frame again as the starting point for the selection examples.
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [62]:
# Select a single row by its index label; the result is a Series keyed by column name.
df.loc['Canada']

Population       35.467
GDP             1785387
Surface Area    9984670
HDI               0.913
Continent       America
Name: Canada, dtype: object

In [63]:
# Select a single cell by row label and column label.
df.loc['Canada', 'GDP']

1785387

In [64]:
# Same value as df.loc['Canada', 'GDP'], reached in two steps: first select the
# 'Canada' row as a Series, then look up 'GDP' in that Series.
df.loc['Canada']['GDP']

1785387

In [65]:
# Label slice over the rows with a step of 2, starting at 'France'.
df.loc['France': 'United States': 2]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Italy,60.665,2167744,301336,0.873,Europe
United Kingdom,64.511,2950039,242495,0.907,Europe


In [66]:
# Row slice with a step of 2, restricted to the 'Continent' column; the result is a Series.
df.loc['France': 'United States': 2, 'Continent']

France            Europe
Italy             Europe
United Kingdom    Europe
Name: Continent, dtype: object

In [67]:
# The selected column is a Series indexed by country, so a label lookup can be chained onto it.
df.loc['France': 'United States': 2, 'Continent']['France']

'Europe'

In [68]:
# Select a row by integer position (position 2 is Germany).
df.iloc[2]

Population        80.94
GDP             3874437
Surface Area     357114
HDI               0.916
Continent        Europe
Name: Germany, dtype: object

In [69]:
# First five rows by position, then the 'GDP' and 'HDI' columns by name.
df.iloc[:5][['GDP', 'HDI']]

Unnamed: 0,GDP,HDI
Canada,1785387,0.913
France,2833687,0.888
Germany,3874437,0.916
Italy,2167744,0.873
Japan,4602367,0.891


In [70]:
# Positional slices on both axes: rows 2-3 and columns 2-3 (stop positions are excluded).
df.iloc[2:4, 2:4]

Unnamed: 0,Surface Area,HDI
Germany,357114,0.916
Italy,301336,0.873


In [71]:
# Rows 2-3 and columns 1, 3 and 4 by position, then the 'Italy' row by label.
df.iloc[2:4, [1, 3, 4]].loc['Italy']

GDP          2167744
HDI            0.873
Continent     Europe
Name: Italy, dtype: object

In [72]:
# Square brackets with a single column name return that column as a Series.
df['Population']

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: Population, dtype: float64

In [73]:
# Column first, then the row label within that Series.
df['Population']['Japan']

127.061

In [74]:
# A list of column names returns a DataFrame with just those columns.
df[['Population', 'GDP', 'Continent']]

Unnamed: 0,Population,GDP,Continent
Canada,35.467,1785387,America
France,63.951,2833687,Europe
Germany,80.94,3874437,Europe
Italy,60.665,2167744,Europe
Japan,127.061,4602367,Asia
United Kingdom,64.511,2950039,Europe
United States,318.523,17348075,America


In [75]:
# All rows, columns at positions 1 to 3.
df.iloc[:, 1:4]

Unnamed: 0,GDP,Surface Area,HDI
Canada,1785387,9984670,0.913
France,2833687,640679,0.888
Germany,3874437,357114,0.916
Italy,2167744,301336,0.873
Japan,4602367,377930,0.891
United Kingdom,2950039,242495,0.907
United States,17348075,9525067,0.915


In [76]:
# Attribute-style access: df.Population selects the same column as df['Population'].
df.Population

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: Population, dtype: float64

In [77]:
# Attribute access can be chained: the 'Population' column, then its 'Canada' entry.
df.Population.Canada

35.467

In [78]:
# Overwrite a single cell, addressed by row label and column label; df is modified in place.
df.loc['Canada', 'Population'] = 35.2
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.2,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


## 2.3 Drop Rows and Columns

In [79]:
# Returns a new frame without the 'Canada' row; df itself is not modified.
df.drop('Canada')

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [80]:
# Drop several rows at once by passing a list of labels.
df.drop(['Canada', 'Japan'])

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [81]:
# Drop columns by name with the columns= keyword.
df.drop(columns=['GDP', 'Surface Area'])

Unnamed: 0,Population,HDI,Continent
Canada,35.2,0.913,America
France,63.951,0.888,Europe
Germany,80.94,0.916,Europe
Italy,60.665,0.873,Europe
Japan,127.061,0.891,Asia
United Kingdom,64.511,0.907,Europe
United States,318.523,0.915,America


**axis=0 - rows \
axis=1 - columns**

In [82]:
# axis=0: the label refers to a row.
df.drop('Italy', axis=0)

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.2,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [83]:
# axis=1: the label refers to a column.
df.drop('Continent', axis=1)

Unnamed: 0,Population,GDP,Surface Area,HDI
Canada,35.2,1785387,9984670,0.913
France,63.951,2833687,640679,0.888
Germany,80.94,3874437,357114,0.916
Italy,60.665,2167744,301336,0.873
Japan,127.061,4602367,377930,0.891
United Kingdom,64.511,2950039,242495,0.907
United States,318.523,17348075,9525067,0.915


## 2.4 Operations

In [84]:
# Arithmetic with a scalar is applied to every element of the selected columns.
df[['Population', 'GDP']] / 100

Unnamed: 0,Population,GDP
Canada,0.352,17853.87
France,0.63951,28336.87
Germany,0.8094,38744.37
Italy,0.60665,21677.44
Japan,1.27061,46023.67
United Kingdom,0.64511,29500.39
United States,3.18523,173480.75


In [85]:
# Offsets keyed by column name: one value for 'GDP' and one for 'HDI'.
# Mixing an int and a float yields a float64 Series.
crisis = pd.Series({'GDP': -500000, 'HDI': -0.3})
crisis

GDP   -500000.0
HDI        -0.3
dtype: float64

In [86]:
# The two columns whose names match the index labels of `crisis`.
df[['GDP', 'HDI']]

Unnamed: 0,GDP,HDI
Canada,1785387,0.913
France,2833687,0.888
Germany,3874437,0.916
Italy,2167744,0.873
Japan,4602367,0.891
United Kingdom,2950039,0.907
United States,17348075,0.915


In [87]:
# Adding a Series to a DataFrame matches the Series' labels to the column names,
# so every row gets the 'GDP' offset in the GDP column and the 'HDI' offset in the HDI column.
df[['GDP', 'HDI']] + crisis

Unnamed: 0,GDP,HDI
Canada,1285387.0,0.613
France,2333687.0,0.588
Germany,3374437.0,0.616
Italy,1667744.0,0.573
Japan,4102367.0,0.591
United Kingdom,2450039.0,0.607
United States,16848075.0,0.615


## 2.5 Modifying DataFrames

In [88]:
# Languages for three of the countries only, keyed by country name.
langs = pd.Series(
    {'France': 'French', 'Germany': 'German', 'Italy': 'Italian'},
    name='Language',
)
langs

France      French
Germany     German
Italy      Italian
Name: Language, dtype: object

In [90]:
# Assign a Series as a new column: values are aligned on the index, so countries
# that are missing from `langs` end up with a missing value.
df['Language'] = langs
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.2,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,French
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.665,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


In [91]:
# Assigning a scalar to a column broadcasts it to every row, overwriting the
# previous 'Language' values. Bracket assignment is used because attribute
# assignment (df.Language = ...) only targets a column when one of that name
# already exists; otherwise it sets a plain Python attribute instead of creating
# a column.
df['Language'] = 'English'
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.2,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
United Kingdom,64.511,2950039,242495,0.907,Europe,English
United States,318.523,17348075,9525067,0.915,America,English


___
**Rename index labels and columns**

In [92]:
# Rename two row labels and two column labels in a single call. rename returns a
# new frame, which is bound back to `df` instead of mutating it with
# inplace=True, so the cell reads as a plain assignment.
df = df.rename(
    index={
        'United States': 'USA',
        'United Kingdom': 'UK',
    },
    columns={
        'GDP': 'Gross Domestic Product',
        'HDI': 'Human Development Index',
    },
)

In [93]:
# Show the frame with the renamed row and column labels.
df

Unnamed: 0,Population,Gross Domestic Product,Surface Area,Human Development Index,Continent,Language
Canada,35.2,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
UK,64.511,2950039,242495,0.907,Europe,English
USA,318.523,17348075,9525067,0.915,America,English


In [62]:
# rename also accepts a function: str.upper is applied to every index label.
# The result is a new frame; df keeps its original labels.
df.rename(index=str.upper)

Unnamed: 0,Population,Gross Domestic Product,Surface Area,Human Development Index,Continent,Language
CANADA,35.2,1785387,9984670,0.913,America,English
FRANCE,63.951,2833687,640679,0.888,Europe,English
GERMANY,80.94,3874437,357114,0.916,Europe,English
ITALY,60.665,2167744,301336,0.873,Europe,English
JAPAN,127.061,4602367,377930,0.891,Asia,English
UK,64.511,2950039,242495,0.907,Europe,English
USA,318.523,17348075,9525067,0.915,America,English


___
**Add a new row**

In [94]:
# Add a row by assigning to a label that does not exist yet. Only two columns are
# supplied; the remaining ones are filled with missing values, which is why the
# integer columns are shown as floats afterwards.
china_row = pd.Series({'Population': 1400, 'Continent': 'Asia'})
df.loc['China'] = china_row
df

Unnamed: 0,Population,Gross Domestic Product,Surface Area,Human Development Index,Continent,Language
Canada,35.2,1785387.0,9984670.0,0.913,America,English
France,63.951,2833687.0,640679.0,0.888,Europe,English
Germany,80.94,3874437.0,357114.0,0.916,Europe,English
Italy,60.665,2167744.0,301336.0,0.873,Europe,English
Japan,127.061,4602367.0,377930.0,0.891,Asia,English
UK,64.511,2950039.0,242495.0,0.907,Europe,English
USA,318.523,17348075.0,9525067.0,0.915,America,English
China,1400.0,,,,Asia,


## 2.6 Index changing

In [95]:
# Move the current index into a regular column named 'index' and number the rows
# 0..n-1 again. Returns a new frame; df is unchanged.
df.reset_index()

Unnamed: 0,index,Population,Gross Domestic Product,Surface Area,Human Development Index,Continent,Language
0,Canada,35.2,1785387.0,9984670.0,0.913,America,English
1,France,63.951,2833687.0,640679.0,0.888,Europe,English
2,Germany,80.94,3874437.0,357114.0,0.916,Europe,English
3,Italy,60.665,2167744.0,301336.0,0.873,Europe,English
4,Japan,127.061,4602367.0,377930.0,0.891,Asia,English
5,UK,64.511,2950039.0,242495.0,0.907,Europe,English
6,USA,318.523,17348075.0,9525067.0,0.915,America,English
7,China,1400.0,,,,Asia,


In [96]:
# Use the 'Population' column as the row index. Returns a new frame; df is unchanged.
df.set_index('Population')

Unnamed: 0_level_0,Gross Domestic Product,Surface Area,Human Development Index,Continent,Language
Population,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
35.2,1785387.0,9984670.0,0.913,America,English
63.951,2833687.0,640679.0,0.888,Europe,English
80.94,3874437.0,357114.0,0.916,Europe,English
60.665,2167744.0,301336.0,0.873,Europe,English
127.061,4602367.0,377930.0,0.891,Asia,English
64.511,2950039.0,242495.0,0.907,Europe,English
318.523,17348075.0,9525067.0,0.915,America,English
1400.0,,,,Asia,


**Create a new column**

In [97]:
# Derive a new column from two existing ones; the division is element-wise, row
# by row. China has no GDP value, so its result is missing as well.
gdp_total = df['Gross Domestic Product']
headcount = df['Population']
df['GDP per Capita'] = gdp_total / headcount
df

Unnamed: 0,Population,Gross Domestic Product,Surface Area,Human Development Index,Continent,Language,GDP per Capita
Canada,35.2,1785387.0,9984670.0,0.913,America,English,50721.221591
France,63.951,2833687.0,640679.0,0.888,Europe,English,44310.284437
Germany,80.94,3874437.0,357114.0,0.916,Europe,English,47868.013343
Italy,60.665,2167744.0,301336.0,0.873,Europe,English,35733.025633
Japan,127.061,4602367.0,377930.0,0.891,Asia,English,36221.712406
UK,64.511,2950039.0,242495.0,0.907,Europe,English,45729.239975
USA,318.523,17348075.0,9525067.0,0.915,America,English,54464.12033
China,1400.0,,,,Asia,,


**Change position of a column**

In [98]:
# Move a column: pop removes it from the frame and returns it as a Series, and
# insert puts it back at column position 2.
gdp_per_capita = df.pop('GDP per Capita')
df.insert(loc=2, column='GDP per Capita', value=gdp_per_capita)
df

Unnamed: 0,Population,Gross Domestic Product,GDP per Capita,Surface Area,Human Development Index,Continent,Language
Canada,35.2,1785387.0,50721.221591,9984670.0,0.913,America,English
France,63.951,2833687.0,44310.284437,640679.0,0.888,Europe,English
Germany,80.94,3874437.0,47868.013343,357114.0,0.916,Europe,English
Italy,60.665,2167744.0,35733.025633,301336.0,0.873,Europe,English
Japan,127.061,4602367.0,36221.712406,377930.0,0.891,Asia,English
UK,64.511,2950039.0,45729.239975,242495.0,0.907,Europe,English
USA,318.523,17348075.0,54464.12033,9525067.0,0.915,America,English
China,1400.0,,,,,Asia,


## 2.7 Statistical methods

In [100]:
# Smallest value of each numeric column (text columns are skipped via numeric_only=True).
df.min(numeric_only=True)

Population                 3.520000e+01
Gross Domestic Product     1.785387e+06
GDP per Capita             3.573303e+04
Surface Area               2.424950e+05
Human Development Index    8.730000e-01
dtype: float64

In [101]:
# Largest value of each numeric column.
df.max(numeric_only=True)

Population                 1.400000e+03
Gross Domestic Product     1.734808e+07
GDP per Capita             5.446412e+04
Surface Area               9.984670e+06
Human Development Index    9.160000e-01
dtype: float64

In [102]:
# Sum of each numeric column.
df.sum(numeric_only=True)

Population                 2.150851e+03
Gross Domestic Product     3.556174e+07
GDP per Capita             3.150476e+05
Surface Area               2.142929e+07
Human Development Index    6.303000e+00
dtype: float64

In [103]:
# Arithmetic mean of each numeric column.
df.mean(numeric_only=True)

Population                 2.688564e+02
Gross Domestic Product     5.080248e+06
GDP per Capita             4.500680e+04
Surface Area               3.061327e+06
Human Development Index    9.004286e-01
dtype: float64

In [104]:
# Median of each numeric column.
df.median(numeric_only=True)

Population                 7.272550e+01
Gross Domestic Product     2.950039e+06
GDP per Capita             4.572924e+04
Surface Area               3.779300e+05
Human Development Index    9.070000e-01
dtype: float64

In [105]:
# Standard deviation of each numeric column.
df.std(numeric_only=True)

Population                 4.658408e+02
Gross Domestic Product     5.494020e+06
GDP per Capita             7.005590e+03
Surface Area               4.576187e+06
Human Development Index    1.659174e-02
dtype: float64

In [106]:
# First quartile of every numeric column. numeric_only=True is passed explicitly,
# as in the other statistics cells: df also holds text columns ('Continent',
# 'Language'), which have no quantiles. Without the flag pandas 1.5 emits a
# FutureWarning and pandas 2.x raises instead of silently skipping them.
df.quantile(.25, numeric_only=True)

  df.quantile(.25)


Population                 6.312950e+01
Gross Domestic Product     2.500716e+06
GDP per Capita             4.026600e+04
Surface Area               3.292250e+05
Human Development Index    8.895000e-01
Name: 0.25, dtype: float64

In [107]:
# Several quantiles at once: a list of probabilities yields one row per quantile.
# numeric_only=True restricts the computation to the numeric columns; the text
# columns ('Continent', 'Language') would otherwise trigger a FutureWarning in
# pandas 1.5 and an error in pandas 2.x.
df.quantile([.2, .4, .6, .8], numeric_only=True)

  df.quantile([.2, .4, .6, .8])


Unnamed: 0,Population,Gross Domestic Product,GDP per Capita,Surface Area,Human Development Index
0.2,61.9794,2300932.6,37839.426812,312491.6,0.8886
0.4,64.399,2880227.8,44877.866652,365440.4,0.8974
0.6,90.1642,3504677.8,47012.503996,535579.4,0.9106
0.8,241.9382,4456781.0,50150.579941,7748189.4,0.9146
