In [1]:
import pandas as pd
import numpy as np

### Pandas DataFrame

In [2]:
# Sample table of seven countries, built column by column.
# Every list has one entry per row, and rows keep the default 0..6 integer index.
df = pd.DataFrame(
    data={
        'Population': [35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
        'GDP': [1785387, 2833687, 3874437, 2167744, 4602367, 2950039, 17348075],
        'Surface Area': [9984670, 640679, 357114, 301336, 377930, 242495, 9525067],
        'HDI': [0.913, 0.888, 0.916, 0.873, 0.891, 0.907, 0.915],
        'Continent': ['America', 'Europe', 'Europe', 'Europe', 'Asia', 'Europe', 'America'],
    },
    # Explicit column order for the resulting frame.
    columns=['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'],
)

In [3]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
0,35.467,1785387,9984670,0.913,America
1,63.951,2833687,640679,0.888,Europe
2,80.94,3874437,357114,0.916,Europe
3,60.665,2167744,301336,0.873,Europe
4,127.061,4602367,377930,0.891,Asia
5,64.511,2950039,242495,0.907,Europe
6,318.523,17348075,9525067,0.915,America


In [4]:
# Replace the default integer index with country names (one label per row, in row order).
df.index = [
    'Canada', 'France', 'Germany', 'Italy',
    'Japan', 'United Kingdom', 'United States',
]

In [5]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [6]:
df.columns

Index(['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'], dtype='object')

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Canada to United States
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Population    7 non-null      float64
 1   GDP           7 non-null      int64  
 2   Surface Area  7 non-null      int64  
 3   HDI           7 non-null      float64
 4   Continent     7 non-null      object 
dtypes: float64(2), int64(2), object(1)
memory usage: 336.0+ bytes


In [8]:
# Total number of cells: rows x columns (7 x 5 here).
df.size

35

In [9]:
df.shape

(7, 5)

In [13]:
# Summary statistics for the numeric columns only; the text column 'Continent' is left out.
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI
count,7.0,7.0,7.0,7.0
mean,107.302571,5080248.0,3061327.0,0.900429
std,97.24997,5494020.0,4576187.0,0.016592
min,35.467,1785387.0,242495.0,0.873
25%,62.308,2500716.0,329225.0,0.8895
50%,64.511,2950039.0,377930.0,0.907
75%,104.0005,4238402.0,5082873.0,0.914
max,318.523,17348080.0,9984670.0,0.916


In [161]:
 df.dtypes

Population      float64
GDP               int64
Surface Area      int64
HDI             float64
Continent        object
dtype: object

### Indexing and slicing

To select a row by its index label, use `.loc`:

In [162]:
# loc selects by index label; a single row label returns that row as a Series.
df.loc['Canada']

Population       35.467
GDP             1785387
Surface Area    9984670
HDI               0.913
Continent       America
Name: Canada, dtype: object

In [163]:
# A row label plus a column label returns the single value in that cell.
df.loc['Canada', 'Population']

35.467

In [14]:
# iloc selects by integer position; -1 is the last row.
df.iloc[-1]

Population       318.523
GDP             17348075
Surface Area     9525067
HDI                0.915
Continent        America
Name: United States, dtype: object

In [15]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [165]:
# Label slices with loc include both endpoints, so 'Italy' is part of the result.
df.loc['France': 'Italy', 'Population']

France     63.951
Germany    80.940
Italy      60.665
Name: Population, dtype: float64

In [166]:
df['Population']

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: Population, dtype: float64

In [16]:
# A single column is a Series; to_frame() wraps it in a one-column DataFrame
# so that it is displayed as a table.
df['Population'].to_frame()

Unnamed: 0,Population
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061
United Kingdom,64.511
United States,318.523


In [18]:
# One row label plus a column slice gives a Series (despite the 'df' prefix in the name);
# the selected column labels become its index.
dfCanada = df.loc['Canada', 'Population': 'HDI']

In [19]:
dfCanada

Population       35.467
GDP             1785387
Surface Area    9984670
HDI               0.913
Name: Canada, dtype: object

Here, the selected column labels of `df` become the index of the new Series `dfCanada`.

In [170]:
dfCanada['Population'].dtype

dtype('float64')

In [171]:
# Label slicing also works directly on a Series; both endpoints are included.
dfCanada['Population': 'HDI']

Population       35.467
GDP             1785387
Surface Area    9984670
HDI               0.913
Name: Canada, dtype: object

In [172]:
# Positional slice that keeps everything except the last element.
dfCanada.iloc[:-1]

Population       35.467
GDP             1785387
Surface Area    9984670
Name: Canada, dtype: object

### Conditional Selection (Boolean Arrays)

In [173]:
# Comparing a column with a scalar yields a boolean Series with the same index as df.
df['Population'] > 70

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: Population, dtype: bool

In [174]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [175]:
# The boolean mask picks the rows; the label slice picks columns 'GDP' through 'HDI' inclusive.
df.loc[df['Population'] > 70, 'GDP': 'HDI']

Unnamed: 0,GDP,Surface Area,HDI
Germany,3874437,357114,0.916
Japan,4602367,377930,0.891
United States,17348075,9525067,0.915


In [176]:
# Per-column adjustments, keyed by the column label each value applies to.
crisis = pd.Series({'GDP': -100000, 'HDI': -0.3})
crisis

GDP   -100000.0
HDI        -0.3
dtype: float64

In [177]:
df[['GDP', 'HDI']]

Unnamed: 0,GDP,HDI
Canada,1785387,0.913
France,2833687,0.888
Germany,3874437,0.916
Italy,2167744,0.873
Japan,4602367,0.891
United Kingdom,2950039,0.907
United States,17348075,0.915


In [178]:
# Add the Series to every row, matching its index labels against the column labels.
df[['GDP', 'HDI']].add(crisis, axis='columns')

Unnamed: 0,GDP,HDI
Canada,1685387.0,0.613
France,2733687.0,0.588
Germany,3774437.0,0.616
Italy,2067744.0,0.573
Japan,4502367.0,0.591
United Kingdom,2850039.0,0.607
United States,17248075.0,0.615


### Adding a new column

In [179]:
# Assigning a scalar creates the column and fills every row with that value.
df['Language'] = 'English'

In [180]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
United Kingdom,64.511,2950039,242495,0.907,Europe,English
United States,318.523,17348075,9525067,0.915,America,English


In [20]:
# Languages for only four of the countries, keyed by country name.
# When this Series is assigned as a column, pandas matches on these index labels.
langs = pd.Series(
    {
        'Canada': 'English',
        'France': 'French',
        'Germany': 'German',
        'Italy': 'Italian',
    },
    name='Language',
)

In [21]:
langs

Canada     English
France      French
Germany     German
Italy      Italian
Name: Language, dtype: object

In [22]:
# Values are aligned on index labels; rows of df with no matching label in langs end up missing.
df['Language'] = langs

In [23]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,French
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.665,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


### Renaming columns and index labels

In [185]:
# rename() returns a new DataFrame; df itself keeps its original labels.
df2 = df.rename(
    columns={'HDI': 'Human Dev Index'},
    index={'United Kingdom': 'UK', 'United States': 'USA'},
)

In [147]:
df2

Unnamed: 0,Population,GDP,Surface Area,Human Dev Index,Continent
0,35.467,1785387,9984670,0.913,America
1,63.951,2833687,640679,0.888,Europe
2,80.94,3874437,357114,0.916,Europe
3,60.665,2167744,301336,0.873,Europe
4,127.061,4602367,377930,0.891,Asia
5,64.511,2950039,242495,0.907,Europe
6,318.523,17348075,9525067,0.915,America


In [188]:
# Element-wise division of the two columns, stored as a new column.
df['GDP per Capita'] = df['GDP'].div(df['Population'])

In [189]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language,GDP per Capita
Canada,35.467,1785387,9984670,0.913,America,English,50339.385908
France,63.951,2833687,640679,0.888,Europe,French,44310.284437
Germany,80.94,3874437,357114,0.916,Europe,German,47868.013343
Italy,60.665,2167744,301336,0.873,Europe,Italian,35733.025633
Japan,127.061,4602367,377930,0.891,Asia,,36221.712406
United Kingdom,64.511,2950039,242495,0.907,Europe,,45729.239975
United States,318.523,17348075,9525067,0.915,America,,54464.12033
