In [1]:
import numpy as np
import pandas as pd

In [2]:
# Assemble the demo table column by column. Each list holds seven values,
# one per row; no index is given, so rows get the default 0..6 labels.
population_values = [65.789, 54.6743, 345.678, 23.456, 67.890, 123.098, 59.000]
gdp_values = [1234567, 3456788, 2346577, 4847658, 6754444, 2343454, 8577547]
hdi_values = [0.913, 0.3432, 0.321, 0.879, 0.373, 0.999, 0.342]

df = pd.DataFrame({
    'Population': population_values,
    'GDP': gdp_values,
    'HDI': hdi_values,
})

In [3]:
# Show the whole frame; with only seven rows a full dump stays readable.
df

Unnamed: 0,Population,GDP,HDI
0,65.789,1234567,0.913
1,54.6743,3456788,0.3432
2,345.678,2346577,0.321
3,23.456,4847658,0.879
4,67.89,6754444,0.373
5,123.098,2343454,0.999
6,59.0,8577547,0.342


In [4]:
# Column labels of the frame, returned as a pandas Index.
df.columns

Index(['Population', 'GDP', 'HDI'], dtype='object')

In [5]:
# Print a structural summary: index range, per-column non-null counts and
# dtypes, and approximate memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Population  7 non-null      float64
 1   GDP         7 non-null      int64  
 2   HDI         7 non-null      float64
dtypes: float64(2), int64(1)
memory usage: 300.0 bytes


In [6]:
# Dimensions as a (rows, columns) tuple.
df.shape

(7, 3)

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for each
# numeric column.
df.describe()

Unnamed: 0,Population,GDP,HDI
count,7.0,7.0,7.0
mean,105.655043,4223005.0,0.595743
std,109.896306,2656186.0,0.315374
min,23.456,1234567.0,0.321
25%,56.83715,2345016.0,0.3426
50%,65.789,3456788.0,0.373
75%,95.494,5801051.0,0.896
max,345.678,8577547.0,0.999


In [8]:
# Swap the default integer index for country names. The list is positional:
# the i-th name becomes the label of the i-th existing row.
country_labels = [
    'Canada', 'USA', 'Ghana', 'Rwanda',
    'Netherlands', 'Australia', 'Austria',
]
df.index = country_labels

df

Unnamed: 0,Population,GDP,HDI
Canada,65.789,1234567,0.913
USA,54.6743,3456788,0.3432
Ghana,345.678,2346577,0.321
Rwanda,23.456,4847658,0.879
Netherlands,67.89,6754444,0.373
Australia,123.098,2343454,0.999
Austria,59.0,8577547,0.342


To select a row, use the `loc` indexer (to select by index label) or the `iloc` indexer (to select by integer position).

In [9]:
# Label-based lookup of a single row. The row comes back as a Series whose
# index is the column names; the mixed int/float values share one float64 dtype.
df.loc['Rwanda']

Population         23.456
GDP           4847658.000
HDI                 0.879
Name: Rwanda, dtype: float64

To select a column, however, index the DataFrame directly with the column name.

In [10]:
# Select one column by name; the result is a Series indexed by country.
df['GDP']

Canada         1234567
USA            3456788
Ghana          2346577
Rwanda         4847658
Netherlands    6754444
Australia      2343454
Austria        8577547
Name: GDP, dtype: int64

In [11]:
# Label slice over rows. Unlike positional slices, both endpoints are
# included, so the 'Netherlands' row is part of the result.
df.loc['Canada':'Netherlands']

Unnamed: 0,Population,GDP,HDI
Canada,65.789,1234567,0.913
USA,54.6743,3456788,0.3432
Ghana,345.678,2346577,0.321
Rwanda,23.456,4847658,0.879
Netherlands,67.89,6754444,0.373


In [12]:
# Positional selection: rows at positions 2, 3 and 4 (the stop value 5 is
# excluded) from the column at position 0, i.e. 'Population'.
df.iloc[2:5,0]

Ghana          345.678
Rwanda          23.456
Netherlands     67.890
Name: Population, dtype: float64

In [13]:
# Label slice over rows ('Ghana' through 'Austria', both included) combined
# with a single column label, which yields a Series.
df.loc['Ghana':'Austria','Population']

Ghana          345.678
Rwanda          23.456
Netherlands     67.890
Australia      123.098
Austria         59.000
Name: Population, dtype: float64

In [14]:
# Element-wise comparison: produces a boolean Series (one flag per country)
# that can be used as a row filter. The comparison is strict, so a value of
# exactly 59 is False.
df['Population'] < 59

Canada         False
USA             True
Ghana          False
Rwanda          True
Netherlands    False
Australia      False
Austria        False
Name: Population, dtype: bool

In [26]:
# Keep only the rows whose boolean flag is True.
# NOTE(review): the saved output below carries execution count 26 and shows the
# 'Continent' and 'GDP per Capita' columns, which are only added in later
# cells. Restart the kernel and run all cells in order to refresh it.
under_59 = df['Population'] < 59
df.loc[under_59]

Unnamed: 0,Population,GDP,HDI,Continent,GDP per Capita
USA,54.6743,3456788,0.3432,North America,63225.098447
Rwanda,23.456,4847658,0.879,Africa,206670.276262


In [16]:
# Return a copy of the frame without the 'Canada' row. The result is not
# assigned, so df itself keeps all seven rows.
df.drop('Canada')

Unnamed: 0,Population,GDP,HDI
USA,54.6743,3456788,0.3432
Ghana,345.678,2346577,0.321
Rwanda,23.456,4847658,0.879
Netherlands,67.89,6754444,0.373
Australia,123.098,2343454,0.999
Austria,59.0,8577547,0.342


Vectorized arithmetic works here much as it does in NumPy, with the extra step that operands are aligned on their labels first.

In [17]:
# Per-column adjustments keyed by column name. Mixing an int with a float
# makes the whole Series float64.
crisis = pd.Series({'GDP': -10000, 'HDI': -0.1})
crisis

GDP   -10000.0
HDI       -0.1
dtype: float64

In [18]:
# Add the Series to a two-column sub-frame. The Series index is matched to
# the column labels and applied to every row: each GDP drops by 10000 and
# each HDI by 0.1. GDP becomes float in the result; df is not modified.
df[['GDP','HDI']] + crisis

Unnamed: 0,GDP,HDI
Canada,1224567.0,0.813
USA,3446788.0,0.2432
Ghana,2336577.0,0.221
Rwanda,4837658.0,0.779
Netherlands,6744444.0,0.273
Australia,2333454.0,0.899
Austria,8567547.0,0.242


### Modifying DataFrames

In [19]:
# Country -> continent lookup, wrapped in a Series named 'Continent'.
# The mapping has six entries; 'Australia' is not among them.
continent_by_country = {
    'Canada': 'North America',
    'USA': 'North America',
    'Ghana': 'Africa',
    'Rwanda': 'Africa',
    'Netherlands': 'Europe',
    'Austria': 'Europe',
}
conts = pd.Series(continent_by_country, name='Continent')

In [20]:
# Add a new column from a Series. Values are matched on index labels, not
# position: 'Australia' has no entry in conts, so its Continent is left
# missing (shown blank in the output).
df['Continent']= conts
df

Unnamed: 0,Population,GDP,HDI,Continent
Canada,65.789,1234567,0.913,North America
USA,54.6743,3456788,0.3432,North America
Ghana,345.678,2346577,0.321,Africa
Rwanda,23.456,4847658,0.879,Africa
Netherlands,67.89,6754444,0.373,Europe
Australia,123.098,2343454,0.999,
Austria,59.0,8577547,0.342,Europe


In [21]:
# rename takes old -> new label mappings for columns and for the index and
# returns a relabelled frame. The result is not assigned, so df keeps its
# original labels.
column_renames = {'Continent': 'CONTINENT'}
row_renames = {'USA': 'United States of America'}
df.rename(columns=column_renames, index=row_renames)

Unnamed: 0,Population,GDP,HDI,CONTINENT
Canada,65.789,1234567,0.913,North America
United States of America,54.6743,3456788,0.3432,North America
Ghana,345.678,2346577,0.321,Africa
Rwanda,23.456,4847658,0.879,Africa
Netherlands,67.89,6754444,0.373,Europe
Australia,123.098,2343454,0.999,
Austria,59.0,8577547,0.342,Europe


In [22]:
# Confirm that df is unchanged: it still shows 'Continent' and 'USA',
# because the preceding rename call returned a new frame.
df

Unnamed: 0,Population,GDP,HDI,Continent
Canada,65.789,1234567,0.913,North America
USA,54.6743,3456788,0.3432,North America
Ghana,345.678,2346577,0.321,Africa
Rwanda,23.456,4847658,0.879,Africa
Netherlands,67.89,6754444,0.373,Europe
Australia,123.098,2343454,0.999,
Austria,59.0,8577547,0.342,Europe


In [23]:
# Element-wise division of two columns; rows are matched by index label and
# the result is an unnamed float64 Series.
df['GDP'] / df['Population']

Canada          18765.553512
USA             63225.098447
Ghana            6788.331916
Rwanda         206670.276262
Netherlands     99491.000147
Australia       19037.303612
Austria        145382.152542
dtype: float64

In [24]:
# Store the GDP / Population ratio as a new column on df.
# NOTE(review): the units of 'Population' are not stated anywhere in this
# notebook; the value is GDP divided by the raw Population figure. Confirm
# the units before reading it as a true per-person amount.
df['GDP per Capita'] = df['GDP'] / df['Population']

In [25]:
# Final state of the frame, now including 'Continent' and 'GDP per Capita'.
df

Unnamed: 0,Population,GDP,HDI,Continent,GDP per Capita
Canada,65.789,1234567,0.913,North America,18765.553512
USA,54.6743,3456788,0.3432,North America,63225.098447
Ghana,345.678,2346577,0.321,Africa,6788.331916
Rwanda,23.456,4847658,0.879,Africa,206670.276262
Netherlands,67.89,6754444,0.373,Europe,99491.000147
Australia,123.098,2343454,0.999,,19037.303612
Austria,59.0,8577547,0.342,Europe,145382.152542
