## Pandas

In [21]:
import pandas as pd
import numpy as np

In [2]:
certificates_earned = pd.Series(
    [8, 2, 5, 6],
    index=['Tom', 'Kris', 'Ahmad', 'Beau']
)
print(certificates_earned)

Tom      8
Kris     2
Ahmad    5
Beau     6
dtype: int64


In [5]:
print(certificates_earned[certificates_earned >5])

Tom     8
Beau    6
dtype: int64


In [6]:
certificates_by_time = pd.DataFrame({
    'Certificates': [8,2,5,6],
    'Time (months)': [16,5,9,12]
})

certificates_by_time.index = ['Tom','Kris','Ahmad','Beau']

print(certificates_by_time.iloc[2])

Certificates     5
Time (months)    9
Name: Ahmad, dtype: int64


In [7]:
certificates_by_cost = pd.DataFrame({
    'Certificates': [8,2,5,6],
    'Cost (Thousands)': [16,5,9,12]
})
names = ['Tom','Kris','Ahmad','Bea']

certificates_by_cost.index = names
longest_streak = pd.Series([13,11,9,7], index=names)
certificates_by_cost['Longest Streak']=longest_streak

print(certificates_by_cost)

       Certificates  Cost (Thousands)  Longest Streak
Tom               8                16              13
Kris              2                 5              11
Ahmad             5                 9               9
Bea               6                12               7


In [14]:
certificates_by_time['Certificates per Month']=round(certificates_by_time['Certificates'] / certificates_by_time['Time (months)'],2)

print(certificates_by_time)

       Certificates  Time (months)  Certificates per Month
Tom               8             16                    0.50
Kris              2              5                    0.40
Ahmad             5              9                    0.56
Beau              6             12                    0.50


In [3]:
# In million
g7_pop = pd.Series([35.467, 63.951, 80.940, 60.665, 127.061, 64.511, 318.523])

In [4]:
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
dtype: float64

In [5]:
g7_pop.name = "G7 Population in Millions"

In [6]:
g7_pop

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
Name: G7 Population in Millions, dtype: float64

In [7]:
g7_pop.dtype

dtype('float64')

In [8]:
g7_pop.values

array([ 35.467,  63.951,  80.94 ,  60.665, 127.061,  64.511, 318.523])

In [9]:
g7_pop[0]

35.467

In [10]:
type(g7_pop.values)

numpy.ndarray

In [11]:
g7_pop.index

RangeIndex(start=0, stop=7, step=1)

In [12]:
l = ['a','b','c']

In [16]:
g7_pop.index = [
    'Canada',
    'France',
    'Germany',
    'Italy',
    'Japan',
    'United Kingdom',
    'United States',
    ]

In [17]:
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in Millions, dtype: float64

Series is list|dictionaries. Except ordered (dictionaries now are ordered).  
Alternate methods for creation:

In [18]:
pd.Series({
    'Canada': 34.467,
    'France': 63.951,
    'Germany': 80.94,
    'Italy': 60.665,
    'Japan': 127.061,
    'United Kingdom': 64.511,
    'United States': 318.523
}, name='G7 Population in Millions')

Canada             34.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in Millions, dtype: float64

In [19]:
pd.Series(
    [34.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
    index=['Canada','France','Germany','Italy','Japan','United Kingdom','Untied States'],
    name='G7 Population in Millions')

Canada             34.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
Untied States     318.523
Name: G7 Population in Millions, dtype: float64

In [20]:
pd.Series(g7_pop, index=['France', 'Germany', 'Italy', 'Spain'])

France     63.951
Germany    80.940
Italy      60.665
Spain         NaN
Name: G7 Population in Millions, dtype: float64

### Indexing

In [24]:
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in Millions, dtype: float64

In [25]:
g7_pop['Canada']

35.467

In [26]:
g7_pop['Japan']

127.061

In [27]:
g7_pop.iloc[0]

35.467

In [28]:
g7_pop.iloc[-1]

318.523

In [29]:
g7_pop[['Italy','France']]

Italy     60.665
France    63.951
Name: G7 Population in Millions, dtype: float64

In [30]:
g7_pop.iloc[[0,1]]

Canada    35.467
France    63.951
Name: G7 Population in Millions, dtype: float64

In [31]:
g7_pop['Canada':'Italy']

Canada     35.467
France     63.951
Germany    80.940
Italy      60.665
Name: G7 Population in Millions, dtype: float64

IMPORTANT: Slices in Pandas includes the final element selected in the slice, which is not the case with vanilla python/numpy

### Conditional Selection

In [35]:
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in Millions, dtype: float64

In [36]:
g7_pop > 70

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: G7 Population in Millions, dtype: bool

In [37]:
g7_pop[g7_pop>70]

Germany           80.940
Japan            127.061
United States    318.523
Name: G7 Population in Millions, dtype: float64

In [39]:
g7_pop.mean()

107.30257142857144

In [40]:
g7_pop[g7_pop > g7_pop.mean()]

Japan            127.061
United States    318.523
Name: G7 Population in Millions, dtype: float64

In [42]:
g7_pop.std()

97.24996987121581

In [66]:
g7_pop[(g7_pop<g7_pop.mean() - g7_pop.std()/2)|(g7_pop>g7_pop.mean() + g7_pop.std()/2)]

Canada            35.467
United States    318.523
Name: G7 Population in Millions, dtype: float64

In [71]:
g7_pop[(g7_pop > 80) & (g7_pop<200)]

Germany     80.940
Japan      127.061
Name: G7 Population in Millions, dtype: float64

In [49]:
### Operations and Methods

In [50]:
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in Millions, dtype: float64

In [55]:
g7_pop * 1_000_000
# The _ are just for readability

Canada             35467000.0
France             63951000.0
Germany            80940000.0
Italy              60665000.0
Japan             127061000.0
United Kingdom     64511000.0
United States     318523000.0
Name: G7 Population in Millions, dtype: float64

In [54]:
g7_pop * 1000000

Canada             35467000.0
France             63951000.0
Germany            80940000.0
Italy              60665000.0
Japan             127061000.0
United Kingdom     64511000.0
United States     318523000.0
Name: G7 Population in Millions, dtype: float64

In [56]:
g7_pop.mean()

107.30257142857144

In [57]:
g7_pop.mean() * 1_000_000

107302571.42857143

In [59]:
round(g7_pop.mean()*1_000_000,0)

107302571.0

In [62]:
np.log(g7_pop)
# np.log in ln

Canada            3.568603
France            4.158117
Germany           4.393708
Italy             4.105367
Japan             4.844667
United Kingdom    4.166836
United States     5.763695
Name: G7 Population in Millions, dtype: float64

### Modifying Series

In [72]:
g7_pop['Canada'] = 40.5

In [73]:
g7_pop

Canada             40.500
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 Population in Millions, dtype: float64

In [75]:
g7_pop.iloc[-1] = 500

In [76]:
g7_pop

Canada             40.500
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     500.000
Name: G7 Population in Millions, dtype: float64

In [77]:
g7_pop[g7_pop < 70] = 99.99

In [78]:
g7_pop

Canada             99.990
France             99.990
Germany            80.940
Italy              99.990
Japan             127.061
United Kingdom     99.990
United States     500.000
Name: G7 Population in Millions, dtype: float64

### Data Frames
Similar in nature to a table, i.e. a section of an excel docuent.

**ISSUE:** Cannot add string to the dataframe?

In [87]:
df = pd.DataFrame({
    'Population': [35.467, 63.951, 80.94, 60.655, 127.061, 64.511, 318.523],
    'GDP': [
        1785387,
        2833687,
        3874437,
        2167744,
        4602367,
        2950039,
        17348075
    ],
    'Surface Area': [
        9984670,
        640679,
        357114,
        301336,
        377930,
        242495,
        9525067
    ],
    'HDI':[
        0.913,
        0.888,
        0.916,
        0.873,
        0.891,
        0.907,
        0.915
    ],
    'Contient': [
        'America',
        'Europe',
        'Europe',
        'Europe',
        'Asia',
        'Europe',
        'America'
    ]
}, columns=['Population','GDP','Surface Area','HDI','Continent'])

In [89]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
0,35.467,1785387,9984670,0.913,
1,63.951,2833687,640679,0.888,
2,80.94,3874437,357114,0.916,
3,60.655,2167744,301336,0.873,
4,127.061,4602367,377930,0.891,
5,64.511,2950039,242495,0.907,
6,318.523,17348075,9525067,0.915,


In [91]:
df.index = [
    'Canada',
    'France',
    'Germany',
    'Italy',
    'Japan',
    'United Kingdom',
    'United States'
]

In [92]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,
France,63.951,2833687,640679,0.888,
Germany,80.94,3874437,357114,0.916,
Italy,60.655,2167744,301336,0.873,
Japan,127.061,4602367,377930,0.891,
United Kingdom,64.511,2950039,242495,0.907,
United States,318.523,17348075,9525067,0.915,


In [95]:
continent_list = ['America','Europe','Europe','Europe','Asia','Europe','America']
df['Continent'] = continent_list

In [96]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.655,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [97]:
df.columns

Index(['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'], dtype='object')

In [98]:
df.index

Index(['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom',
       'United States'],
      dtype='object')

In [99]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Canada to United States
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Population    7 non-null      float64
 1   GDP           7 non-null      int64  
 2   Surface Area  7 non-null      int64  
 3   HDI           7 non-null      float64
 4   Continent     7 non-null      object 
dtypes: float64(2), int64(2), object(1)
memory usage: 336.0+ bytes


In [100]:
df.size

35

In [101]:
df.shape

(7, 5)

In [102]:
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI
count,7.0,7.0,7.0,7.0
mean,107.301143,5080248.0,3061327.0,0.900429
std,97.250769,5494020.0,4576187.0,0.016592
min,35.467,1785387.0,242495.0,0.873
25%,62.303,2500716.0,329225.0,0.8895
50%,64.511,2950039.0,377930.0,0.907
75%,104.0005,4238402.0,5082873.0,0.914
max,318.523,17348080.0,9984670.0,0.916


In [103]:
df.dtypes

Population      float64
GDP               int64
Surface Area      int64
HDI             float64
Continent        object
dtype: object

In [105]:
df.dtypes.value_counts()

float64    2
int64      2
object     1
Name: count, dtype: int64

In [106]:
df['Population']

Canada             35.467
France             63.951
Germany            80.940
Italy              60.655
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: Population, dtype: float64

In [107]:
df['Population'].to_frame()

Unnamed: 0,Population
Canada,35.467
France,63.951
Germany,80.94
Italy,60.655
Japan,127.061
United Kingdom,64.511
United States,318.523


In [108]:
df[['Population','GDP']]

Unnamed: 0,Population,GDP
Canada,35.467,1785387
France,63.951,2833687
Germany,80.94,3874437
Italy,60.655,2167744
Japan,127.061,4602367
United Kingdom,64.511,2950039
United States,318.523,17348075


In [109]:
df[1:3]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe


Slice operates as default python slicing (e.g. num.3 is missing)

In [111]:
df.iloc[1:3]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe


In [113]:
df.loc['Italy']

Population       60.655
GDP             2167744
Surface Area     301336
HDI               0.873
Continent        Europe
Name: Italy, dtype: object

In [114]:
df.loc['France':'Italy']

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.655,2167744,301336,0.873,Europe


In [115]:
df.loc['Italy':'France']

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent


In [116]:
df.loc['France':'Italy','Population']

France     63.951
Germany    80.940
Italy      60.655
Name: Population, dtype: float64

In [117]:
df.loc['France':'Italy',['Population','GDP']]

Unnamed: 0,Population,GDP
France,63.951,2833687
Germany,80.94,3874437
Italy,60.655,2167744


data frame: select by index via loc, gives row.  
select by [''] will give a column

In [118]:
df.loc['Canada']

Population       35.467
GDP             1785387
Surface Area    9984670
HDI               0.913
Continent       America
Name: Canada, dtype: object

In [119]:
df.iloc[-1]

Population       318.523
GDP             17348075
Surface Area     9525067
HDI                0.915
Continent        America
Name: United States, dtype: object

In [120]:
df['Surface Area']

Canada            9984670
France             640679
Germany            357114
Italy              301336
Japan              377930
United Kingdom     242495
United States     9525067
Name: Surface Area, dtype: int64

In [122]:
df.iloc[[0,1,-1]]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
United States,318.523,17348075,9525067,0.915,America


In [125]:
df.iloc[1:3]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe


In [126]:
df.iloc[1:3,3]

France     0.888
Germany    0.916
Name: HDI, dtype: float64

### Conditional Selection

In [128]:
df['Population']>70

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: Population, dtype: bool

In [129]:
df.loc[df['Population']>70]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Germany,80.94,3874437,357114,0.916,Europe
Japan,127.061,4602367,377930,0.891,Asia
United States,318.523,17348075,9525067,0.915,America


In [132]:
df.loc[df['Population']>70,'Population']

Germany           80.940
Japan            127.061
United States    318.523
Name: Population, dtype: float64

In [134]:
df.loc[df['Population']>70,['Population','GDP']]

Unnamed: 0,Population,GDP
Germany,80.94,3874437
Japan,127.061,4602367
United States,318.523,17348075


In [135]:
df.drop('Canada')

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.655,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [136]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.655,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [137]:
df.drop(['Canada','Japan'])

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.655,2167744,301336,0.873,Europe
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [138]:
df.drop(columns=['Population','HDI'])

Unnamed: 0,GDP,Surface Area,Continent
Canada,1785387,9984670,America
France,2833687,640679,Europe
Germany,3874437,357114,Europe
Italy,2167744,301336,Europe
Japan,4602367,377930,Asia
United Kingdom,2950039,242495,Europe
United States,17348075,9525067,America


In [139]:
df.drop(['Italy','Canada'],axis=0)

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [141]:
df.drop(['Population','HDI'],axis=1)

Unnamed: 0,GDP,Surface Area,Continent
Canada,1785387,9984670,America
France,2833687,640679,Europe
Germany,3874437,357114,Europe
Italy,2167744,301336,Europe
Japan,4602367,377930,Asia
United Kingdom,2950039,242495,Europe
United States,17348075,9525067,America


In [143]:
df.drop(['Population','HDI'],axis='columns')

Unnamed: 0,GDP,Surface Area,Continent
Canada,1785387,9984670,America
France,2833687,640679,Europe
Germany,3874437,357114,Europe
Italy,2167744,301336,Europe
Japan,4602367,377930,Asia
United Kingdom,2950039,242495,Europe
United States,17348075,9525067,America


In [145]:
df.drop(['Canada','Germany'], axis='rows')

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Italy,60.655,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


### Operations

In [146]:
df[['Population','GDP']]

Unnamed: 0,Population,GDP
Canada,35.467,1785387
France,63.951,2833687
Germany,80.94,3874437
Italy,60.655,2167744
Japan,127.061,4602367
United Kingdom,64.511,2950039
United States,318.523,17348075


In [148]:
df[['Population','GDP']] / 100

Unnamed: 0,Population,GDP
Canada,0.35467,17853.87
France,0.63951,28336.87
Germany,0.8094,38744.37
Italy,0.60655,21677.44
Japan,1.27061,46023.67
United Kingdom,0.64511,29500.39
United States,3.18523,173480.75


In [150]:
crisis = pd.Series([-1_000_000,-0.3], index=['GDP','HDI'])
crisis

GDP   -1000000.0
HDI         -0.3
dtype: float64

In [151]:
df[['GDP','HDI']]

Unnamed: 0,GDP,HDI
Canada,1785387,0.913
France,2833687,0.888
Germany,3874437,0.916
Italy,2167744,0.873
Japan,4602367,0.891
United Kingdom,2950039,0.907
United States,17348075,0.915


In [152]:
df [['GDP','HDI']]+crisis

Unnamed: 0,GDP,HDI
Canada,785387.0,0.613
France,1833687.0,0.588
Germany,2874437.0,0.616
Italy,1167744.0,0.573
Japan,3602367.0,0.591
United Kingdom,1950039.0,0.607
United States,16348075.0,0.615


In [154]:
units_adjust = pd.Series([1_000_000,1/1_000_000], index=['Population','GDP'])

In [156]:
df[['Population','GDP']] * units_adjust

Unnamed: 0,Population,GDP
Canada,35467000.0,1.785387
France,63951000.0,2.833687
Germany,80940000.0,3.874437
Italy,60655000.0,2.167744
Japan,127061000.0,4.602367
United Kingdom,64511000.0,2.950039
United States,318523000.0,17.348075


### Modifying DataFrames

In [160]:
langs = pd.Series (
    ['France', 'German', 'Italian'],
    index=['France','Germany','Italy'],
    name='Language'
)

In [161]:
langs

France      France
Germany     German
Italy      Italian
Name: Language, dtype: object

In [163]:
df['Language'] = langs

In [164]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,France
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.655,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


In [165]:
df['Language']=df['Language'].fillna('English')

In [166]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,France
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.655,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,English
United Kingdom,64.511,2950039,242495,0.907,Europe,English
United States,318.523,17348075,9525067,0.915,America,English


In [167]:
df.rename(
    columns={
        'HDI': 'Human Development Index',
        'Anual Popcorn Consumption': 'APC'
    }, index={
        'United States': 'USA',
        'United Kingdom': 'UK',
        'Argentina': 'AG'
    })

Unnamed: 0,Population,GDP,Surface Area,Human Development Index,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,France
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.655,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,English
UK,64.511,2950039,242495,0.907,Europe,English
USA,318.523,17348075,9525067,0.915,America,English


In [168]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,France
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.655,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,English
United Kingdom,64.511,2950039,242495,0.907,Europe,English
United States,318.523,17348075,9525067,0.915,America,English


In [169]:
df.rename(index=str.upper)

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
CANADA,35.467,1785387,9984670,0.913,America,English
FRANCE,63.951,2833687,640679,0.888,Europe,France
GERMANY,80.94,3874437,357114,0.916,Europe,German
ITALY,60.655,2167744,301336,0.873,Europe,Italian
JAPAN,127.061,4602367,377930,0.891,Asia,English
UNITED KINGDOM,64.511,2950039,242495,0.907,Europe,English
UNITED STATES,318.523,17348075,9525067,0.915,America,English


In [171]:
df.rename(index=lambda x: x.lower())

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
canada,35.467,1785387,9984670,0.913,America,English
france,63.951,2833687,640679,0.888,Europe,France
germany,80.94,3874437,357114,0.916,Europe,German
italy,60.655,2167744,301336,0.873,Europe,Italian
japan,127.061,4602367,377930,0.891,Asia,English
united kingdom,64.511,2950039,242495,0.907,Europe,English
united states,318.523,17348075,9525067,0.915,America,English


In [172]:
df.drop(columns='Language', inplace=True)

In [173]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.655,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [176]:
df.loc['China'] = pd.Series({'Population':1_400_000_000, 'Continent':'Asia'})

In [177]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387.0,9984670.0,0.913,America
France,63.951,2833687.0,640679.0,0.888,Europe
Germany,80.94,3874437.0,357114.0,0.916,Europe
Italy,60.655,2167744.0,301336.0,0.873,Europe
Japan,127.061,4602367.0,377930.0,0.891,Asia
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe
United States,318.523,17348075.0,9525067.0,0.915,America
China,1400000000.0,,,,Asia


In [178]:
df.drop('China',inplace=True)

In [179]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387.0,9984670.0,0.913,America
France,63.951,2833687.0,640679.0,0.888,Europe
Germany,80.94,3874437.0,357114.0,0.916,Europe
Italy,60.655,2167744.0,301336.0,0.873,Europe
Japan,127.061,4602367.0,377930.0,0.891,Asia
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe
United States,318.523,17348075.0,9525067.0,0.915,America


In [180]:
df.reset_index()

Unnamed: 0,index,Population,GDP,Surface Area,HDI,Continent
0,Canada,35.467,1785387.0,9984670.0,0.913,America
1,France,63.951,2833687.0,640679.0,0.888,Europe
2,Germany,80.94,3874437.0,357114.0,0.916,Europe
3,Italy,60.655,2167744.0,301336.0,0.873,Europe
4,Japan,127.061,4602367.0,377930.0,0.891,Asia
5,United Kingdom,64.511,2950039.0,242495.0,0.907,Europe
6,United States,318.523,17348075.0,9525067.0,0.915,America


In [181]:
df.set_index('Population')

Unnamed: 0_level_0,GDP,Surface Area,HDI,Continent
Population,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
35.467,1785387.0,9984670.0,0.913,America
63.951,2833687.0,640679.0,0.888,Europe
80.94,3874437.0,357114.0,0.916,Europe
60.655,2167744.0,301336.0,0.873,Europe
127.061,4602367.0,377930.0,0.891,Asia
64.511,2950039.0,242495.0,0.907,Europe
318.523,17348075.0,9525067.0,0.915,America


In [182]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387.0,9984670.0,0.913,America
France,63.951,2833687.0,640679.0,0.888,Europe
Germany,80.94,3874437.0,357114.0,0.916,Europe
Italy,60.655,2167744.0,301336.0,0.873,Europe
Japan,127.061,4602367.0,377930.0,0.891,Asia
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe
United States,318.523,17348075.0,9525067.0,0.915,America


In [184]:
df[['Population','GDP']]

Unnamed: 0,Population,GDP
Canada,35.467,1785387.0
France,63.951,2833687.0
Germany,80.94,3874437.0
Italy,60.655,2167744.0
Japan,127.061,4602367.0
United Kingdom,64.511,2950039.0
United States,318.523,17348075.0


In [185]:
df['GDP']/df['Population']

Canada            50339.385908
France            44310.284437
Germany           47868.013343
Italy             35738.916825
Japan             36221.712406
United Kingdom    45729.239975
United States     54464.120330
dtype: float64

In [188]:
df['GDP Per Capita'] = df['GDP'] / df['Population']

In [189]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,GDP Per Capita
Canada,35.467,1785387.0,9984670.0,0.913,America,50339.385908
France,63.951,2833687.0,640679.0,0.888,Europe,44310.284437
Germany,80.94,3874437.0,357114.0,0.916,Europe,47868.013343
Italy,60.655,2167744.0,301336.0,0.873,Europe,35738.916825
Japan,127.061,4602367.0,377930.0,0.891,Asia,36221.712406
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe,45729.239975
United States,318.523,17348075.0,9525067.0,0.915,America,54464.12033


In [190]:
df.head()

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,GDP Per Capita
Canada,35.467,1785387.0,9984670.0,0.913,America,50339.385908
France,63.951,2833687.0,640679.0,0.888,Europe,44310.284437
Germany,80.94,3874437.0,357114.0,0.916,Europe,47868.013343
Italy,60.655,2167744.0,301336.0,0.873,Europe,35738.916825
Japan,127.061,4602367.0,377930.0,0.891,Asia,36221.712406


In [191]:
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI,GDP Per Capita
count,7.0,7.0,7.0,7.0,7.0
mean,107.301143,5080248.0,3061327.0,0.900429,44953.096175
std,97.250769,5494020.0,4576187.0,0.016592,6953.682591
min,35.467,1785387.0,242495.0,0.873,35738.916825
25%,62.303,2500716.0,329225.0,0.8895,40265.998421
50%,64.511,2950039.0,377930.0,0.907,45729.239975
75%,104.0005,4238402.0,5082873.0,0.914,49103.699626
max,318.523,17348080.0,9984670.0,0.916,54464.12033


In [193]:
population = df['Population']

In [194]:
population.min(), population.max()

(35.467, 318.523)

In [195]:
population.sum()

751.1080000000001

In [196]:
population.sum()/len(population)

107.30114285714286

In [197]:
population.mean()

107.30114285714286

In [198]:
population.std()

97.25076921448759

In [201]:
population.median()

64.511

In [202]:
population.describe()

count      7.000000
mean     107.301143
std       97.250769
min       35.467000
25%       62.303000
50%       64.511000
75%      104.000500
max      318.523000
Name: Population, dtype: float64

In [203]:
population.quantile(.25)

62.303

In [204]:
population.quantile([.2,.4,.6,.8,1])

0.2     61.3142
0.4     64.1750
0.6     74.3684
0.8    117.8368
1.0    318.5230
Name: Population, dtype: float64