In [92]:
import pandas as pd
import numpy as np

# Pandas Series

In [93]:
# Seven consecutive integers; with no index supplied, pandas labels them 0..6
pop = pd.Series(list(range(1, 8)))
pop

0    1
1    2
2    3
3    4
4    5
5    6
6    7
dtype: int64

In [94]:
pop.name = "G7 pop in million"
pop

0    1
1    2
2    3
3    4
4    5
5    6
6    7
Name: G7 pop in million, dtype: int64

In [95]:
print(pop.dtype)
print(pop.values)
print(type(pop.values))

int64
[1 2 3 4 5 6 7]
<class 'numpy.ndarray'>


In [96]:
pop[0]

1

In [97]:
pop.index

RangeIndex(start=0, stop=7, step=1)

In [98]:
# Replace the default 0..6 positions with one string label per value
pop.index = [
    "Canada",
    "China",
    "America",
    "Japan",
    "UK",
    "Aus",
    "Russiaz"
]
# Unlike a plain Python list, a Series keeps an explicit index label
# attached to each value, so entries can be looked up by name
pop

Canada     1
China      2
America    3
Japan      4
UK         5
Aus        6
Russiaz    7
Name: G7 pop in million, dtype: int64

# Create a pandas Series

In [99]:
# Build a Series from a dict: the keys become the index labels and the
# values become the data
pd.Series(
    {"Canada": 1, "China": 2, "America": 3, "Japan": 4, "UK": 5, "Aus": 6, "Russiaz": 7},
    name="Pop",
)

Canada     1
China      2
America    3
Japan      4
UK         5
Aus        6
Russiaz    7
Name: Pop, dtype: int64

In [100]:
# Same Series from two parallel lists: the values, then their index labels
pd.Series(
    data=[1, 2, 3, 4, 5, 6, 7],
    index=["Canada", "China", "America", "Japan", "UK", "Aus", "Russiaz"],
    name="pop",
)

Canada     1
China      2
America    3
Japan      4
UK         5
Aus        6
Russiaz    7
Name: pop, dtype: int64

# Create a sub-Series

In [101]:
# Passing an existing Series together with index= keeps only the requested
# labels; the name of the source Series carries over to the result
pd.Series(pop, index = ["China"])

China    2
Name: G7 pop in million, dtype: int64

# Indexing

In [102]:
#Search for value
print(pop["America"] )
print("")

print(pop.iloc[0])


3

1


In [103]:
# A list of positions returns a Series (here with a single element)
print(pop.iloc[[0]])
print("")

# iloc selects by position; several positions can be listed at once,
# and negative positions count from the end
print(pop.iloc[[0,-1]]) 
print("")

# A list of labels selects by index label, no positions needed
print(pop[["China","Japan"]]) 
print("")

# Label-based slice: unlike a positional slice, the end label ("Aus") is included
pop["China":"Aus"]

Canada    1
Name: G7 pop in million, dtype: int64

Canada     1
Russiaz    7
Name: G7 pop in million, dtype: int64

China    2
Japan    4
Name: G7 pop in million, dtype: int64



China      2
America    3
Japan      4
UK         5
Aus        6
Name: G7 pop in million, dtype: int64

# Conditional selection <br>
(Boolean arrays)

In [104]:
pop

Canada     1
China      2
America    3
Japan      4
UK         5
Aus        6
Russiaz    7
Name: G7 pop in million, dtype: int64

In [105]:
pop > 4

Canada     False
China      False
America    False
Japan      False
UK          True
Aus         True
Russiaz     True
Name: G7 pop in million, dtype: bool

In [106]:
#Boolean Selection
pop[pop > 4]

UK         5
Aus        6
Russiaz    7
Name: G7 pop in million, dtype: int64

In [107]:
print(pop.mean())
print(pop.std())

4.0
2.160246899469287


In [108]:
# Keep the values greater than (mean - std/2).
# OR-ing this mask with `pop > pop.mean() + pop.std()/2` adds nothing: anything
# above the higher bound is already above the lower one, so a single comparison
# selects exactly the same rows.
# To select the values *outside* the band on both sides instead, use
# `(pop < mean - std/2) | (pop > mean + std/2)`.
pop[pop > pop.mean() - pop.std() / 2]

America    3
Japan      4
UK         5
Aus        6
Russiaz    7
Name: G7 pop in million, dtype: int64

# Logical operators

~ not <br> 
| or <br> 
& and <br> 

In [109]:
pop["UK"] = 0
pop

Canada     1
China      2
America    3
Japan      4
UK         0
Aus        6
Russiaz    7
Name: G7 pop in million, dtype: int64

In [110]:
# Overwrite every entry smaller than 4 in one boolean-masked assignment
pop.loc[pop < 4] = 99
pop

Canada     99
China      99
America    99
Japan       4
UK         99
Aus         6
Russiaz     7
Name: G7 pop in million, dtype: int64

# Dataframe

In [111]:
# Country-level statistics, one list per column (seven rows each).
# The dict maps column name -> values; `columns=` pins the column order.
# Rows receive the default 0..6 integer index at this point.
df = pd.DataFrame(
    {
        'Population': [35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
        'GDP': [1785387, 2833687, 3874437, 2167744, 4602367, 2950039, 17348075],
        'Surface Area': [9984670, 640679, 357114, 301336, 377930, 242495, 9525067],
        'HDI': [0.913, 0.888, 0.916, 0.873, 0.891, 0.907, 0.915],
        'Continent': ['America', 'Europe', 'Europe', 'Europe', 'Asia', 'Europe', 'America'],
    },
    columns=['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'],
)

In [112]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
0,35.467,1785387,9984670,0.913,America
1,63.951,2833687,640679,0.888,Europe
2,80.94,3874437,357114,0.916,Europe
3,60.665,2167744,301336,0.873,Europe
4,127.061,4602367,377930,0.891,Asia
5,64.511,2950039,242495,0.907,Europe
6,318.523,17348075,9525067,0.915,America


In [113]:
# Swap the default 0..6 row labels for country names (one per row, in row order)
df.index = ["Canada", "France", "Germany", "Italy", "Japan", "UK", "US"]

df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
UK,64.511,2950039,242495,0.907,Europe
US,318.523,17348075,9525067,0.915,America


In [114]:
df.columns

Index(['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'], dtype='object')

In [115]:
df.index

Index(['Canada', 'France', 'Germany', 'Italy', 'Japan', 'UK', 'US'], dtype='object')

In [116]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Canada to US
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Population    7 non-null      float64
 1   GDP           7 non-null      int64  
 2   Surface Area  7 non-null      int64  
 3   HDI           7 non-null      float64
 4   Continent     7 non-null      object 
dtypes: float64(2), int64(2), object(1)
memory usage: 336.0+ bytes


In [117]:
 df.size  #total entry

35

In [118]:
df.shape #matrix shape

(7, 5)

In [119]:
df.describe().round(3)

Unnamed: 0,Population,GDP,Surface Area,HDI
count,7.0,7.0,7.0,7.0
mean,107.303,5080248.0,3061327.286,0.9
std,97.25,5494020.0,4576186.575,0.017
min,35.467,1785387.0,242495.0,0.873
25%,62.308,2500716.0,329225.0,0.89
50%,64.511,2950039.0,377930.0,0.907
75%,104.0,4238402.0,5082873.0,0.914
max,318.523,17348080.0,9984670.0,0.916


In [120]:
df.dtypes

Population      float64
GDP               int64
Surface Area      int64
HDI             float64
Continent        object
dtype: object

In [121]:
df.dtypes.value_counts()

float64    2
int64      2
object     1
dtype: int64

# Indexing, Selection and Slicing

In [122]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
UK,64.511,2950039,242495,0.907,Europe
US,318.523,17348075,9525067,0.915,America


In [123]:
df.loc["Canada"]  # Retrieve one row by its index label; the result is a Series keyed by column name

Population       35.467
GDP             1785387
Surface Area    9984670
HDI               0.913
Continent       America
Name: Canada, dtype: object

In [124]:
df.iloc[-1]

Population       318.523
GDP             17348075
Surface Area     9525067
HDI                0.915
Continent        America
Name: US, dtype: object

In [125]:
df["Population"]  # Retrieve one column as a Series (the same attribute for every row)

Canada      35.467
France      63.951
Germany     80.940
Italy       60.665
Japan      127.061
UK          64.511
US         318.523
Name: Population, dtype: float64

## Into a frame

### column

In [126]:
df["Population"].to_frame() # slicing into a dataframe

Unnamed: 0,Population
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061
UK,64.511
US,318.523


In [127]:
# A list of column names selects several columns and returns a DataFrame.
# NOTE(review): treat the result as a separate frame rather than a view of df;
# confirm copy semantics before relying on writes propagating back.
df[["Population", "GDP"]]

Unnamed: 0,Population,GDP
Canada,35.467,1785387
France,63.951,2833687
Germany,80.94,3874437
Italy,60.665,2167744
Japan,127.061,4602367
UK,64.511,2950039
US,318.523,17348075


### row

In [128]:
df[1:3]  # A bare slice selects rows by position, end-exclusive: rows 1 and 2

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe


### Loc and iloc

In [129]:
df.loc["Italy"]

Population       60.665
GDP             2167744
Surface Area     301336
HDI               0.873
Continent        Europe
Name: Italy, dtype: object

In [130]:
df.loc["Italy":"Italy"]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Italy,60.665,2167744,301336,0.873,Europe


In [131]:
df.loc["France":"Italy"]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe


In [132]:
df.loc["France":"Italy", "Population"]

France     63.951
Germany    80.940
Italy      60.665
Name: Population, dtype: float64

<span class="burk">'__Final use of loc__'</span>

In [133]:
df.loc["France":"Italy", ["Population"]]

Unnamed: 0,Population
France,63.951
Germany,80.94
Italy,60.665


In [134]:
df.iloc[0]

Population       35.467
GDP             1785387
Surface Area    9984670
HDI               0.913
Continent       America
Name: Canada, dtype: object

In [135]:
df.iloc[[0,1,-1]]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
US,318.523,17348075,9525067,0.915,America


In [136]:
df.iloc[1:3]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe


### Full Slicing

In [137]:
df.iloc[1:3, 2]

France     640679
Germany    357114
Name: Surface Area, dtype: int64

In [138]:
df.iloc[1:3, [2,3]]

Unnamed: 0,Surface Area,HDI
France,640679,0.888
Germany,357114,0.916


In [139]:
df.iloc[1:3, 1:3]

Unnamed: 0,GDP,Surface Area
France,2833687,640679
Germany,3874437,357114


RECOMMENDED: Always use loc and iloc to reduce ambiguity, especially with DataFrames that have numeric indexes.

## Conditional selection (boolean arrays)

We saw conditional selection applied to Series and it'll work in the same way for DataFrames. After all, a DataFrame is a collection of Series:

In [140]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
UK,64.511,2950039,242495,0.907,Europe
US,318.523,17348075,9525067,0.915,America


In [141]:
df['Population'] > 70

Canada     False
France     False
Germany     True
Italy      False
Japan       True
UK         False
US          True
Name: Population, dtype: bool

In [142]:
df.loc[df['Population'] > 70]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Germany,80.94,3874437,357114,0.916,Europe
Japan,127.061,4602367,377930,0.891,Asia
US,318.523,17348075,9525067,0.915,America


In [143]:
df.loc[df['Population'] > 70, 'Population']

Germany     80.940
Japan      127.061
US         318.523
Name: Population, dtype: float64

In [144]:
df.loc[df['Population'] > 70, ['Population']]

Unnamed: 0,Population
Germany,80.94
Japan,127.061
US,318.523


## Drop

In [145]:
df.drop('Canada')

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
UK,64.511,2950039,242495,0.907,Europe
US,318.523,17348075,9525067,0.915,America


In [146]:
df.drop(['Canada', 'Japan'])

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
UK,64.511,2950039,242495,0.907,Europe
US,318.523,17348075,9525067,0.915,America


In [147]:
df.drop(columns=['Population', 'HDI']) #column level

Unnamed: 0,GDP,Surface Area,Continent
Canada,1785387,9984670,America
France,2833687,640679,Europe
Germany,3874437,357114,Europe
Italy,2167744,301336,Europe
Japan,4602367,377930,Asia
UK,2950039,242495,Europe
US,17348075,9525067,America


In [148]:
df.drop(['Italy', 'Canada'], axis=0) # 0=rows

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Japan,127.061,4602367,377930,0.891,Asia
UK,64.511,2950039,242495,0.907,Europe
US,318.523,17348075,9525067,0.915,America


In [149]:
df.drop(['Population', 'HDI'], axis=1) #1=columns

Unnamed: 0,GDP,Surface Area,Continent
Canada,1785387,9984670,America
France,2833687,640679,Europe
Germany,3874437,357114,Europe
Italy,2167744,301336,Europe
Japan,4602367,377930,Asia
UK,2950039,242495,Europe
US,17348075,9525067,America


## Operations

In [150]:
df[['Population', 'GDP']]

Unnamed: 0,Population,GDP
Canada,35.467,1785387
France,63.951,2833687
Germany,80.94,3874437
Italy,60.665,2167744
Japan,127.061,4602367
UK,64.511,2950039
US,318.523,17348075


In [151]:
# Element-wise division returns a new frame; df itself is not modified
df[['Population', 'GDP']] /100

Unnamed: 0,Population,GDP
Canada,0.35467,17853.87
France,0.63951,28336.87
Germany,0.8094,38744.37
Italy,0.60665,21677.44
Japan,1.27061,46023.67
UK,0.64511,29500.39
US,3.18523,173480.75


In [152]:
crisis = pd.Series([-1_000_000, -0.3], index=['GDP', 'HDI'])
crisis

GDP   -1000000.0
HDI         -0.3
dtype: float64

In [153]:
df[['GDP', 'HDI']] + crisis # individual column operation

Unnamed: 0,GDP,HDI
Canada,785387.0,0.613
France,1833687.0,0.588
Germany,2874437.0,0.616
Italy,1167744.0,0.573
Japan,3602367.0,0.591
UK,1950039.0,0.607
US,16348075.0,0.615


# Modifying DataFrames

It's simple and intuitive: you can add columns, or replace the values of a column, without issues:

##  Adding a new column

In [154]:
langs = pd.Series(
    ['French', 'German', 'Italian'],
    index=['France', 'Germany', 'Italy'],
    name='Language'
)
langs

France      French
Germany     German
Italy      Italian
Name: Language, dtype: object

In [155]:
df["Language"] = langs
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,French
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.665,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,
UK,64.511,2950039,242495,0.907,Europe,
US,318.523,17348075,9525067,0.915,America,


## Replacing values per column

In [156]:
df['Language'] = 'English'

In [157]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
UK,64.511,2950039,242495,0.907,Europe,English
US,318.523,17348075,9525067,0.915,America,English


## Renaming Columns

In [158]:
# rename() takes {old_label: new_label} mappings for columns and for the index
# and returns a renamed frame; df itself keeps its original labels.
# Keys that match no existing label ('Anual Popcorn Consumption', 'Argentina')
# are silently ignored rather than raising an error.
df.rename(
    columns={
        'HDI': 'Human Development Index', # old name -> new name
        'Anual Popcorn Consumption': 'APC'
    }, index={
        'US': 'USA',
        'UK': 'LUK',
        'Argentina': 'AR'
    })

Unnamed: 0,Population,GDP,Surface Area,Human Development Index,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
LUK,64.511,2950039,242495,0.907,Europe,English
USA,318.523,17348075,9525067,0.915,America,English


In [159]:
df.rename(index=str.upper) # set all into upper case

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
CANADA,35.467,1785387,9984670,0.913,America,English
FRANCE,63.951,2833687,640679,0.888,Europe,English
GERMANY,80.94,3874437,357114,0.916,Europe,English
ITALY,60.665,2167744,301336,0.873,Europe,English
JAPAN,127.061,4602367,377930,0.891,Asia,English
UK,64.511,2950039,242495,0.907,Europe,English
US,318.523,17348075,9525067,0.915,America,English


In [160]:
df.rename(index=lambda x: x.lower()) # set all into lower case

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
canada,35.467,1785387,9984670,0.913,America,English
france,63.951,2833687,640679,0.888,Europe,English
germany,80.94,3874437,357114,0.916,Europe,English
italy,60.665,2167744,301336,0.873,Europe,English
japan,127.061,4602367,377930,0.891,Asia,English
uk,64.511,2950039,242495,0.907,Europe,English
us,318.523,17348075,9525067,0.915,America,English


## Dropping Column

In [161]:
# drop() returns a new frame without the column; rebinding df makes the change
# explicit instead of mutating the existing object through inplace=True
df = df.drop(columns='Language')
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
UK,64.511,2950039,242495,0.907,Europe
US,318.523,17348075,9525067,0.915,America


## Adding Values (comparing to 9.1, add a row)

In [162]:
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0,
# so the extra row is attached with pd.concat(). The result is a new frame;
# df itself is left unchanged.
china_row = pd.Series({
    'Population': 3,
    'GDP': 5
}, name='China')
# Reindexing to df's columns fills the fields the row does not provide with NaN;
# .to_frame().T turns the Series into a one-row frame whose label is its name
pd.concat([df, china_row.reindex(df.columns).to_frame().T])

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387.0,9984670.0,0.913,America
France,63.951,2833687.0,640679.0,0.888,Europe
Germany,80.94,3874437.0,357114.0,0.916,Europe
Italy,60.665,2167744.0,301336.0,0.873,Europe
Japan,127.061,4602367.0,377930.0,0.891,Asia
UK,64.511,2950039.0,242495.0,0.907,Europe
US,318.523,17348075.0,9525067.0,0.915,America
China,3.0,5.0,,,


In [163]:
# Assigning to a label that is not yet in the index adds a new row in place;
# columns missing from the Series are filled with NaN.
# NOTE(review): the other rows' Population values look like millions
# (e.g. 35.467 for Canada), so 1_400_000_000 is probably on a different
# scale (1_400 may have been intended) - confirm, since it dominates the
# mean/std computed later.
df.loc['China'] = pd.Series({
    'Population': 1_400_000_000, 
    'Continent': 'Asia'
})

df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387.0,9984670.0,0.913,America
France,63.951,2833687.0,640679.0,0.888,Europe
Germany,80.94,3874437.0,357114.0,0.916,Europe
Italy,60.665,2167744.0,301336.0,0.873,Europe
Japan,127.061,4602367.0,377930.0,0.891,Asia
UK,64.511,2950039.0,242495.0,0.907,Europe
US,318.523,17348075.0,9525067.0,0.915,America
China,1400000000.0,,,,Asia


In [164]:
# set_index() promotes an existing column to be the row index and returns a
# new frame; df itself is not modified. Only the last expression of a cell is
# displayed, so each such call needs its own cell (or an assignment) to have
# any visible effect. To move the current index labels back into a regular
# column, evaluate `df.reset_index()` on its own.
df.set_index("Population")

Unnamed: 0_level_0,GDP,Surface Area,HDI,Continent
Population,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
35.467,1785387.0,9984670.0,0.913,America
63.951,2833687.0,640679.0,0.888,Europe
80.94,3874437.0,357114.0,0.916,Europe
60.665,2167744.0,301336.0,0.873,Europe
127.061,4602367.0,377930.0,0.891,Asia
64.511,2950039.0,242495.0,0.907,Europe
318.523,17348075.0,9525067.0,0.915,America
1400000000.0,,,,Asia


## Creating columns from other columns
(Data manipulation)

In [165]:
df1 = df.loc[:,['Population', 'GDP']]
df1

Unnamed: 0,Population,GDP
Canada,35.467,1785387.0
France,63.951,2833687.0
Germany,80.94,3874437.0
Italy,60.665,2167744.0
Japan,127.061,4602367.0
UK,64.511,2950039.0
US,318.523,17348075.0
China,1400000000.0,


In [166]:
df1['GDP Per Capita']  = df1['GDP'] / df1['Population']
df1

Unnamed: 0,Population,GDP,GDP Per Capita
Canada,35.467,1785387.0,50339.385908
France,63.951,2833687.0,44310.284437
Germany,80.94,3874437.0,47868.013343
Italy,60.665,2167744.0,35733.025633
Japan,127.061,4602367.0,36221.712406
UK,64.511,2950039.0,45729.239975
US,318.523,17348075.0,54464.12033
China,1400000000.0,,


In [167]:
df['GDP Per Capita'] = df['GDP'] / df['Population']
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,GDP Per Capita
Canada,35.467,1785387.0,9984670.0,0.913,America,50339.385908
France,63.951,2833687.0,640679.0,0.888,Europe,44310.284437
Germany,80.94,3874437.0,357114.0,0.916,Europe,47868.013343
Italy,60.665,2167744.0,301336.0,0.873,Europe,35733.025633
Japan,127.061,4602367.0,377930.0,0.891,Asia,36221.712406
UK,64.511,2950039.0,242495.0,0.907,Europe,45729.239975
US,318.523,17348075.0,9525067.0,0.915,America,54464.12033
China,1400000000.0,,,,Asia,


# Statistical info


In [168]:
df.head()

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,GDP Per Capita
Canada,35.467,1785387.0,9984670.0,0.913,America,50339.385908
France,63.951,2833687.0,640679.0,0.888,Europe,44310.284437
Germany,80.94,3874437.0,357114.0,0.916,Europe,47868.013343
Italy,60.665,2167744.0,301336.0,0.873,Europe,35733.025633
Japan,127.061,4602367.0,377930.0,0.891,Asia,36221.712406


In [169]:
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI,GDP Per Capita
count,8.0,7.0,7.0,7.0,7.0
mean,175000100.0,5080248.0,3061327.0,0.900429,44952.254576
std,494974700.0,5494020.0,4576187.0,0.016592,6954.983875
min,35.467,1785387.0,242495.0,0.873,35733.025633
25%,63.1295,2500716.0,329225.0,0.8895,40265.998421
50%,72.7255,2950039.0,377930.0,0.907,45729.239975
75%,174.9265,4238402.0,5082873.0,0.914,49103.699626
max,1400000000.0,17348080.0,9984670.0,0.916,54464.12033


In [170]:
population = df['Population']
population

Canada     3.546700e+01
France     6.395100e+01
Germany    8.094000e+01
Italy      6.066500e+01
Japan      1.270610e+02
UK         6.451100e+01
US         3.185230e+02
China      1.400000e+09
Name: Population, dtype: float64

In [171]:
population.min()

35.467

In [172]:
population.max()

1400000000.0

In [173]:
population.mean()

175000093.88975

In [174]:
population.std()

494974708.8934035

In [175]:
population.median()

72.7255

In [176]:
population.describe()

count    8.000000e+00
mean     1.750001e+08
std      4.949747e+08
min      3.546700e+01
25%      6.312950e+01
50%      7.272550e+01
75%      1.749265e+02
max      1.400000e+09
Name: Population, dtype: float64

In [177]:
population.quantile(0.25)
# can be from 0 to 1

63.1295