 # Pandas - DataFrames
Probably the most important data structure of pandas is the DataFrame. It's a tabular structure tightly integrated with Series.

In [2]:
import numpy as np
import pandas as pd
print(pd.__version__)

2.2.2


- We'll continue our analysis of the G7 countries, now looking at DataFrames. As said, a DataFrame looks a lot like a table.

- Creating DataFrames manually can be tedious. 99% of the time you'll be pulling the data from a Database, a csv file or the web. But still, you can create a DataFrame by specifying the columns and values:

In [91]:
# Build the G7 table by hand: one list per column, one entry per country.
# Rows get the default 0..6 integer index at this point.
df = pd.DataFrame(data={
    'Population': [35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
    'GDP': [1785387, 2833687, 3874437, 2167744, 4602367, 2950039, 17348075],
    'Surface Area': [9984670, 640679, 357114, 301336, 377930, 242495, 9525067],
    'HDI': [0.913, 0.888, 0.916, 0.873, 0.891, 0.907, 0.915],
    'Continent': ['America', 'Europe', 'Europe', 'Europe', 'Asia', 'Europe', 'America'],
})


In [None]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
0,35.467,1785387,9984670,0.913,America
1,63.951,2833687,640679,0.888,Europe
2,80.94,3874437,357114,0.916,Europe
3,60.665,2167744,301336,0.873,Europe
4,127.061,4602367,377930,0.891,Asia
5,64.511,2950039,242495,0.907,Europe
6,318.523,17348075,9525067,0.915,America


DataFrames also have indexes. As you can see in the "table" above, pandas has assigned a numeric, autoincremental index automatically to each "row" in our DataFrame. In our case, we know that each row represents a country, so we'll just reassign the index:

In [93]:
# Swap the default 0..6 row labels for the country each row describes.
df.index = [
    'Canada', 'France', 'Germany', 'Italy',
    'Japan', 'United Kingdom', 'United States',
]

In [35]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,
Germany,80.94,3874437,357114,0.916,Europe,
Italy,60.665,2167744,301336,0.873,Europe,
Japan,127.061,4602367,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


In [39]:
df.columns

Index(['Population', 'GDP', 'Surface Area', 'HDI', 'Continent', 'Language'], dtype='object')

In [40]:
df.index

Index(['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom',
       'United States'],
      dtype='object')

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Canada to United States
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Population    7 non-null      float64
 1   GDP           7 non-null      int64  
 2   Surface Area  7 non-null      int64  
 3   HDI           7 non-null      float64
 4   Continent     7 non-null      object 
dtypes: float64(2), int64(2), object(1)
memory usage: 636.0+ bytes


In [None]:
df.size  # 35: total number of elements (7 rows x 5 columns)

35

In [None]:
df.shape  # (7, 5): 7 rows and 5 columns

(7, 5)

In [None]:
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI
count,7.0,7.0,7.0,7.0
mean,107.302571,5080248.0,3061327.0,0.900429
std,97.24997,5494020.0,4576187.0,0.016592
min,35.467,1785387.0,242495.0,0.873
25%,62.308,2500716.0,329225.0,0.8895
50%,64.511,2950039.0,377930.0,0.907
75%,104.0005,4238402.0,5082873.0,0.914
max,318.523,17348080.0,9984670.0,0.916


In [None]:
df.dtypes

Unnamed: 0,0
Population,float64
GDP,int64
Surface Area,int64
HDI,float64
Continent,object


---
# Indexing, Selection and Slicing
Individual columns in the DataFrame can be selected with regular indexing. Each column is represented as a Series:

In [None]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [41]:
df.loc['Canada']

Unnamed: 0,Canada
Population,35.467
GDP,1785387
Surface Area,9984670
HDI,0.913
Continent,America
Language,


In [42]:
df.iloc[-1]  # last row by position: its column labels and values

Unnamed: 0,United States
Population,318.523
GDP,17348075
Surface Area,9525067
HDI,0.915
Continent,America
Language,


In [43]:
df['Population']

Unnamed: 0,Population
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061
United Kingdom,64.511
United States,318.523


In [None]:
df['Continent'].to_frame()

Unnamed: 0,Continent
Canada,America
France,Europe
Germany,Europe
Italy,Europe
Japan,Asia
United Kingdom,Europe
United States,America


In [None]:
# A list of column names selects several columns at once (similar to numpy
# and Series fancy indexing); the result is a DataFrame with just those columns.
df[['Population','HDI']]

Unnamed: 0,Population,HDI
Canada,35.467,0.913
France,63.951,0.888
Germany,80.94,0.916
Italy,60.665,0.873
Japan,127.061,0.891
United Kingdom,64.511,0.907
United States,318.523,0.915


In [5]:
df[1:3]  # a bare slice picks rows by position (1 and 2, end excluded) -- the same rows as df.iloc[1:3]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe


In [None]:
df.loc['Japan']

Unnamed: 0,Japan
Population,127.061
GDP,4602367
Surface Area,377930
HDI,0.891
Continent,Asia


In [None]:
df.loc['France':'United Kingdom']  # label slices include the end label ('United Kingdom' is returned too)

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe


In [None]:
# As a second "argument", you can pass the column(s) you'd like to select:
df.loc['Germany':'Japan','Population']

Unnamed: 0,Population
Germany,80.94
Italy,60.665
Japan,127.061


In [None]:
df.loc['Germany':'Japan','Population':'HDI']

Unnamed: 0,Population,GDP,Surface Area,HDI
Germany,80.94,3874437,357114,0.916
Italy,60.665,2167744,301336,0.873
Japan,127.061,4602367,377930,0.891


In [None]:
df.loc['Germany':'Japan',['Population','HDI']]

Unnamed: 0,Population,HDI
Germany,80.94,0.916
Italy,60.665,0.873
Japan,127.061,0.891


iloc works with the (numeric) "position" of the index:

In [44]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,
Germany,80.94,3874437,357114,0.916,Europe,
Italy,60.665,2167744,301336,0.873,Europe,
Japan,127.061,4602367,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


In [45]:
df.iloc[0]                              # Prints all the values of the first Row

Unnamed: 0,Canada
Population,35.467
GDP,1785387
Surface Area,9984670
HDI,0.913
Continent,America
Language,


In [46]:
df.iloc[-1]                                                  # Prints all the values of the last Row

Unnamed: 0,United States
Population,318.523
GDP,17348075
Surface Area,9525067
HDI,0.915
Continent,America
Language,


In [9]:
df.iloc[[0,1,-1]]                                  # prints three different rows

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
United States,318.523,17348075,9525067,0.915,America


In [11]:
df.iloc[1:3,3]  # rows at positions 1 and 2 (position 3 is excluded), column at position 3

Unnamed: 0,HDI
France,0.888
Germany,0.916


In [14]:
df.iloc[1:3,[0,3]]  # rows at positions 1 and 2 (position 3 is excluded), columns at positions 0 and 3

Unnamed: 0,Population,HDI
France,63.951,0.888
Germany,80.94,0.916


In [16]:
df.iloc[1:3,1:3]  # rows at positions 1 and 2, columns at positions 1 and 2 (end positions excluded)

Unnamed: 0,GDP,Surface Area
France,2833687,640679
Germany,3874437,357114


RECOMMENDED: Always use loc and iloc to reduce ambiguity, especially with DataFrames that have numeric indexes.

---
# Conditional selection (boolean arrays)



We saw conditional selection applied to Series and it'll work in the same way for DataFrames. After all, a DataFrame is a collection of Series:

In [47]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,
Germany,80.94,3874437,357114,0.916,Europe,
Italy,60.665,2167744,301336,0.873,Europe,
Japan,127.061,4602367,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


In [20]:
df[df['Population'] > 70]

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Germany,80.94,3874437,357114,0.916,Europe
Japan,127.061,4602367,377930,0.891,Asia
United States,318.523,17348075,9525067,0.915,America


In [23]:
df.loc[df['Population'] > 70, 'Population']  # only the 'Population' column, for the rows that satisfy the condition

Unnamed: 0,Population
Germany,80.94
Japan,127.061
United States,318.523


In [27]:
df.loc[df['Population'] > 70, ['Population','GDP']]  # only the 'Population' and 'GDP' columns, for the rows that satisfy the condition

Unnamed: 0,Population,GDP
Germany,80.94,3874437
Japan,127.061,4602367
United States,318.523,17348075


---
# Dropping stuff
In contrast to selection, we have "dropping". Instead of pointing out which values you'd like to select, you can point out which ones you'd like to drop:

In [28]:
df.drop('Canada')

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [29]:
df.drop(['Canada','Japan'])

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [48]:
df.drop(columns=['Population','HDI'])

Unnamed: 0,GDP,Surface Area,Continent,Language
Canada,1785387,9984670,America,
France,2833687,640679,Europe,
Germany,3874437,357114,Europe,
Italy,2167744,301336,Europe,
Japan,4602367,377930,Asia,
United Kingdom,2950039,242495,Europe,
United States,17348075,9525067,America,


In [49]:
df.drop(['Italy','Japan'],axis = 0)

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,
Germany,80.94,3874437,357114,0.916,Europe,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


In [35]:
df.drop(['Population','HDI'],axis=1)

Unnamed: 0,GDP,Surface Area,Continent
Canada,1785387,9984670,America
France,2833687,640679,Europe
Germany,3874437,357114,Europe
Italy,2167744,301336,Europe
Japan,4602367,377930,Asia
United Kingdom,2950039,242495,Europe
United States,17348075,9525067,America


In [36]:
df.drop(['Canada','Italy'],axis='rows')

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [37]:
df.drop(['Population','HDI'],axis='columns')

Unnamed: 0,GDP,Surface Area,Continent
Canada,1785387,9984670,America
France,2833687,640679,Europe
Germany,3874437,357114,Europe
Italy,2167744,301336,Europe
Japan,4602367,377930,Asia
United Kingdom,2950039,242495,Europe
United States,17348075,9525067,America


All these drop methods return a new DataFrame. If you'd like to modify it "in place", you can use the inplace parameter (there's an example below).

---
# Operations

In [7]:
df[['Population', 'GDP']]

Unnamed: 0,Population,GDP
0,35.467,1785387
1,63.951,2833687
2,80.94,3874437
3,60.665,2167744
4,127.061,4602367
5,64.511,2950039
6,318.523,17348075


In [8]:
df[['Population','GDP']] / 100

Unnamed: 0,Population,GDP
0,0.35467,17853.87
1,0.63951,28336.87
2,0.8094,38744.37
3,0.60665,21677.44
4,1.27061,46023.67
5,0.64511,29500.39
6,3.18523,173480.75


Operations with Series work at a column level, broadcasting down the rows (which can be counterintuitive).

In [12]:
# Per-column adjustment: the Series labels name the columns they apply to.
crisis = pd.Series({'GDP': -1_000_000, 'HDI': -0.3})
crisis

Unnamed: 0,0
GDP,-1000000.0
HDI,-0.3


In [50]:
df[['GDP','HDI']]

Unnamed: 0,GDP,HDI
Canada,1785387,0.913
France,2833687,0.888
Germany,3874437,0.916
Italy,2167744,0.873
Japan,4602367,0.891
United Kingdom,2950039,0.907
United States,17348075,0.915


In [51]:
df[['GDP','HDI']] + crisis

Unnamed: 0,GDP,HDI
Canada,785387.0,0.613
France,1833687.0,0.588
Germany,2874437.0,0.616
Italy,1167744.0,0.573
Japan,3602367.0,0.591
United Kingdom,1950039.0,0.607
United States,16348075.0,0.615




---


# Modifying DataFrames




It's simple and intuitive. You can add columns, or replace the values of existing columns, without issues:

**Adding a new column**

In [94]:
# Languages for only three of the countries, keyed by country name.
langs = pd.Series(
    {'France': 'French', 'Germany': 'German', 'Italy': 'Italian'},
    name='Language',
)

In [63]:
langs

Unnamed: 0,Language
France,French
Germany,German
Italy,Italian


In [95]:
# Assignment aligns langs to df by index label: countries that are absent
# from langs end up with a missing value in the new 'Language' column.
df['Language'] = langs

In [65]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,French
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.665,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602367,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


 **Replacing values per column**

In [66]:
# A scalar is broadcast to every row, overwriting the whole column.
df['Language'] = 'English'

In [67]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
United Kingdom,64.511,2950039,242495,0.907,Europe,English
United States,318.523,17348075,9525067,0.915,America,English


**Renaming Columns**

In [69]:
# rename() returns a renamed copy and skips mapping keys that match nothing.
# 'Apc' is not a column and 'Kingdom of Saudi Arabia' is not an index label;
# both are included on purpose to show that unknown keys are ignored.
df.rename(
    index={'United States': 'USA', 'Kingdom of Saudi Arabia': 'KSA'},
    columns={'GDP': 'Gross domestic product', 'Apc': 'AAnual Popcorn Consumption'},
)

Unnamed: 0,Population,Gross domestic product,Surface Area,HDI,Continent,Language
Canada,35.467,1785387,9984670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.665,2167744,301336,0.873,Europe,English
Japan,127.061,4602367,377930,0.891,Asia,English
United Kingdom,64.511,2950039,242495,0.907,Europe,English
USA,318.523,17348075,9525067,0.915,America,English


In [70]:
df.rename(index=str.upper)  # a function can be passed instead of a dict: this upper-cases every index label

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
CANADA,35.467,1785387,9984670,0.913,America,English
FRANCE,63.951,2833687,640679,0.888,Europe,English
GERMANY,80.94,3874437,357114,0.916,Europe,English
ITALY,60.665,2167744,301336,0.873,Europe,English
JAPAN,127.061,4602367,377930,0.891,Asia,English
UNITED KINGDOM,64.511,2950039,242495,0.907,Europe,English
UNITED STATES,318.523,17348075,9525067,0.915,America,English


In [71]:
df.rename(index=lambda x: x.lower())  # any callable works: this lambda lower-cases every index label

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,Language
canada,35.467,1785387,9984670,0.913,America,English
france,63.951,2833687,640679,0.888,Europe,English
germany,80.94,3874437,357114,0.916,Europe,English
italy,60.665,2167744,301336,0.873,Europe,English
japan,127.061,4602367,377930,0.891,Asia,English
united kingdom,64.511,2950039,242495,0.907,Europe,English
united states,318.523,17348075,9525067,0.915,America,English


**Dropping columns**

In [96]:
# inplace=True mutates df directly instead of returning a new frame,
# which is why this cell displays nothing.
df.drop(columns='Language',inplace=True)

In [73]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.665,2167744,301336,0.873,Europe
Japan,127.061,4602367,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


**Adding values**

In [98]:
# Append a partially filled 'China' row: the Series name becomes the row label,
# and the columns it does not provide are left as missing values.
new_data = pd.Series(data=[3, 5], index=['Population', 'GDP'], name='China')

# Turn the Series into a one-row frame so it can be stacked under df.
df = pd.concat([df, new_data.to_frame().transpose()])


In [99]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387,9984670.0,0.913,America
France,63.951,2833687,640679.0,0.888,Europe
Germany,80.94,3874437,357114.0,0.916,Europe
Italy,60.665,2167744,301336.0,0.873,Europe
Japan,127.061,4602367,377930.0,0.891,Asia
United Kingdom,64.511,2950039,242495.0,0.907,Europe
United States,318.523,17348075,9525067.0,0.915,America
China,3.0,5,,,


In [101]:
# Assigning a Series to a row aligns it on the column names: the columns the
# Series does not mention ('GDP', 'Surface Area', 'HDI') are set to NaN, so
# this also wipes the GDP value stored for 'China' by the concat step.
# NOTE(review): the other rows' populations look like millions, while this
# value is a raw head count -- confirm the intended unit.
df.loc['China'] = pd.Series({
    'Population':1_400_000_000,
    'Continent': 'Asia'
})

In [102]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387.0,9984670.0,0.913,America
France,63.951,2833687.0,640679.0,0.888,Europe
Germany,80.94,3874437.0,357114.0,0.916,Europe
Italy,60.665,2167744.0,301336.0,0.873,Europe
Japan,127.061,4602367.0,377930.0,0.891,Asia
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe
United States,318.523,17348075.0,9525067.0,0.915,America
China,1400000000.0,,,,Asia


We can use drop to just remove a row by index:

In [107]:
df.drop('China',inplace=True)

In [108]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387.0,9984670.0,0.913,America
France,63.951,2833687.0,640679.0,0.888,Europe
Germany,80.94,3874437.0,357114.0,0.916,Europe
Italy,60.665,2167744.0,301336.0,0.873,Europe
Japan,127.061,4602367.0,377930.0,0.891,Asia
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe
United States,318.523,17348075.0,9525067.0,0.915,America


In [109]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,35.467,1785387.0,9984670.0,0.913,America
France,63.951,2833687.0,640679.0,0.888,Europe
Germany,80.94,3874437.0,357114.0,0.916,Europe
Italy,60.665,2167744.0,301336.0,0.873,Europe
Japan,127.061,4602367.0,377930.0,0.891,Asia
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe
United States,318.523,17348075.0,9525067.0,0.915,America




---


# Creating columns from other columns

Altering a DataFrame often involves combining different columns into another. For example, in our Countries analysis, we could try to calculate the "GDP per capita", which is just GDP / Population.

In [110]:
df[['Population','GDP']]

Unnamed: 0,Population,GDP
Canada,35.467,1785387.0
France,63.951,2833687.0
Germany,80.94,3874437.0
Italy,60.665,2167744.0
Japan,127.061,4602367.0
United Kingdom,64.511,2950039.0
United States,318.523,17348075.0


In [111]:
df['GDP'] / df['Population']

Unnamed: 0,0
Canada,50339.385908
France,44310.284437
Germany,47868.013343
Italy,35733.025633
Japan,36221.712406
United Kingdom,45729.239975
United States,54464.12033


In [112]:
# Element-wise division, row by row, stored as a new column.
df['GDP per Capita'] = df['GDP'].div(df['Population'])

In [113]:
df

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,GDP per Capita
Canada,35.467,1785387.0,9984670.0,0.913,America,50339.385908
France,63.951,2833687.0,640679.0,0.888,Europe,44310.284437
Germany,80.94,3874437.0,357114.0,0.916,Europe,47868.013343
Italy,60.665,2167744.0,301336.0,0.873,Europe,35733.025633
Japan,127.061,4602367.0,377930.0,0.891,Asia,36221.712406
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe,45729.239975
United States,318.523,17348075.0,9525067.0,0.915,America,54464.12033




---


# Statistical info
You've already seen the describe method, which gives you a good "summary" of the DataFrame. Let's explore other methods in more detail:

In [114]:
df.head()

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent,GDP per Capita
Canada,35.467,1785387.0,9984670.0,0.913,America,50339.385908
France,63.951,2833687.0,640679.0,0.888,Europe,44310.284437
Germany,80.94,3874437.0,357114.0,0.916,Europe,47868.013343
Italy,60.665,2167744.0,301336.0,0.873,Europe,35733.025633
Japan,127.061,4602367.0,377930.0,0.891,Asia,36221.712406


In [115]:
df.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI,GDP per Capita
count,7.0,7.0,7.0,7.0,7.0
mean,107.302571,5080248.0,3061327.0,0.900429,44952.254576
std,97.24997,5494020.0,4576187.0,0.016592,6954.983875
min,35.467,1785387.0,242495.0,0.873,35733.025633
25%,62.308,2500716.0,329225.0,0.8895,40265.998421
50%,64.511,2950039.0,377930.0,0.907,45729.239975
75%,104.0005,4238402.0,5082873.0,0.914,49103.699626
max,318.523,17348080.0,9984670.0,0.916,54464.12033


In [116]:
population = df['Population']

In [119]:
population.min(), population.max()

(35.467, 318.523)

In [122]:
population.sum().item()  # .item() unwraps the NumPy scalar into a plain Python number

751.118

In [123]:
population.sum() / len(population)  # mean computed by hand: total / number of rows (same value as population.mean())

np.float64(107.30257142857144)

In [124]:
population.mean()

np.float64(107.30257142857144)

In [125]:
population.median()

64.511

In [126]:
population.std()

97.24996987121581



---

