# Pandas DataFrame

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Small hand-written dataset: one row per territory, one column per attribute.
# `columns=` pins the left-to-right column order explicitly.
df = pd.DataFrame(
    data={
        'Population': [12.93, 10.45, 18.5, 20.89],
        'GDP': [100057500, 108057500, 120057550, 200057500],
        'Surface Area': [9044567054, 9044567, 94067054, 904456],
        'Continent': ['Asia', 'Austrailia', 'Asia', 'Europe'],
    },
    columns=['Population', 'GDP', 'Surface Area', 'Continent'],
)

In [4]:
df

Unnamed: 0,Population,GDP,Surface Area,Continent
0,12.93,100057500,9044567054,Asia
1,10.45,108057500,9044567,Austrailia
2,18.5,120057550,94067054,Asia
3,20.89,200057500,904456,Europe


In [5]:
# Swap the default 0..3 integer index for one descriptive label per row.
df.index = pd.Index(['Narnia', 'Winterfell', 'High Garden', 'Kings Landing'])

In [6]:
df

Unnamed: 0,Population,GDP,Surface Area,Continent
Narnia,12.93,100057500,9044567054,Asia
Winterfell,10.45,108057500,9044567,Austrailia
High Garden,18.5,120057550,94067054,Asia
Kings Landing,20.89,200057500,904456,Europe


In [7]:
# Print a structural summary: index span, per-column non-null counts and
# dtypes, and the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, Narnia to Kings Landing
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Population    4 non-null      float64
 1   GDP           4 non-null      int64  
 2   Surface Area  4 non-null      int64  
 3   Continent     4 non-null      object 
dtypes: float64(1), int64(2), object(1)
memory usage: 160.0+ bytes


In [8]:
# Total number of cells in the frame (4 rows x 4 columns = 16).
df.size

16

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max). Only the three
# numeric columns appear in the result; the text column 'Continent' does not.
df.describe()

Unnamed: 0,Population,GDP,Surface Area
count,4.0,4.0,4.0
mean,15.6925,132057500.0,2287146000.0
std,4.830848,46072400.0,4505145000.0
min,10.45,100057500.0,904456.0
25%,12.31,106057500.0,7009539.0
50%,15.715,114057500.0,51555810.0
75%,19.0975,140057500.0,2331692000.0
max,20.89,200057500.0,9044567000.0


In [10]:
# Column labels of the frame, returned as an Index.
df.columns

Index(['Population', 'GDP', 'Surface Area', 'Continent'], dtype='object')

In [11]:
# Row labels of the frame, returned as an Index (the names assigned earlier).
df.index

Index(['Narnia', 'Winterfell', 'High Garden', 'Kings Landing'], dtype='object')

In [12]:
# (number of rows, number of columns) as a tuple.
df.shape

(4, 4)

In [13]:
# dtype of each column, returned as a Series indexed by column label.
df.dtypes

Population      float64
GDP               int64
Surface Area      int64
Continent        object
dtype: object

In [14]:
# Tally how many columns share each dtype (here: two int64, one float64,
# one object).
df.dtypes.value_counts()

int64      2
float64    1
object     1
Name: count, dtype: int64

In [15]:
# Selecting with a one-element list keeps the result two-dimensional:
# a single-column DataFrame rather than a Series.
df[['Population']]

Unnamed: 0,Population
Narnia,12.93
Winterfell,10.45
High Garden,18.5
Kings Landing,20.89


In [16]:
# Label-based lookup of a single row. The row comes back as a Series named
# 'Narnia' whose index is the frame's column labels.
df.loc['Narnia']

Population           12.93
GDP              100057500
Surface Area    9044567054
Continent             Asia
Name: Narnia, dtype: object

![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

# Conditional Selection

In [17]:
df

Unnamed: 0,Population,GDP,Surface Area,Continent
Narnia,12.93,100057500,9044567054,Asia
Winterfell,10.45,108057500,9044567,Austrailia
High Garden,18.5,120057550,94067054,Asia
Kings Landing,20.89,200057500,904456,Europe


In [18]:
# Element-wise "greater than" test: a boolean Series with one flag per row.
df['Population'].gt(15)

Narnia           False
Winterfell       False
High Garden       True
Kings Landing     True
Name: Population, dtype: bool

In [19]:
# Keep only the rows whose Population exceeds 15. The callable receives the
# frame itself and returns the boolean mask used for the selection.
df.loc[lambda frame: frame['Population'] > 15]

Unnamed: 0,Population,GDP,Surface Area,Continent
High Garden,18.5,120057550,94067054,Asia
Kings Landing,20.89,200057500,904456,Europe


In [20]:
# Row filter and column pick in one step: the 'Continent' values of the rows
# whose Population exceeds 15, returned as a Series.
df.loc[df['Population'].gt(15), 'Continent']

High Garden        Asia
Kings Landing    Europe
Name: Continent, dtype: object

In [21]:
# Same row filter, but a list of column labels keeps the result a DataFrame.
df.loc[
    df['Population'].gt(15),
    ['Population', 'Continent'],
]

Unnamed: 0,Population,Continent
High Garden,18.5,Asia
Kings Landing,20.89,Europe


In [22]:
# Returns a new frame without 'Continent'. The result is not assigned,
# so `df` itself still has the column afterwards.
df.drop(columns='Continent')

Unnamed: 0,Population,GDP,Surface Area
Narnia,12.93,100057500,9044567054
Winterfell,10.45,108057500,9044567
High Garden,18.5,120057550,94067054
Kings Landing,20.89,200057500,904456


In [23]:
df

Unnamed: 0,Population,GDP,Surface Area,Continent
Narnia,12.93,100057500,9044567054,Asia
Winterfell,10.45,108057500,9044567,Austrailia
High Garden,18.5,120057550,94067054,Asia
Kings Landing,20.89,200057500,904456,Europe


![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

# Modifying Dataframes
### Operations between a DataFrame and a Series

In [24]:
# Per-column offsets, keyed by the column label each one applies to.
new_series = pd.Series({'Population': -1.5, 'GDP': 1000})

In [25]:
new_series

Population      -1.5
GDP           1000.0
dtype: float64

In [26]:
# The Series index is matched against the column labels, so each offset is
# added to every row of its column. This only builds a new frame.
df[['Population', 'GDP']].add(new_series, axis='columns')

Unnamed: 0,Population,GDP
Narnia,11.43,100058500.0
Winterfell,8.95,108058500.0
High Garden,17.0,120058550.0
Kings Landing,19.39,200058500.0


In [27]:
df

Unnamed: 0,Population,GDP,Surface Area,Continent
Narnia,12.93,100057500,9044567054,Asia
Winterfell,10.45,108057500,9044567,Austrailia
High Garden,18.5,120057550,94067054,Asia
Kings Landing,20.89,200057500,904456,Europe


In [28]:
# Apply the per-column offsets from `new_series` and store the result back
# into `df`.
# NOTE: this cell is not idempotent - every re-run shifts the two columns
# again, so restart the kernel and run all cells to reproduce the output.
# NOTE: `new_series` is float64, so GDP is stored as float from here on
# (the displayed values gain a trailing ".0").
df[['Population', 'GDP']] += new_series
df

Unnamed: 0,Population,GDP,Surface Area,Continent
Narnia,11.43,100058500.0,9044567054,Asia
Winterfell,8.95,108058500.0,9044567,Austrailia
High Garden,17.0,120058550.0,94067054,Asia
Kings Landing,19.39,200058500.0,904456,Europe


![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

# Adding a New Column

In [29]:
# Language per territory, keyed by row label. 'High Garden' is not included.
new_lang_colunn = pd.Series(
    {
        'Narnia': 'Old Valyrian',
        'Winterfell': 'High Valyrian',
        'Kings Landing': 'Dothraki',
    },
    name='Language',
)

In [30]:
# Values are matched to rows by index label, not by position. 'High Garden'
# has no entry in the Series, so its 'Language' cell is left missing.
df['Language'] = new_lang_colunn

In [31]:
df

Unnamed: 0,Population,GDP,Surface Area,Continent,Language
Narnia,11.43,100058500.0,9044567054,Asia,Old Valyrian
Winterfell,8.95,108058500.0,9044567,Austrailia,High Valyrian
High Garden,17.0,120058550.0,94067054,Asia,
Kings Landing,19.39,200058500.0,904456,Europe,Dothraki


![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

### Setting every value in a column

In [32]:
# Assigning a scalar broadcasts it to every row, overwriting the existing
# per-row values (including the row that was missing).
df['Language'] = 'Espanol'

In [33]:
df

Unnamed: 0,Population,GDP,Surface Area,Continent,Language
Narnia,11.43,100058500.0,9044567054,Asia,Espanol
Winterfell,8.95,108058500.0,9044567,Austrailia,Espanol
High Garden,17.0,120058550.0,94067054,Asia,Espanol
Kings Landing,19.39,200058500.0,904456,Europe,Espanol


![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

### Renaming columns and index labels

In [34]:
# Relabel one column and two rows. Each `rename` returns a new frame and the
# result is not assigned, so `df` keeps its original labels.
(
    df
    .rename({'GDP': "Gross Domestic Product"}, axis='columns')
    .rename({'High Garden': 'HG', 'Kings Landing': 'KL'}, axis='index')
)

Unnamed: 0,Population,Gross Domestic Product,Surface Area,Continent,Language
Narnia,11.43,100058500.0,9044567054,Asia,Espanol
Winterfell,8.95,108058500.0,9044567,Austrailia,Espanol
HG,17.0,120058550.0,94067054,Asia,Espanol
KL,19.39,200058500.0,904456,Europe,Espanol


![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

### Creating columns from calculations on other columns (as in Excel)

In [35]:
df

Unnamed: 0,Population,GDP,Surface Area,Continent,Language
Narnia,11.43,100058500.0,9044567054,Asia,Espanol
Winterfell,8.95,108058500.0,9044567,Austrailia,Espanol
High Garden,17.0,120058550.0,94067054,Asia,Espanol
Kings Landing,19.39,200058500.0,904456,Europe,Espanol


In [36]:
# Row-by-row division of GDP by Population (aligned on the row labels).
df['GDP'].div(df['Population'])

Narnia           8.754024e+06
Winterfell       1.207358e+07
High Garden      7.062268e+06
Kings Landing    1.031761e+07
dtype: float64

In [37]:
# Store the row-wise GDP / Population ratio as a new derived column.
df['GDP per Capita'] = df['GDP'].div(df['Population'])

In [38]:
df

Unnamed: 0,Population,GDP,Surface Area,Continent,Language,GDP per Capita
Narnia,11.43,100058500.0,9044567054,Asia,Espanol,8754024.0
Winterfell,8.95,108058500.0,9044567,Austrailia,Espanol,12073580.0
High Garden,17.0,120058550.0,94067054,Asia,Espanol,7062268.0
Kings Landing,19.39,200058500.0,904456,Europe,Espanol,10317610.0


In [39]:
# Preview the first rows. The frame has only four rows and all of them are
# shown.
df.head()

Unnamed: 0,Population,GDP,Surface Area,Continent,Language,GDP per Capita
Narnia,11.43,100058500.0,9044567054,Asia,Espanol,8754024.0
Winterfell,8.95,108058500.0,9044567,Austrailia,Espanol,12073580.0
High Garden,17.0,120058550.0,94067054,Asia,Espanol,7062268.0
Kings Landing,19.39,200058500.0,904456,Europe,Espanol,10317610.0
