In [None]:
import pandas as pd
import numpy as np

In [None]:
# G7 populations (in millions), one value per country in alphabetical order.
# No index is given, so the Series gets the default integer index.
g7 = pd.Series(
    data=[
        35.467,   # Canada
        63.951,   # France
        80.940,   # Germany
        60.665,   # Italy
        127.061,  # Japan
        64.511,   # United Kingdom
        318.523,  # United States
    ]
)

In [None]:
# Show the Series: the values next to the default integer index.
g7

In [None]:
# Give the Series a descriptive name (also records the unit: millions).
g7.name = 'G7 Population in millions'

In [None]:
# The index still holds the default integers here, so 0 is looked up as a label.
g7[0]

In [None]:
g7.index

In [None]:
# Replace the integer index with country names, in the same order as the values.
g7.index = [
    'Canada',
    'France',
    'Germany',
    'Italy',
    'Japan',
    'United Kingdom',
    'United States',
]

In [None]:
# Same values as before, now labelled by country.
g7

In [None]:
# A Series can also be built straight from a mapping: the keys become the
# index labels and the values become the data.
g7_example = pd.Series(
    data=dict(
        Canada=35.467,
        France=63.951,
        Germany=80.94,
        Italy=60.665,
    )
)

## Indexing

In [None]:
# With country names as the index, values are looked up by label.
g7['Canada']

In [None]:
g7.iloc[0]  # .iloc selects by position, regardless of the index labels

In [None]:
# A list of labels selects several entries at once and returns a Series.
g7[['Canada', 'Italy']]

In [None]:
# Label-based slice: unlike positional slicing, the end label is included.
g7['Canada': 'Italy']

## Conditional Selection

In [None]:
# Element-wise comparison: yields a boolean Series with the same index.
g7 > 70

In [None]:
# Use that boolean Series as a mask to keep only the entries above 70.
g7[g7 > 70]

In [None]:
g7.mean()

In [None]:
g7.std()

## Operations and Methods

In [None]:
# Arithmetic is applied to every element; here millions become a head count.
g7 * 1_000_000

In [None]:
# NumPy functions also work element-wise on a Series.
np.log(g7)

## Modifying Series

In [None]:
# Assignment by label changes g7 itself; the cells above will show the new
# value if they are re-run after this point.
g7['Canada'] = 40.5

In [None]:
g7

In [None]:
# Assignment by position: overwrite the last entry.
g7.iloc[-1] = 500

In [None]:
g7

In [None]:
# Assignment through a boolean mask: every entry currently above 70 is set to 99.99.
g7[g7 > 70] = 99.99

In [None]:
g7

# DataFrames

In [None]:
# G7 indicators, one list per column. Every list has seven entries, one per
# country; None marks a figure that is missing.
df = pd.DataFrame(
    data={
        'Population': [35.4, 63.9, 80.9, 60.6, 127.0, 64.5, 318.5],
        'GDP': [1785387, 2833687, 3874437, 2167744, 4602367, None, None],
        'Surface Area': [9984670, 640679, 357114, 301336, 377930, None, None],
        'HDI': [0.913, 0.888, 0.916, 0.8, None, None, None],
        'Continent': [
            'America',
            'Europe',
            'Europe',
            'Europe',
            'Asia',
            'Europe',
            'America',
        ],
    },
    # Explicit column order for the resulting frame.
    columns=['Population', 'GDP', 'Surface Area', 'HDI', 'Continent'],
)

In [None]:
df

In [None]:
# Label the rows with country names, in the same order as the column lists.
df.index = [
    'Canada',
    'France',
    'Germany',
    'Italy',
    'Japan',
    'United Kingdom',
    'United States',
]

In [None]:
df

In [None]:
df.columns

In [None]:
df.index

In [None]:
# Per-column dtype and non-null count, plus memory usage.
df.info()

In [None]:
# Total number of cells (rows x columns).
df.size

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Summary statistics for the numeric columns.
df.describe()

In [None]:
df.dtypes

In [None]:
# How many columns share each dtype.
df.dtypes.value_counts()

## Indexing and Selection

In [None]:
df

In [None]:
# .loc selects a row by its label; a single row comes back as a Series.
df.loc['Canada']

In [None]:
# Plain [] with a column name selects that column as a Series.
df['Population']

In [None]:
# .iloc selects by position; -1 is the last row.
df.iloc[-1]

In [None]:
# Same column, but as a one-column DataFrame instead of a Series.
df['Population'].to_frame()

In [None]:
# A list of column names returns a DataFrame with just those columns.
df[['Population', 'GDP']]

In [None]:
# A slice inside plain [] selects rows (here positions 1 and 2), not columns.
df[1:3]

In [None]:
df.loc['Italy']

In [None]:
# Label slice on rows: the end label ('Italy') is included.
df.loc['France': 'Italy']

In [None]:
# Second argument to .loc picks the column(s) for the selected rows.
df.loc['France': 'Italy', 'Population']

In [None]:
df.loc['France': 'Italy', ['Population', 'GDP']]

In [None]:
df

In [None]:
# First row by position.
df.iloc[0]

In [None]:
# Last row by position.
df.iloc[-1]

In [None]:
# A list of positions selects several rows: first, second and last.
df.iloc[[0, 1, -1]]

In [None]:
# Positional slice: the end position (3) is excluded, so rows 1 and 2.
df.iloc[1:3]

In [None]:
# Rows 1-2, column at position 3 ('HDI' in the column order defined above).
df.iloc[1:3, 3]

In [None]:
# Rows 1-2, columns at positions 0 and 3 ('Population' and 'HDI').
df.iloc[1:3, [0, 3]]

In [None]:
# Rows 1-2, columns at positions 1 and 2 ('GDP' and 'Surface Area').
df.iloc[1:3, 1:3]

## Conditional Selection (Boolean arrays)

In [None]:
df

In [None]:
# Boolean Series, one True/False per row.
df['Population'] > 70

In [None]:
# Pass the boolean Series to .loc to keep only the rows where it is True.
df.loc[df['Population'] > 70]

In [None]:
# Row mask plus a column label: filtered rows, single column.
df.loc[df['Population'] > 70, 'Population']

In [None]:
# Row mask plus a list of column labels.
df.loc[df['Population'] > 70, ['Population', 'GDP']]

## Dropping

In [None]:
# drop returns a new DataFrame; none of the calls in this section assign the
# result, so df itself is left unchanged.
df.drop('Canada')

In [None]:
# Several row labels at once.
df.drop(['Canada', 'Japan'])

In [None]:
# columns= drops columns instead of rows.
df.drop(columns=['Population', 'HDI'])

In [None]:
# Same row drop, with the axis spelled out explicitly.
df.drop(['Italy', 'Canada'], axis='rows')

In [None]:
# Same column drop as the columns= form, expressed through axis.
df.drop(['Population', 'HDI'], axis='columns')

## Operations

In [None]:
df[['Population', 'GDP']]

In [None]:
# A scalar operation is applied to every cell of the selected columns.
df[['Population', 'GDP']] / 100

In [None]:
# Per-column adjustment: each value is keyed by the df column it applies to.
crisis = pd.Series({'GDP': -1_000_000, 'HDI': -0.3})
crisis

In [None]:
# The Series index is matched against the column labels, so each crisis value
# is added to every row of the column with the same name.
df[['GDP', 'HDI']] + crisis

## Modifying DataFrames

In [None]:
# Official language for a subset of the countries, keyed by country name.
langs = pd.Series(
    {
        'France': 'French',
        'Germany': 'German',
        'Italy': 'Italian',
    },
    name='Language',
)

In [None]:
# Adding a column from a Series aligns on the index: only France, Germany and
# Italy appear in langs, so the remaining rows get a missing value.
df['Language'] = langs

In [None]:
df

In [None]:
# Assigning a scalar overwrites the whole column with that value.
df['Language'] = 'English'

In [None]:
df

In [None]:
# Conditional assignment via .loc. Every row was just set to 'English', so
# this rewrites the entire column to 'French'.
df.loc[df['Language'] == 'English', 'Language'] = 'French'

In [None]:
df

In [None]:
# rename returns a renamed copy; the result is not assigned, so df keeps its
# original labels. 'Anual Popcorn Consumption' and 'Argentina' are not labels
# of this frame.
# NOTE(review): presumably included to show that rename skips unknown keys by
# default - confirm that is the intent.
df.rename(
    columns={
        'HDI': 'Human Development Index',
        'Anual Popcorn Consumption': 'APC'
    }, index={
        'United States': 'USA',
        'United Kingdom': 'UK',
        'Argentina': 'AR'
    })

In [None]:
# Remove the demo 'Language' column. Reassigning (instead of inplace=True)
# keeps the usual "methods return a new frame" flow, and errors='ignore' makes
# the cell safe to re-run once the column is already gone.
df = df.drop(columns='Language', errors='ignore')

In [None]:
# Append a 'China' row. Only Population and GDP are supplied; the other
# columns are filled with missing values by concat.
# Any existing 'China' row is dropped first so that re-running this cell
# replaces the row instead of stacking a duplicate each time.
df = pd.concat([
    df.drop(index='China', errors='ignore'),
    pd.DataFrame([{
        'Population': 3,
        'GDP': 5
    }], index=['China'])
])

In [None]:
# Check the appended 'China' row at the bottom of the frame.
df

## Statistics

In [None]:
# First five rows.
df.head()

In [None]:
df.describe()

In [None]:
# Work with a single column as a Series for the statistics below.
population = df['Population']

In [None]:
population.min(), population.max()

In [None]:
population.sum()

In [None]:
# Mean computed by hand. len() counts every row, so this only matches
# .mean() because Population has no missing values.
population.sum() / len(population)

In [None]:
population.mean()

In [None]:
population.std()

In [None]:
population.median()

In [None]:
population.describe()

In [None]:
# First quartile.
population.quantile(.25)

In [None]:
# Several quantiles at once; the result is a Series indexed by the quantile.
population.quantile([.2, .4, .6, .8, 1])

## Hands On

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Read the CSV (path is relative to the working directory) and display the
# result; nothing is stored yet.
pd.read_csv('sample_data/california_housing_train.csv')

In [None]:
# Read it again and keep it. NOTE: this rebinds df, replacing the G7 frame
# built earlier, so the G7 cells above no longer work if re-run after this
# point.
df = pd.read_csv('sample_data/california_housing_train.csv')

In [None]:
# First five rows.
df.head()

In [None]:
# Last three rows.
df.tail(3)

In [None]:
df.shape

In [None]:
df.dtypes

## Plotting Basics

In [None]:
# Default pandas plot of the frame loaded above: with no arguments this is a
# line plot drawn against the row index.
df.plot()