# Appendix 3: Python Libraries Crash Course

## Part 5: Pandas Intermediate 2

## Filtering DataFrames with one Condition

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head(10)

In [None]:
titanic.sex.head(10)

In [None]:
titanic.sex == "male"

In [None]:
titanic[titanic.sex == "male"]["fare"]

In [None]:
titanic.loc[titanic.sex == "male", "fare"]

In [None]:
# Boolean Series with one entry per row: True where the "sex" value equals "male".
mask1 = titanic.sex == "male"
mask1

In [None]:
titanic_male = titanic.loc[mask1]

In [None]:
titanic_male.head()

In [None]:
titanic.dtypes# == object

In [None]:
# Boolean Series with one entry per column: True where the column's dtype is object.
mask2 = titanic.dtypes == object
mask2

In [None]:
# Keep all rows (:) and only the columns where mask2 is False (~ inverts the mask).
titanic.loc[:, ~mask2]

In [None]:
# Filter both axes at once: rows where mask1 is True, columns where mask2 is False.
titanic.loc[mask1, ~mask2]

## Filtering DataFrames with many Conditions (AND)

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head(10)

In [None]:
mask1 = titanic.sex == "male"
mask1.head()

In [None]:
mask2 = titanic.age > 14
mask2.head()

In [None]:
(mask1 & mask2).head()

In [None]:
# Combine both conditions with the element-wise AND operator (&): keep only the
# rows where mask1 (sex == "male") and mask2 (age > 14) are both True, and
# restrict the result to the four listed columns.
male_adult = titanic.loc[mask1 & mask2, ["survived", "pclass", "sex", "age"]]
male_adult.head(20)

In [None]:
male_adult.info()

In [None]:
male_adult.describe()

In [None]:
titanic.describe()

## Filtering DataFrames with many Conditions (OR)

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
mask1 = titanic.sex == "female"
mask1.head(20)

In [None]:
mask2 = titanic.age < 14
mask2.head(20)

In [None]:
(mask1 | mask2).head(11)

In [None]:
titanic.loc[mask1 | mask2]

In [None]:
# Element-wise OR (|): keep rows where mask1 (sex == "female") or mask2 (age < 14)
# is True, restricted to the four listed columns.
wom_or_chi = titanic.loc[mask1 | mask2, ["survived", "pclass", "sex", "age"]]

In [None]:
wom_or_chi.head()

In [None]:
wom_or_chi.info()

In [None]:
wom_or_chi.describe()

In [None]:
titanic.describe()

## Advanced Filtering with between(), isin() and ~

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv("summer.csv")

In [None]:
summer.head()

In [None]:
og_1988 = summer.loc[summer.Year == 1988]

In [None]:
og_1988.head()

In [None]:
og_1988.tail()

In [None]:
og_1988.info()

In [None]:
og_since1992 = summer.loc[summer.Year >= 1992]

In [None]:
og_since1992.head()

In [None]:
og_since1992.tail()

In [None]:
summer.Year.between(1960, 1969).head()

In [None]:
#og_60s = summer.loc[summer.Year.between(1960, 1969, inclusive=True)] # old

In [None]:
# inclusive="both" keeps both endpoints, i.e. rows with 1960 <= Year <= 1969.
og_60s = summer.loc[summer.Year.between(1960, 1969, inclusive="both")] # new

In [None]:
og_60s.head()

In [None]:
og_60s.tail()

In [None]:
my_favourite_games = [1972, 1996]

In [None]:
summer.Year.isin(my_favourite_games).head()

In [None]:
og_72_96 = summer.loc[summer.Year.isin(my_favourite_games)]

In [None]:
og_72_96.head()

In [None]:
og_72_96.tail()

In [None]:
# ~ negates the isin() mask: keep every row whose Year is NOT in my_favourite_games.
og_not_72_96 = summer.loc[~summer.Year.isin(my_favourite_games)]

In [None]:
og_not_72_96.head()

In [None]:
og_not_72_96.Year.unique()

## Intro to NA Values

In [None]:
import pandas as pd
import numpy as np

In [None]:
sales = pd.read_csv("sales.csv", index_col = 0)

In [None]:
sales

In [None]:
sales.info()

In [None]:
sales.loc["Steven", "Thu"]

In [None]:
sales.iloc[1,1] = None

In [None]:
sales

In [None]:
sales.iloc[2,2] = np.nan

In [None]:
sales

In [None]:
sales.info()

## Handling NA Values / missing Values

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
titanic.info()

In [None]:
titanic.isna().sum()

In [None]:
titanic.notna().sum()

In [None]:
titanic.loc[titanic.embarked.isna()]

In [None]:
titanic.shape

In [None]:
titanic.dropna()

In [None]:
titanic.dropna().shape

In [None]:
titanic.dropna(how = "all").shape

In [None]:
titanic.dropna(axis = 1, how = "any").shape

In [None]:
titanic.dropna(axis = 1, thresh = 500).shape

In [None]:
# Drop columns (axis = 1) that have fewer than 500 non-NA values; inplace = True
# modifies titanic itself instead of returning a new DataFrame.
titanic.dropna(axis = 1, thresh = 500, inplace = True)

In [None]:
titanic.info()

In [None]:
titanic.loc[titanic.age.isna()]

In [None]:
mean_age = titanic.age.mean()
mean_age

In [None]:
# titanic.age.fillna(value = mean_age, inplace = True) # old

In [None]:
# Replace missing ages with mean_age and assign the result back to the column
# (explicit reassignment instead of calling fillna with inplace = True on the column).
titanic["age"] = titanic["age"].fillna(value = mean_age) # new

In [None]:
titanic.age

In [None]:
titanic.info()

## Exporting DataFrames to csv

In [None]:
titanic.head()

In [None]:
# Write the DataFrame to "clean_df.csv"; index = False leaves the row index out of the file.
titanic.to_csv("clean_df.csv", index = False)

In [None]:
pd.read_csv("clean_df.csv")

## Summary Statistics and Accumulations

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
titanic.describe()

In [None]:
titanic.count(axis = "columns")

In [None]:
titanic.count(axis = 1)

In [None]:
#titanic.mean(axis = 1) # old

In [None]:
titanic.mean(axis = 1, numeric_only=True) # new

In [None]:
#titanic.sum(axis = 0) # old

In [None]:
titanic.sum(axis = 0, numeric_only=True) # new

In [None]:
titanic.head()

In [None]:
titanic.fare.cumsum(axis = 0)

In [None]:
#titanic.corr() # old

In [None]:
titanic.corr(numeric_only=True) # new

In [None]:
titanic.survived.corr(titanic.pclass)