# Appendix 3: Python Libraries Crash Course

## Part 3: Pandas for Tabular Data

### Creating your very first Pandas DataFrame (from CSV)

In [None]:
import pandas as pd

In [None]:
pd.read_csv("titanic.csv")

In [None]:
# Parse the CSV into a DataFrame and bind it to a name so the following cells
# can reuse it. The relative path is resolved against the current working directory.
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic

### Pandas Display Options and the methods head() & tail()

In [None]:
titanic

In [None]:
# print() emits the plain-text repr of the DataFrame instead of the rich
# table a notebook renders for a bare expression.
print(titanic)

In [None]:
# Display option: the maximum number of rows pandas shows before it switches
# to a truncated view of the DataFrame.
pd.options.display.max_rows

In [None]:
# Display option: how many rows are shown in the truncated view, i.e. once
# the DataFrame has more rows than display.max_rows.
pd.options.display.min_rows

In [None]:
titanic

In [None]:
# Without an argument, head() returns the first 5 rows.
titanic.head()

In [None]:
titanic.head(20)

In [None]:
# Without an argument, tail() returns the last 5 rows.
titanic.tail()

In [None]:
titanic.tail(2)

### First Data Inspection

In [None]:
titanic

In [None]:
titanic.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); for a DataFrame
# with mixed dtypes only the numeric columns are included by default.
titanic.describe()

In [None]:
# "O" restricts the summary to object-dtype columns (typically the text
# columns), reporting count, unique, top and freq instead of numeric statistics.
titanic.describe(include = "O")

### Python Built-in Functions & DataFrame Attributes and Methods

In [None]:
titanic

#### DataFrames and Python Built-in Functions

In [None]:
type(titanic)

In [None]:
# len() of a DataFrame is its number of rows (not the number of cells).
len(titanic)

In [None]:
# The built-in round() works because DataFrame implements __round__;
# this is equivalent to titanic.round(0).
round(titanic, 0)

In [None]:
# Left commented out on purpose: int() cannot convert a DataFrame and raises TypeError.
#int(titanic)

In [None]:
# Pitfall: iterating a DataFrame yields its column labels, so the built-in
# min() returns the alphabetically smallest column name, NOT the smallest value.
# Use the DataFrame method titanic.min(numeric_only = True) for per-column minima.
min(titanic)

#### DataFrame Attributes

In [None]:
titanic.shape

In [None]:
# Total number of cells: number of rows multiplied by number of columns.
titanic.size

In [None]:
titanic.index

In [None]:
titanic.columns

#### DataFrame Methods

In [None]:
titanic.head(n = 2)

In [None]:
titanic.info()

In [None]:
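# Old call, kept for reference: since pandas 2.0 numeric_only defaults to False,
# so the reduction also runs over the non-numeric columns (and may raise TypeError
# when a text column contains missing values).
# titanic.min() # old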

In [None]:
# Per-column minimum restricted to the numeric columns; passing numeric_only
# explicitly makes the result independent of the pandas version's default.
titanic.min(numeric_only = True) # new

#### Method Chaining

In [None]:
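# Old call, kept for reference: since pandas 2.0 mean() no longer skips
# non-numeric columns by default and raises TypeError on text columns.
# titanic.mean().sort_values().head(2) # old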

In [None]:
# Method chaining: each call operates on the result of the previous one.
# mean() -> Series of column means; sort_values() -> ascending order;
# head(2) -> the two columns with the smallest mean.
titanic.mean(numeric_only = True).sort_values().head(2) # new

### Selecting Columns

In [None]:
titanic

In [None]:
titanic["age"]

In [None]:
type(titanic["age"])

In [None]:
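# Left commented out on purpose: "age", "sex" without inner brackets is a tuple,
# which pandas looks up as ONE column label and therefore raises KeyError.
#titanic["age", "sex"]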

In [None]:
# Outer brackets index the DataFrame; the inner brackets are a Python list of
# column labels. The result is a DataFrame with the columns in the listed order.
titanic[["age", "sex"]]

In [None]:
type(titanic[["age", "sex"]])

In [None]:
titanic[["sex", "age"]]

In [None]:
titanic[["sex", "age", "fare"]]

In [None]:
# A one-element list still returns a DataFrame (with a single column),
# whereas titanic["age"] with a plain label returns a Series.
type(titanic[["age"]])

### Selecting one Column with "dot notation"

In [None]:
# Attribute ("dot") access only works when the column name is a valid Python
# identifier and does not collide with an existing DataFrame attribute or method.
titanic.age

In [None]:
titanic.age.equals(titanic["age"])

In [None]:
titanic.embarked

### Position-based Indexing and Slicing with iloc[]

In [None]:
import pandas as pd

In [None]:
# Use the "Athlete" column as the row index so rows can be selected by athlete
# name with loc[]. NOTE(review): athlete names are presumably not unique in this
# data set, so a single label may match several rows - confirm against summer.csv.
summer = pd.read_csv("summer.csv", index_col = "Athlete")

In [None]:
summer

In [None]:
summer.info()

#### Selecting Rows with iloc[]

In [None]:
summer.iloc[0]

In [None]:
type(summer.iloc[0])

In [None]:
summer.iloc[1]

In [None]:
summer.iloc[-1]

In [None]:
summer.iloc[[1, 2, 3]]

In [None]:
# Positional slices exclude the stop position: this selects rows 1, 2 and 3,
# the same rows as summer.iloc[[1, 2, 3]].
summer.iloc[1:4]

In [None]:
summer.iloc[:5]

In [None]:
summer.iloc[-5:]

In [None]:
summer.iloc[:]

In [None]:
summer.iloc[[2, 45, 5467]]

#### Indexing/Slicing Rows and Columns with iloc[]

In [None]:
summer.head(10)

In [None]:
# iloc[row_position, column_position]: two integers select a single cell.
# Column positions do not count the index column.
summer.iloc[0, 4]

In [None]:
summer.iloc[0, :3]

In [None]:
summer.iloc[0, [0, 2, 5, 7]]

In [None]:
summer.iloc[34:39, [0, 2, 5, 7]]

#### Selecting Columns with iloc[]

In [None]:
# Checks that selecting all rows of the column at position 4 gives the same
# Series as selecting the column by name.
# Assumes "Country" is the fifth data column of summer.csv - TODO confirm.
summer.iloc[:, 4].equals(summer.Country)

In [None]:
summer["Country"]

### Label-based Indexing and Slicing with loc[] 

In [None]:
summer

#### Selecting Rows with loc[]

In [None]:
summer.iloc[2]

In [None]:
# Label-based row lookup via the index. Returns a Series when the label occurs
# once and a DataFrame when it matches several rows.
summer.loc["DRIVAS, Dimitrios"]

In [None]:
summer.loc["PHELPS, Michael"]

#### Indexing/Slicing Rows and Columns with loc[]

In [None]:
summer.loc["PHELPS, Michael", "Medal"]

In [None]:
summer.loc["PHELPS, Michael", ["Medal", "Event"]]

In [None]:
summer.loc[["PHELPS, Michael", "LEWIS, Carl"], ["Medal", "Event"]]

In [None]:
summer.loc[:, ["Medal", "Event"]]

In [None]:
summer.head(10)

In [None]:
# Unlike positional slices, label slices INCLUDE the stop label:
# all rows from the start up to and including "CHASAPIS, Spiridon".
summer.loc[:"CHASAPIS, Spiridon"]

In [None]:
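# Left commented out on purpose: presumably this label occurs more than once in the
# (unsorted) index, in which case pandas cannot determine the slice bound and raises KeyError.
#summer.loc[:"PHELPS, Michael"]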

In [None]:
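# Left commented out on purpose: same problem as above when the label is used as the
# start of the slice (presumably a non-unique label, so KeyError).
#summer.loc["PHELPS, Michael":]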

In [None]:
summer.head(20)

In [None]:
# Label slices on both axes; start and stop labels are included for the rows
# as well as for the columns ("City" through "Discipline").
summer.loc["DRIVAS, Dimitrios":"BLAKE, Arthur", "City":"Discipline"]

In [None]:
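# Left commented out on purpose: passing a list that contains a label missing from
# the index raises KeyError (presumably "DUCK, Donald" is not in the data).
#summer.loc[["PHELPS, Michael", "DUCK, Donald"]]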

In [None]:
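# Left commented out on purpose: the same rule applies to column labels
# (presumably there is no "Age" column, so this raises KeyError).
#summer.loc["PHELPS, Michael", ["Year", "Age"]]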

### Summary and Outlook

#### Importing from CSV and first Inspection

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv("summer.csv", index_col = "Athlete")

In [None]:
summer

In [None]:
summer.info()

#### Selecting one Column

In [None]:
summer.Medal

In [None]:
summer["Medal"]

#### Selecting multiple Columns

In [None]:
summer[["Year", "Medal"]]

In [None]:
summer.loc[:, ["Year", "Medal"]]

#### Selecting Rows by Position

In [None]:
# Rows at positions 10 through 20 (the stop position 21 is excluded), i.e. 11 rows.
summer.iloc[10:21]

#### Selecting Rows by Label

In [None]:
summer.loc["LEWIS, Carl"]

#### Putting it all together

In [None]:
summer[["Year", "Event", "Medal"]].loc["LEWIS, Carl"]

In [None]:
# Chained selection: first the rows by label, then the columns from the
# intermediate result. Fine for reading data, but it takes two separate
# indexing steps.
summer.loc["LEWIS, Carl"][["Year", "Event", "Medal"]]

In [None]:
# Preferred form: rows and columns are selected in one loc[] call,
# without creating an intermediate object.
summer.loc["LEWIS, Carl", ["Year", "Event", "Medal"]]

#### Outlook: Pandas Objects

In [None]:
summer

In [None]:
type(summer)

In [None]:
summer["Year"]

In [None]:
type(summer["Year"])

In [None]:
summer.columns

In [None]:
type(summer.columns)

In [None]:
summer.index

In [None]:
type(summer.index)