# Imports

In [None]:
import pandas as pd

# Constants

In [None]:
# Location of the Titanic passenger CSV. Despite the `_URL` suffix, the value
# is a relative file path; it is passed unchanged to `pd.read_csv`.
TITANIC_CSV_DATA_URL = "Files/PandasFolder/Titanic.csv"

# Original column names.
# INDEX_COL is passed as `index_col` to `pd.read_csv`, so the first CSV
# column becomes the DataFrame index.
INDEX_COL = 0
NAME_COL = "Name"
SEX_COL = "Sex"
PCLASS_COL = "PClass"
AGE_COL = "Age"
SURVIVED_COL = "Survived"

# New column names.
# AGE_GROUP_COL is the column the notebook derives from AGE_COL.
AGE_GROUP_COL = "AgeGroup"

# Other constants.
# Number of leading rows shown when previewing a frame or a single column.
HEAD_ROWS_TO_SHOW = 15

# Data load

In [None]:
df = pd.read_csv(TITANIC_CSV_DATA_URL)
df[:HEAD_ROWS_TO_SHOW]

In [None]:
df = pd.read_csv(TITANIC_CSV_DATA_URL, index_col=INDEX_COL)
df[:HEAD_ROWS_TO_SHOW]

# Analysis of data types

In [None]:
# dtype of every column in the loaded frame.
df.dtypes

In [None]:
# Printed summary of the frame: per-column non-null counts and dtypes.
df.info()

In [None]:
# (number of rows, number of columns).
df.shape

# Categorical features

In [None]:
df[SEX_COL][:HEAD_ROWS_TO_SHOW]

In [None]:
df[SEX_COL].value_counts()

In [None]:
df[PCLASS_COL].value_counts()

In [None]:
df[PCLASS_COL].value_counts().plot.pie()

# Quantitative features

In [None]:
df[AGE_COL].mean()

In [None]:
df[AGE_COL].plot(kind="hist")

In [None]:
df[AGE_COL].value_counts()

In [None]:
df.info()

In [None]:
# Bucket each age into its decade: 0-9 -> 0, 10-19 -> 1, and so on.
#
# `int()` cannot convert NaN and raises ValueError, so the lambda must never
# see a missing age. `na_action="ignore"` makes `map` pass missing values
# through untouched instead of calling the function on them. The cell
# therefore runs whether or not Age contains gaps; rows without an age simply
# keep a missing AgeGroup.
df[AGE_GROUP_COL] = df[AGE_COL].map(
    lambda age: int(age // 10),
    na_action="ignore",
)
df[:HEAD_ROWS_TO_SHOW]

In [None]:
# Rebuild the age groups with missing ages given their own bucket:
#   missing age -> placeholder -1 -> group 0
#   0-9 -> group 1, 10-19 -> group 2, ...
#
# The filled Series is assigned back rather than calling
# `df[col].fillna(..., inplace=True)`. That form mutates an intermediate
# Series: it triggers a chained-assignment warning on pandas 2.x and leaves
# `df` unchanged under Copy-on-Write, after which `int()` would fail on the
# NaN values that are still present.
df[AGE_GROUP_COL] = (
    df[AGE_COL]
    .fillna(-1)
    .map(lambda age: int(age // 10) + 1)
)
df[:HEAD_ROWS_TO_SHOW]

In [None]:
df[AGE_GROUP_COL].value_counts()

In [None]:
df[AGE_GROUP_COL].value_counts().plot.pie()

# Investigation of the correlation between 2 features

In [None]:
df[[AGE_GROUP_COL, SURVIVED_COL]][:HEAD_ROWS_TO_SHOW]

In [None]:
age_by_survived = df.groupby([AGE_GROUP_COL, SURVIVED_COL])
age_by_survived

In [None]:
age_by_survived.size()

In [None]:
age_by_survived = age_by_survived.size().unstack()
age_by_survived

In [None]:
age_by_survived.fillna(0).plot(kind='barh', stacked=True)

In [None]:
age_by_survived_without_na = age_by_survived[1:].fillna(0)
age_by_survived_without_na

In [None]:
age_by_survived_without_na.plot(kind='barh', stacked=True)

In [None]:
z = df.groupby("AgeGroup")["Survived"].mean().reset_index(name="SurvPct")
z["NonSurvPct"] = 1 - z["SurvPct"]
z=z.set_index("AgeGroup")
z

In [None]:
z.plot(kind='barh', stacked=True)