**Import the Libraries**

In [None]:
# import the libraries
import pandas as pd
import numpy as np

In [None]:
# Visualization libraries (matplotlib for figures, seaborn for statistical plots).
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices raised by the libraries used below.
# Consider narrowing it to specific warning categories.
warnings.filterwarnings("ignore")

**Loading the dataset**

In [None]:
# Load the "titanic" example dataset through seaborn's dataset loader.
df = sns.load_dataset("titanic")

In [None]:
# Preview the first rows of the freshly loaded data.
df.head()

In [None]:
# Summarize the columns: dtypes and non-null counts (a first hint of missing data).
df.info()

In [None]:
# Remove columns that duplicate information kept in other columns
# (presumably the counterparts of "survived", "pclass" and "embark_town",
# which the rest of the notebook uses).
# errors="ignore" keeps this cell re-runnable: without it, running the cell a
# second time raises KeyError because the columns have already been removed.
columns_to_drop = ["alive", "class", "embarked"]
df.drop(columns=columns_to_drop, errors="ignore", inplace=True)

In [None]:
# Confirm that "alive", "class" and "embarked" are no longer present.
df.head()

**Exploratory Data Analysis**

*Analyze missing values*

In [None]:
# Count the missing values of every column, in df.columns order.
null_num = [df[col].isnull().sum() for col in df.columns]

In [None]:
# Raw per-column missing-value counts (same order as df.columns).
null_num

In [None]:
# Label each count with its column name so the table is readable on its own.
pd.DataFrame(
    data=null_num,
    index=df.columns,
    columns=["Total missing values"],
)

In [None]:
# The deck variable has too many missing values to keep,
# but before removing it, look at how its known values are distributed.
sns.set_style("darkgrid")
sns.countplot(data=df, x="deck", palette="viridis")

In [None]:
# Let's see how survival is distributed across the known deck values


In [None]:
# Survivors vs. non-survivors for each deck value.
sns.set_style("darkgrid")
sns.countplot(data=df, x="deck", hue="survived", palette="viridis")

In [None]:
# Drop the deck column, which was judged to have too many missing values.
# errors="ignore" keeps this cell re-runnable: without it, running the cell a
# second time raises KeyError because "deck" has already been removed.
columns_to_drop = ["deck"]
df.drop(columns=columns_to_drop, errors="ignore", inplace=True)

In [None]:
# Confirm that the "deck" column is no longer present.
df.head()

*Analyzing the age variable according to the pclass variable*

In [None]:
# Age distribution within each passenger class.
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x="pclass", y="age")

In [None]:
# Fill a missing age with a fixed per-class replacement age (intended as the class median, Q2).
def fillna_age(columns):
    """Return the passenger's age, substituting a class-based value when it is missing.

    Meant to be applied row-wise, e.g.
    ``df[["age", "pclass"]].apply(fillna_age, axis=1)``.

    Parameters
    ----------
    columns : pandas.Series or sequence
        Two values in positional order: ``age`` first, ``pclass`` second.

    Returns
    -------
    The original ``age`` when it is not null; otherwise 38 for class 1,
    29 for class 2 and 24 for any other class value.
    """
    # Read the two values by position. On a Series, integer keys such as
    # ``columns[0]`` are treated as labels first and only fall back to positions
    # through a deprecated code path, so use ``.iloc`` there. Plain sequences
    # (list, tuple) keep ordinary indexing.
    if isinstance(columns, pd.Series):
        age, pclass = columns.iloc[0], columns.iloc[1]
    else:
        age, pclass = columns[0], columns[1]

    # Present ages pass through untouched.
    if not pd.isnull(age):
        return age

    # NOTE(review): the constants are hard-coded, presumably read off the
    # age-by-pclass boxplot; confirm against df.groupby("pclass")["age"].median().
    if pclass == 1:
        return 38
    if pclass == 2:
        return 29
    return 24

In [None]:
# Impute the missing ages row by row from each (age, pclass) pair.
df["age"] = df[["age", "pclass"]].apply(fillna_age, axis=1)

In [None]:
# Re-check the per-column missing-value counts after filling the ages.
null_num = [df[col].isnull().sum() for col in df.columns]
pd.DataFrame(
    data=null_num,
    index=df.columns,
    columns=["Total missing values"],
)

In [None]:
# Discard every row that still holds a missing value (2 NaNs are expected here).
df.dropna(axis=0, how="any", inplace=True)

In [None]:
# Final check: every column should now report zero missing values.
null_num = [df[col].isnull().sum() for col in df.columns]
pd.DataFrame(
    data=null_num,
    index=df.columns,
    columns=["Total missing values"],
)

**Binary Analysis**

*Analyze the target variable "Survived"*

In [None]:
# Class balance of the target variable.
sns.set_style("darkgrid")
sns.countplot(data=df, x="survived", palette="viridis")

In [None]:
# Exact counts per outcome; df.survived.value_counts() is equivalent.
df["survived"].value_counts()

In [None]:
# Survival split by whether the passenger travelled alone.
sns.set_style("dark")
sns.countplot(data=df, x="survived", hue="alone", palette="viridis")

*What's the effect of being an adult male on board?*

In [None]:
# Survival split by the adult_male flag.
sns.set_style("ticks")
sns.countplot(data=df, x="survived", hue="adult_male", palette="rainbow")

In [None]:
# How does gender relate to survival?
sns.set_style("darkgrid")
sns.countplot(data=df, x="survived", hue="sex", palette="viridis")

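- The majority of those who lost their lives were men.
- There were more female survivors than male survivors.
- This is consistent with women being given priority. Note that this plot only splits survival by sex, so the claim that children were also prioritized would need a breakdown by age to confirm.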

*Do the port cities, where the passengers embarked, impact survival?*

In [None]:
# Survival split by port of embarkation.
sns.set_style("darkgrid")
sns.countplot(data=df, x="survived", hue="embark_town", palette="rainbow")

In [None]:
# Passenger totals per port, to put the survival bars into proportion.
df["embark_town"].value_counts()

*What are the chances of survival if one had a sibling or spouse on board?*

In [None]:
# Survival split by the sibsp value of each passenger.
sns.set_style("darkgrid")
sns.countplot(data=df, x="survived", hue="sibsp", palette="rainbow")