In [29]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

In [None]:
# Read titanic.csv (resolved relative to the current working directory) into a DataFrame.
df = pd.read_csv("titanic.csv")
# Bare final expression: the notebook renders the frame as this cell's output.
df

In [None]:
# Print the frame's dimensions as a (rows, columns) tuple.
print(df.shape)

In [None]:
# Summary statistics of the frame's columns; with default arguments pandas
# summarizes only the numeric columns of a mixed-dtype frame.
df.describe()

In [22]:
def plot_age_fare_relation(df):
    """
    Relational graph: ticket fare plotted against passenger age.

    Draws a 10x6 inch seaborn line plot with the "Age" column on the x-axis
    and the "Fare" column on the y-axis, using circle markers and no error
    bars, then shows the figure.

    Parameters
    ----------
    df : DataFrame-like
        Data handed to ``sns.lineplot``; it must provide "Age" and "Fare".

    Returns
    -------
    None
        The figure is displayed through ``plt.show()``.
    """
    _, ax = plt.subplots(figsize=(10, 6))

    line_options = {"x": "Age", "y": "Fare", "marker": "o", "errorbar": None}
    sns.lineplot(data=df, ax=ax, **line_options)

    # Set the labels after plotting so they replace any that seaborn derived
    # from the column names.
    ax.set_title("Fare vs Age Relationship")
    ax.set_xlabel("Age")
    ax.set_ylabel("Fare")
    plt.show()

In [None]:
# Draw the Age-vs-Fare line plot for the loaded frame.
plot_age_fare_relation(df)

In [None]:
def plot_survival_by_class(df):
    """
    Bar chart of the survival rate within each passenger class.

    Groups ``df`` by "Pclass", takes the mean of "Survived" in each group and
    multiplies it by 100, then draws one bar per class on an 8x5 inch figure
    using seaborn's "muted" palette (one colour per class, no legend).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "Pclass" and "Survived". The percentage
        reading assumes "Survived" is coded 0/1 -- TODO confirm.

    Returns
    -------
    None
        The figure is displayed through ``plt.show()``.
    """
    _, ax = plt.subplots(figsize=(8, 5))

    pct_by_class = df.groupby("Pclass")["Survived"].mean().mul(100)
    class_labels = pct_by_class.index

    # hue mirrors x so each class gets its own palette colour; the legend
    # would only repeat the x tick labels, so it is switched off.
    sns.barplot(
        x=class_labels,
        y=pct_by_class.values,
        hue=class_labels,
        palette="muted",
        legend=False,
        ax=ax,
    )

    ax.set_title("Survival Rate by Passenger Class")
    ax.set_xlabel("Passenger Class")
    ax.set_ylabel("Survival Rate (%)")
    plt.show()

In [None]:
# Draw the per-class survival-rate bar chart for the loaded frame.
plot_survival_by_class(df)

In [31]:
def plot_selected_correlation_heatmap(df, selected_columns):
    """
    Plot a heatmap of pairwise correlations between selected numeric columns.

    The selected columns are restricted to numeric dtypes, missing values are
    replaced by each column's mean, and the resulting correlation matrix is
    drawn as an annotated 10x8 inch heatmap with a diverging colour scale
    fixed to [-1, 1] and centred on 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    selected_columns : list of str
        Column labels to include. Non-numeric columns are left out of the
        heatmap instead of raising inside ``mean()`` / ``corr()``.

    Returns
    -------
    None
        The figure is displayed through ``plt.show()``.

    Raises
    ------
    KeyError
        If a label in ``selected_columns`` is not a column of ``df``.
    ValueError
        If none of the selected columns is numeric.
    """
    # Since pandas 2.0, DataFrame.mean() and DataFrame.corr() default to
    # numeric_only=False and raise on text columns, so narrow to numeric
    # dtypes up front. This behaves the same on older pandas versions.
    numeric_df = df[selected_columns].select_dtypes(include="number")
    if numeric_df.shape[1] == 0:
        raise ValueError(
            "selected_columns must include at least one numeric column"
        )

    # Fill NaNs with the column means so every row contributes to each pair.
    # NOTE(review): mean imputation can pull correlations toward zero compared
    # with corr()'s default pairwise NaN exclusion; kept to preserve the
    # existing output.
    filled_df = numeric_df.fillna(numeric_df.mean())
    correlation_matrix = filled_df.corr()

    _, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        correlation_matrix,
        annot=True,
        cmap="coolwarm",
        center=0,
        vmin=-1,
        vmax=1,
        ax=ax,
    )
    ax.set_title("Correlation Heatmap of Selected Titanic Dataset Features")
    plt.show()

In [None]:
# Example usage: choose the columns to correlate, then draw the heatmap for them.
selected_columns = ['Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare']
plot_selected_correlation_heatmap(df, selected_columns)