In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Analyze the number of passengers in each category of "who"
# 'who' is the age/gender group of a passenger: 'man', 'woman' or 'child'.
# Counting df['Name'] (as this cell used to) only lists each passenger name
# once; it does not produce per-group totals.
df = pd.read_csv('titanic.csv')

if 'who' in df.columns:
    # The file already carries the grouping column - use it directly.
    who = df['who']
else:
    # The rest of this notebook uses Kaggle-style column names (Pclass,
    # Survived, SibSp, ...), a schema with no 'who' column, so derive it.
    # NOTE(review): assumes a 'Sex' column holding 'male'/'female' and a
    # numeric 'Age' column - confirm against the CSV.
    # Passengers younger than 16 count as 'child' (the cutoff the seaborn
    # titanic dataset appears to use - confirm); a missing age compares False
    # and therefore falls back to the adult label for that passenger's sex.
    child_age_limit = 16
    who = (
        df['Sex']
        .map({'male': 'man', 'female': 'woman'})
        .mask(df['Age'] < child_age_limit, 'child')
        .rename('who')
    )

who_counts = who.value_counts()
print("Number of passengers in each category of 'who':")
print(who_counts)

In [None]:
# 2. Median age among first-class passengers (rows where Pclass == 1).
# A single .loc call selects the rows and the 'Age' column in one step;
# Series.median() ignores missing ages by default.
median_age_first_class = df.loc[df['Pclass'] == 1, 'Age'].median()
print(f"\nMedian age of passengers in the first class: {median_age_first_class}")

In [None]:
# 3. Survival rate in the lowest class (rows where Pclass == 3).
# The mean of the 'Survived' column over those rows is reported as the rate
# (assumes 'Survived' is a 0/1 indicator - TODO confirm).
survival_rate_third_class = df.loc[df['Pclass'] == 3, 'Survived'].mean()
print(f"\nSurvival rate for passengers in the third class: {survival_rate_third_class:.2f}")

In [None]:
# 4. Replace missing 'Embarked' values with the most common (mode) value
# mode() returns the most frequent value(s) sorted; [0] takes the first one.
most_common_embarked = df['Embarked'].mode()[0]
# Assign the filled column back instead of calling fillna(inplace=True) on
# df['Embarked']: that chained in-place form acts on an intermediate Series,
# emits a FutureWarning on pandas 2.x, and leaves df unchanged once
# Copy-on-Write is enabled.
df['Embarked'] = df['Embarked'].fillna(most_common_embarked)
print(f"\nMissing 'Embarked' values have been replaced with the most common value: {most_common_embarked}")

In [None]:
# 5. Create a histogram of fares paid by survivors and non-survivors
fig, ax = plt.subplots(figsize=(10, 6))

# Split data into survivors and non-survivors
survivors = df[df['Survived'] == 1]
non_survivors = df[df['Survived'] == 0]

# Use one fare range for both histograms. Without it each call derives its own
# 30 bins from its own min/max, so the two groups get different bin widths and
# the overlaid bar heights are not comparable.
fare_range = (df['Fare'].min(), df['Fare'].max())

# Plot histogram for survivors
ax.hist(survivors['Fare'], bins=30, range=fare_range, alpha=0.5, label='Survivors')

# Plot histogram for non-survivors (same bin edges as above)
ax.hist(non_survivors['Fare'], bins=30, range=fare_range, alpha=0.5, label='Non-Survivors')

ax.set_title('Fare Distribution for Survivors and Non-Survivors')
ax.set_xlabel('Fare')
ax.set_ylabel('Frequency')
ax.legend()

plt.show()


In [None]:
# 6. Compare survival rates: travelling with family vs. travelling alone.
# A passenger is flagged as having family aboard when SibSp + Parch is
# greater than zero; the flag is stored on df as a new 'Has_Family' column.
df['Has_Family'] = df['SibSp'].add(df['Parch']).gt(0)

# The flag is itself a boolean mask, so it (and its negation) can select rows
# directly; the mean of 'Survived' over each subset is reported as the rate.
survival_rate_family = df.loc[df['Has_Family'], 'Survived'].mean()
survival_rate_alone = df.loc[~df['Has_Family'], 'Survived'].mean()

print(f"\nSurvival rate for passengers traveling with family: {survival_rate_family:.2f}")
print(f"Survival rate for passengers traveling alone: {survival_rate_alone:.2f}")