In [3]:
import pandas as pd

# Load the Titanic dataset from an absolute Google Drive path.
# NOTE(review): this path presumably requires Drive to be mounted at
# /content/drive (e.g. in Colab) before this cell runs — confirm, or point
# DATA_PATH at a local copy of titanic.csv.
DATA_PATH = "/content/drive/MyDrive/titanic.csv"
df = pd.read_csv(DATA_PATH)

# Preview the first rows to confirm the file parsed as expected.
df.head()

Unnamed: 0,Name,Pclass,Sex,Age,Fare,Survived,Embarked
0,Passenger1,1,male,35,80.0,1,S
1,Passenger2,1,female,28,110.0,1,C
2,Passenger3,1,female,40,83.5,0,Q
3,Passenger4,1,male,52,134.5,1,S


In [4]:
# Summarize the frame: index range, per-column non-null counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      4 non-null      object 
 1   Pclass    4 non-null      int64  
 2   Sex       4 non-null      object 
 3   Age       4 non-null      int64  
 4   Fare      4 non-null      float64
 5   Survived  4 non-null      int64  
 6   Embarked  4 non-null      object 
dtypes: float64(1), int64(3), object(3)
memory usage: 356.0+ bytes


# Problem 1 - Selecting Columns

In [5]:
# Keep only the Fare column; a list selector yields a one-column DataFrame
# rather than a Series.
fare = df.loc[:, ['Fare']]
fare.head()

Unnamed: 0,Fare
0,80.0
1,110.0
2,83.5
3,134.5


In [6]:
# Two-column view: passenger class alongside age.
class_age = df.loc[:, ['Pclass', 'Age']]
class_age.head()

Unnamed: 0,Pclass,Age
0,1,35
1,1,28
2,1,40
3,1,52


In [7]:
# Two-column view: survival flag alongside sex.
survived_gender = df.loc[:, ['Survived', 'Sex']]
survived_gender.head()

Unnamed: 0,Survived,Sex
0,1,male
1,1,female
2,0,female
3,1,male


# Problem 2 - Subsetting

In [8]:
# Rows whose fare is strictly greater than 100.
fare_gt_100 = df.loc[df['Fare'].gt(100)]
fare_gt_100

Unnamed: 0,Name,Pclass,Sex,Age,Fare,Survived,Embarked
1,Passenger2,1,female,28,110.0,1,C
3,Passenger4,1,male,52,134.5,1,S


In [9]:
# Rows for passengers whose Pclass equals 1.
first_class = df.loc[df['Pclass'].eq(1)]
first_class

Unnamed: 0,Name,Pclass,Sex,Age,Fare,Survived,Embarked
0,Passenger1,1,male,35,80.0,1,S
1,Passenger2,1,female,28,110.0,1,C
2,Passenger3,1,female,40,83.5,0,Q
3,Passenger4,1,male,52,134.5,1,S


In [10]:
# Rows that satisfy both conditions: Sex is 'female' and Age is below 18.
female_under_18 = df.loc[df['Sex'].eq('female') & df['Age'].lt(18)]
female_under_18

Unnamed: 0,Name,Pclass,Sex,Age,Fare,Survived,Embarked


In [11]:
# Rows whose Embarked code is either 'C' or 'S'.
embarked_c_or_s = df.loc[df['Embarked'].isin(['C', 'S'])]
embarked_c_or_s

Unnamed: 0,Name,Pclass,Sex,Age,Fare,Survived,Embarked
0,Passenger1,1,male,35,80.0,1,S
1,Passenger2,1,female,28,110.0,1,C
3,Passenger4,1,male,52,134.5,1,S


In [12]:
# Rows whose Pclass is 1 or 2.
first_second_class = df.loc[df['Pclass'].isin([1, 2])]
first_second_class

Unnamed: 0,Name,Pclass,Sex,Age,Fare,Survived,Embarked
0,Passenger1,1,male,35,80.0,1,S
1,Passenger2,1,female,28,110.0,1,C
2,Passenger3,1,female,40,83.5,0,Q
3,Passenger4,1,male,52,134.5,1,S


# EDA Practice - Fare Per Year

In [13]:
# Fill missing ages with the median age. This overwrites df['Age'] in place,
# so every later cell that reads df['Age'] sees the filled values.
df['Age'] = df['Age'].fillna(value=df['Age'].median())

# Ratio of fare to age, stored as a new column on df.
df['fare_per_year'] = df['Fare'].div(df['Age'])

# Keep rows whose ratio exceeds 5, ordered from the highest ratio down.
high_fare_age = df.loc[df['fare_per_year'].gt(5)]
high_fare_age_srt = high_fare_age.sort_values('fare_per_year', ascending=False)

# Show only the passenger name next to the ratio.
result = high_fare_age_srt.loc[:, ['Name', 'fare_per_year']]
result.head()

Unnamed: 0,Name,fare_per_year


# EDA Practice - Adult Males Highest Fare Per Class

In [14]:
# Fare divided by the numeric Pclass value, stored as a new column on df.
df['fare_per_class'] = df['Fare'].div(df['Pclass'])

# Male passengers aged 18 or older, ordered by the ratio from largest to smallest.
adult_males = df.loc[df['Sex'].eq('male') & df['Age'].ge(18)]
adult_males_srt = adult_males.sort_values('fare_per_class', ascending=False)

# Show only name, age, and the ratio.
result2 = adult_males_srt.loc[:, ['Name', 'Age', 'fare_per_class']]
result2.head()

Unnamed: 0,Name,Age,fare_per_class
3,Passenger4,52,134.5
0,Passenger1,35,80.0


# Group-by Practice - Fare Revenue by Class

In [15]:
# Sum of fares across all passengers; the denominator for each class share.
total_fare = df['Fare'].sum()

# Per-class fare sums for Pclass 1, 2 and 3, computed with explicit filters
# rather than groupby. A class with no rows sums to 0.
fare1, fare2, fare3 = (
    df.loc[df['Pclass'] == pclass, 'Fare'].sum() for pclass in (1, 2, 3)
)

# Share of the total fare contributed by each class, in class order.
totals = [fare1, fare2, fare3]
proportions = [class_total / total_fare for class_total in totals]
proportions

[np.float64(1.0), np.float64(0.0), np.float64(0.0)]

# Group-by Practice - Age Group Percentage

In [16]:
def group_age(a):
    """Classify an age into a coarse age-group label.

    Args:
        a: Age as a number. May be missing (None, NaN, or pd.NA).

    Returns:
        'child' for ages below 18, 'adult' for ages from 18 up to but not
        including 65, 'senior' for ages of 65 and above, and 'unknown' when
        the age is missing.
    """
    # A NaN age fails both `<` comparisons below, so without this guard it
    # would fall through to the last branch and be counted as 'senior'.
    if pd.isna(a):
        return 'unknown'
    if a < 18:
        return 'child'
    if a < 65:
        return 'adult'
    return 'senior'

# Label every passenger with the age group returned by group_age.
df['age_group'] = df['Age'].map(group_age)

# Percentage of all passengers that falls into each age group.
total_passengers = df.shape[0]
age_counts = df['age_group'].value_counts()
percentages = age_counts.div(total_passengers).mul(100)
percentages

Unnamed: 0_level_0,count
age_group,Unnamed: 1_level_1
adult,100.0
