In [2]:
import pandas as pd

# Load Titanic dataset from a popular public dataset repository
# As internet access is not available, I will use a simplified dataset stored locally
titanic_data = {
    'PassengerId': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'Survived': [0, 1, 1, 1, 0, 0, 1, 0, 1, 1],
    'Pclass': [3, 1, 3, 1, 3, 3, 1, 3, 3, 2],
    'Name': ['Braund, Mr. Owen Harris', 'Cumings, Mrs. John Bradley (Florence Briggs Thayer)', 
             'Heikkinen, Miss. Laina', 'Futrelle, Mrs. Jacques Heath (Lily May Peel)', 
             'Allen, Mr. William Henry', 'Moran, Mr. James', 'McCarthy, Mr. Timothy J', 
             'Palsson, Master. Gosta Leonard', 'Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)', 
             'Nasser, Mrs. Nicholas (Adele Achem)'],
    'Sex': ['male', 'female', 'female', 'female', 'male', 'male', 'male', 'male', 'female', 'female'],
    'Age': [22, 38, 26, 35, 35, None, 54, 2, 27, 14],
    'SibSp': [1, 1, 0, 1, 0, 0, 0, 3, 0, 0],
    'Parch': [0, 0, 0, 0, 0, 0, 0, 1, 2, 0],
    'Ticket': ['A/5 21171', 'PC 17599', 'STON/O2. 3101282', '113803', '373450', '330877', 
               '17463', '349909', '347742', '237736'],
    'Fare': [7.25, 71.2833, 7.925, 53.1, 8.05, 8.4583, 51.8625, 21.075, 11.1333, 30.0708],
    'Cabin': [None, 'C85', None, 'C123', None, None, 'E46', None, None, None],
    'Embarked': ['S', 'C', 'S', 'S', 'S', 'Q', 'S', 'S', 'S', 'C']
}

# Converting to DataFrame
titanic_df = pd.DataFrame(titanic_data)

# Data Cleaning
# Fill missing 'Age' values with the median age
titanic_df['Age'].fillna(titanic_df['Age'].median(), inplace=True)

# Exploratory Data Analysis (EDA)
# Checking basic statistics and information
basic_info = titanic_df.describe()
missing_values = titanic_df.isnull().sum()

# Analyze the relationship between Survival and Passenger Class
survival_by_class = titanic_df.groupby('Pclass')['Survived'].mean()

# Analyze the relationship between Survival and Gender
survival_by_gender = titanic_df.groupby('Sex')['Survived'].mean()

# Analyze the relationship between Age and Survival
age_survived = titanic_df[['Age', 'Survived']]

basic_info, missing_values, survival_by_class, survival_by_gender, age_survived.head()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  titanic_df['Age'].fillna(titanic_df['Age'].median(), inplace=True)


(       PassengerId   Survived     Pclass        Age      SibSp      Parch  \
 count     10.00000  10.000000  10.000000  10.000000  10.000000  10.000000   
 mean       5.50000   0.600000   2.300000  28.000000   0.600000   0.300000   
 std        3.02765   0.516398   0.948683  14.094916   0.966092   0.674949   
 min        1.00000   0.000000   1.000000   2.000000   0.000000   0.000000   
 25%        3.25000   0.000000   1.250000  23.000000   0.000000   0.000000   
 50%        5.50000   1.000000   3.000000  27.000000   0.000000   0.000000   
 75%        7.75000   1.000000   3.000000  35.000000   1.000000   0.000000   
 max       10.00000   1.000000   3.000000  54.000000   3.000000   2.000000   
 
             Fare  
 count  10.000000  
 mean   27.020820  
 std    23.601938  
 min     7.250000  
 25%     8.152075  
 50%    16.104150  
 75%    46.414575  
 max    71.283300  ,
 PassengerId    0
 Survived       0
 Pclass         0
 Name           0
 Sex            0
 Age            0
 SibSp 