In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Loading the dataset
df = pd.read_csv('../passengers-titanic.csv')

# Filling the missing 'age' values with the median age
df['age'].fillna(df['age'].median(), inplace=True)

# Converting 'pclass' and 'sex' to numerical values
# using LabelEncoder for 'pclass' as it's ordinal
le_pclass = LabelEncoder()
df['pclass'] = le_pclass.fit_transform(df['pclass'])

# Converting 'sex' to binary values, 0 for male and 1 for female
df['sex'] = df['sex'].map({'male': 0, 'female': 1})

# Displaying the first few rows 
print(df.head())


   row.names  pclass  survived  \
0          1       0         1   
1          2       0         0   
2          3       0         0   
3          4       0         0   
4          5       0         1   

                                              name      age     embarked  \
0                     Allen, Miss Elisabeth Walton  29.0000  Southampton   
1                      Allison, Miss Helen Loraine   2.0000  Southampton   
2              Allison, Mr Hudson Joshua Creighton  30.0000  Southampton   
3  Allison, Mrs Hudson J.C. (Bessie Waldo Daniels)  25.0000  Southampton   
4                    Allison, Master Hudson Trevor   0.9167  Southampton   

                         home.dest room      ticket   boat  sex  
0                     St Louis, MO  B-5  24160 L221      2    1  
1  Montreal, PQ / Chesterville, ON  C26         NaN    NaN    1  
2  Montreal, PQ / Chesterville, ON  C26         NaN  (135)    0  
3  Montreal, PQ / Chesterville, ON  C26         NaN    NaN    1  
4  Montr

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['age'].fillna(df['age'].median(), inplace=True)


In [12]:
# Defineing features and target variable
X = df[['pclass', 'age', 'sex']]
y = df['survived']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [13]:
# Initialization of the Decision Tree model
dt_model = DecisionTreeClassifier(random_state=42)

# Train 
dt_model.fit(X_train, y_train)

# Make predictions and evaluate the model
dt_predictions = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)
print(f'Decision Tree Accuracy: {dt_accuracy:.4f}')


Decision Tree Accuracy: 0.7985


In [14]:
# Initialization of the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train 
rf_model.fit(X_train, y_train)

# Make predictions and evaluate the model
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print(f'Random Forest Accuracy: {rf_accuracy:.4f}')


Random Forest Accuracy: 0.8061


In [17]:
# Experimenting with Decision Tree parameters
dt_model_depth = DecisionTreeClassifier(random_state=42, max_depth=5)
dt_model_depth.fit(X_train, y_train)
dt_predictions_depth = dt_model_depth.predict(X_test)
dt_accuracy_depth = accuracy_score(y_test, dt_predictions_depth)
print(f'Decision Tree Accuracy with max_depth=5: {dt_accuracy_depth:.4f}')

# Experimenting with Random Forest parameters
rf_model_adjusted = RandomForestClassifier(n_estimators=150, max_depth=10, random_state=42)
rf_model_adjusted.fit(X_train, y_train)
rf_predictions_adjusted = rf_model_adjusted.predict(X_test)
rf_accuracy_adjusted = accuracy_score(y_test, rf_predictions_adjusted)
print(f'Random Forest Accuracy with n_estimators=150 and max_depth=10: {rf_accuracy_adjusted:.4f}')


Decision Tree Accuracy with max_depth=5: 0.8289
Random Forest Accuracy with n_estimators=150 and max_depth=10: 0.7947
