In [None]:
import warnings

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
# Install a global "ignore" filter: from this point on no warning of any
# category is shown. NOTE(review): this also hides deprecation / future-behaviour
# warnings emitted by the libraries used in later cells.
warnings.filterwarnings('ignore')


In [None]:
# Read the Titanic CSV straight from the datasciencedojo/datasets GitHub repository.
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(url)

# Preview the first rows, restricted to the target and the model's input columns.
preview_columns = ['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']
preview = df[preview_columns].head()
print(preview)


In [None]:
# Drop rows with a missing target. The result is re-bound to `df` instead of
# mutating the frame with inplace=True.
df = df.dropna(subset=['Survived'])

# Select input features and target
feature_columns = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']
X = df[feature_columns].copy()
y = df['Survived'].copy()

# Encode 'Sex' as numeric (female -> 0, male -> 1)
X['Sex'] = X['Sex'].map({'female': 0, 'male': 1})

# Fill missing Age values with the median.
# The filled Series is assigned back to the column on purpose:
# `X['Age'].fillna(..., inplace=True)` acts on a temporary Series, which pandas
# flags as chained assignment and which, under Copy-on-Write, never updates X
# (the NaNs would silently remain in 'Age').
age_median = X['Age'].median()
X['Age'] = X['Age'].fillna(age_median)


In [None]:
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [None]:
# Random forest configuration: 100 trees, depth capped at 6, fixed seed.
rf_params = {'n_estimators': 100, 'max_depth': 6, 'random_state': 42}
rf_model = RandomForestClassifier(**rf_params)

# Fit on the training split; the fitted estimator is the cell's displayed value.
rf_model.fit(X_train, y_train)


In [None]:
# Predict on the held-out split and score the predictions against the true labels.
y_pred_rf = rf_model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred_rf)
test_report = classification_report(y_test, y_pred_rf)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)


In [None]:
# Take the first test row as a one-row frame (keeps the 2-D shape predict() is given).
sample = X_test.iloc[:1]
prediction = rf_model.predict(sample)

# Turn the row into a plain dict and the predicted label into readable text.
passenger = sample.iloc[0].to_dict()
if prediction[0] == 1:
    outcome = 'Survived'
else:
    outcome = 'Did Not Survive'

print(f"\nSample Passenger: {passenger}")
print(f"Predicted Survival: {outcome}")


In [None]:
# Plot the fitted forest's feature importances, one horizontal bar per column of X.
# `plt` is imported with the other dependencies in the notebook's first cell;
# the explicit figure/axes API is used instead of the implicit pyplot state.
importances = rf_model.feature_importances_
feature_names = X.columns

fig, ax = plt.subplots()
ax.barh(feature_names, importances, color='teal')
ax.set_xlabel("Feature Importance")
ax.set_title("Random Forest - Feature Importances")
ax.grid(True)
plt.show()
