# Importing Libraries and Dataset

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Load the Titanic training CSV from the Kaggle input directory into a DataFrame.
df=pd.read_csv('/kaggle/input/titanic-dataset/titanic_train.csv')

In [None]:
# Display the loaded DataFrame (the notebook renders the cell's last expression).
df

In [None]:
# List the column labels of the raw data.
df.columns

# Exploratory Data Analysis (EDA)

In [None]:
# Bar chart of how many passengers fall into each 'Survived' value.
survival_ax = sns.countplot(
    data=df,
    x='Survived',
    palette=['red', 'green'],
)
survival_ax.set_title('Count of Survivors and Non-Survivors')
plt.show()

In [None]:
# Share of each 'Survived' value as a pie chart.
# value_counts() orders by frequency, so labels and colors are looked up by
# class value rather than by position; otherwise they would silently swap
# if the minority/majority classes changed.
# NOTE(review): assumes 'Survived' is coded 0 = not survived, 1 = survived — confirm.
survival_labels = {0: 'Not Survived', 1: 'Survived'}
survival_colors = {0: 'red', 1: 'green'}
survival_counts = df['Survived'].value_counts().sort_index()
plt.pie(
    survival_counts,
    labels=[survival_labels[value] for value in survival_counts.index],
    autopct='%1.1f%%',
    colors=[survival_colors[value] for value in survival_counts.index],
)
plt.title('Pie Chart of Survivors and Non-Survivors')
plt.show()

In [None]:
# Distribution of passenger ages in 20 bins; rows without an age are left out.
known_ages = df['Age'].dropna()
sns.histplot(known_ages, bins=20)
plt.title('Histogram of Passengers Ages')
plt.show()

In [None]:
# Bar chart of passenger counts per ticket class ('Pclass').
class_ax = sns.countplot(
    data=df,
    x='Pclass',
    palette=['green', 'blue', 'red'],
)
class_ax.set_title('Count of Passengers in Different Classes')
plt.show()

In [None]:
# Bar chart of passenger counts per embarkation port ('Embarked').
port_ax = sns.countplot(
    data=df,
    x='Embarked',
    palette=['green', 'blue', 'red'],
)
port_ax.set_title('Count of Passengers Embarked at Different Ports')
plt.show()

# Data Preprocessing

In [None]:
# Preview the first rows of the data.
df.head()

In [None]:
# Preview the last rows of the data.
df.tail()

In [None]:
# Percentage of missing values per column.
# Two decimals are kept because rounding to whole percent reports columns
# with only a handful of missing entries as 0, hiding them from this check.
(df.isnull().mean() * 100).round(2)

In [None]:
# Number of distinct non-null values in 'Sex' (checked before encoding the column).
df['Sex'].nunique()

In [None]:
# Number of distinct non-null values in 'Embarked' (checked before encoding the column).
df['Embarked'].nunique()

In [None]:
# Re-list the column labels before choosing which ones to keep.
df.columns

In [None]:
# Keep only the target and the features used for modelling.
# .copy() makes the result an independent DataFrame, so the column
# assignments in later cells do not trigger pandas' SettingWithCopyWarning.
df = df[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']].copy()

In [None]:
# Encode 'Sex' as integer dummy variables, dropping the first category, and
# store the result as 'Male'.
# NOTE(review): presumably 'Sex' has exactly two categories so a single
# indicator column remains — confirm against the nunique() check.
sex_dummies = pd.get_dummies(df['Sex'], drop_first=True, dtype=int)
df['Male'] = sex_dummies

In [None]:
# The text column is no longer needed once 'Male' holds its encoding.
df = df.drop(columns='Sex')

In [None]:
# Encode 'Embarked' as dummy variables, dropping the first category, and store
# the remaining two indicators as 'Q' and 'S'.
# dtype=int keeps them as 0/1 integers, consistent with the 'Male' column;
# without it the dtype is bool or uint8 depending on the pandas version.
# NOTE(review): rows with a missing 'Embarked' get 0 in both columns and so
# look identical to the dropped baseline category — confirm this is intended.
df[['Q', 'S']] = pd.get_dummies(df['Embarked'], drop_first=True, dtype=int)

In [None]:
# The text column is no longer needed once 'Q' and 'S' hold its encoding.
df = df.drop(columns='Embarked')

In [None]:
# Print dtypes and non-null counts to verify the encoded columns.
df.info()

In [None]:
# Replace missing ages with the column mean.
# NOTE(review): the mean is taken over the whole DataFrame, before the
# train/test split further down, so test rows influence the imputed value —
# consider imputing after the split.
mean_age = df['Age'].mean()
df['Age'] = df['Age'].fillna(mean_age)

In [None]:
# Count remaining missing values per column after imputation.
df.isnull().sum()

# Splitting the Data into Training and Test Sets

In [None]:
# Separate the target column from the feature matrix.
y = df['Survived']
X = df.drop(columns='Survived')

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=91,
)

# Using Various Machine Learning Algorithms

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Candidate classifiers, keyed by the display name used in the results table.
# Every estimator that accepts a seed gets one, so repeated runs give the
# same scores (the decision tree was previously unseeded).
models = {
    'Logistic Regression': LogisticRegression(max_iter=10000, random_state=42),
    'KNN': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
}

In [None]:
def evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            fitted in place.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``. Precision, recall and
        F1 use ``average='weighted'``; ``zero_division=0`` makes an
        undefined score count as 0.
    """
    # Imported here so the function works even if the later cell that imports
    # the metric functions has not been run yet.
    from sklearn.metrics import (
        accuracy_score,
        f1_score,
        precision_score,
        recall_score,
    )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    return accuracy, precision, recall, f1

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

# Score every candidate model and collect one record per model.
# The names below follow the order in which evaluate_model returns its scores.
metric_names = ("Accuracy", "Precision", "Recall", "F1-Score")
results = []
for name, model in models.items():
    scores = evaluate_model(model, X_train, X_test, y_train, y_test)
    record = {"Model": name}
    record.update(zip(metric_names, scores))
    results.append(record)
    

In [None]:
# Tabulate the per-model records (one row per model) and display the table.
results_df = pd.DataFrame(data=results)
results_df

In [None]:
# Row of the results table with the highest accuracy.
top_row_label = results_df['Accuracy'].idxmax()
best_model = results_df.loc[top_row_label]
print("\nBest Model (Machine Learning):")
print(best_model)

metrics = ["Accuracy", "Precision", "Recall", "F1-Score"]
colors = ["skyblue", "orange", "green", "red"]

fig, ax = plt.subplots(figsize=(10, 6))

# One horizontal bar per (model, metric) pair; bars of the same metric share
# a color and a single legend entry.
for metric, color in zip(metrics, colors):
    bar_labels = results_df['Model'] + f" ({metric})"
    ax.barh(bar_labels, results_df[metric], color=color, alpha=0.7, label=metric)

ax.set_xlabel("Score", fontsize=12)
ax.set_title("Model Performance Comparison", fontsize=18)
ax.legend()
plt.tight_layout()
plt.show()

print(f"\nOptimal Model based on Accuracy: {best_model['Model']} with Accuracy = {best_model['Accuracy']:.4f}")