# Task 1
## Titanic Survival Prediction

In [20]:
# Importing Libraries

import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [21]:
# Loading the Dataset
#
# The CSV location can be overridden with the TITANIC_DATA_PATH environment
# variable so the notebook can run on machines other than the author's.
# When the variable is unset, the original local path is used unchanged.
DATA_PATH = os.environ.get(
    'TITANIC_DATA_PATH',
    r'F:\CodSoft\TitanicSurvivalPrediction\Titanic-Dataset.csv',
)
data = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first five rows (five is the default for head()).
data.head()

In [None]:
# Summary statistics; with default arguments describe() reports on the
# numeric columns of the frame.
data.describe()

In [None]:
# Handling Missing Values: count the missing entries in each column.

data.isna().sum()

In [25]:
# Fill missing ages with the column mean.
# NOTE(review): the imputer is fitted on the full dataset, before the
# train/test split further down, so test rows contribute to the fill value.
imputer = SimpleImputer(strategy='mean')
imputer.fit(data[['Age']])
data['Age'] = imputer.transform(data[['Age']])[:, 0]

In [None]:
# Fill missing embarkation ports with the most frequent port.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the column selection: that form is chained
# assignment, which emits a FutureWarning in recent pandas 2.x releases and
# does not update `data` once Copy-on-Write is enabled.
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])
print('\nMissing Values after handling:', data.isnull().sum())

In [27]:
# Feature Engineering:

# FamilySize: SibSp + Parch, plus one for the passenger themselves.
data['FamilySize'] = data['SibSp'].add(data['Parch']).add(1)

# Drop the columns that are not used later as model features; SibSp and
# Parch are now represented by FamilySize.
data.drop(columns=['SibSp', 'Parch', 'Cabin', 'Ticket', 'Name'], inplace=True)

In [28]:
# Encode Categorical Variables
# Each column gets its own encoder so that both fitted mappings remain
# available afterwards (e.g. for inverse_transform). Refitting one shared
# encoder would discard the mapping learned for 'Sex'.
sex_encoder = LabelEncoder()
data['Sex'] = sex_encoder.fit_transform(data['Sex'])

embarked_encoder = LabelEncoder()
data['Embarked'] = embarked_encoder.fit_transform(data['Embarked'])

# Backward-compatible alias: as before, `label_encoder` ends up fitted on
# the 'Embarked' column.
label_encoder = embarked_encoder

In [None]:
# Preview the first five rows after encoding (five is the default for head()).
data.head()

In [30]:
# Feature Scaling: standardise the 'Age' and 'Fare' columns.
# NOTE(review): the scaler is fitted on all rows, before the train/test
# split further down, so test-set statistics feed into the scaling.
scaler = StandardScaler().fit(data[['Age', 'Fare']])
data[['Age', 'Fare']] = scaler.transform(data[['Age', 'Fare']])

In [31]:
# Feature Selection: model inputs (X) and the target column (y).

selected_features = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'FamilySize']
X, y = data.loc[:, selected_features], data['Survived']

In [32]:
# Train-Test Split: hold out 20% of the rows for evaluation, with a fixed
# seed so the split is repeatable.

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Hyperparameter Tuning for Random Forest
# Exhaustive search over 3 * 4 * 3 * 3 = 108 parameter combinations, each
# scored by 3-fold cross-validated accuracy on the training split.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,  # run the fits in parallel on all available cores
    verbose=2,
)
grid_search.fit(X_train, y_train)

# Report the winning combination and keep the estimator fitted with it.
print('\nBest Parameters:', grid_search.best_params_)
best_model = grid_search.best_estimator_

In [None]:
# Model Evaluation: accuracy of the tuned model on the held-out test split.

y_pred = best_model.predict(X_test)
print('\n Accuracy:', accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# Confusion Matrix: actual classes on the rows, predicted on the columns.

class_names = ['Not Survived', 'Survived']
conf_matrix = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Reds',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

print('\nClassification Report:\n', classification_report(y_test, y_pred))

In [None]:
# Important Features: bar chart of the tuned model's feature importances,
# labelled with the feature names it was trained on.

feature_imp = best_model.feature_importances_

fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(x=selected_features, y=feature_imp, ax=ax)
ax.set(title='Feature Importance', xlabel='Features', ylabel='Importance')
plt.show()

In [None]:
# Save the Model:
# The model was trained on 'Age' and 'Fare' values already transformed by
# `scaler`, so the fitted scaler is saved next to it; without it the saved
# model cannot be fed correctly scaled inputs outside this kernel.
# NOTE(review): '.plk' looks like a typo for '.pkl'. The name is kept so that
# anything already loading 'titanic_model.plk' keeps working; confirm before
# renaming.

joblib.dump(scaler, 'titanic_scaler.pkl')
joblib.dump(best_model, 'titanic_model.plk')

In [None]:
# Predicting the Survival:

def predict_survival():
    """Prompt for one passenger's details and print the predicted outcome.

    Reads six answers from stdin, encodes sex and port as integers,
    transforms 'Age' and 'Fare' with the notebook-level ``scaler``, and
    passes the resulting single-row frame to the notebook-level
    ``best_model``. The columns are ordered by ``selected_features``.

    Raises:
        ValueError: if a numeric answer cannot be parsed, or if the passenger
            class, sex or family size is outside the accepted values.
    """
    print('\nEnter passenger details to predict survival:')
    pclass = int(input('Enter the Passenger Class (1-Upper, 2-Middle, 3-Lower):'))
    sex = input('Enter Sex (male/female):').strip().lower()
    age = float(input('Enter Age:'))
    fare = float(input('Enter Fare:'))
    embarked = input('Enter Embarked Port (C-Cherbourg, Q-Queenstown, S-Southampton):').strip().upper()
    family_size = int(input('Enter Family Size (including self):'))

    # These codes must match the ones produced by the label-encoding cell
    # (LabelEncoder numbers the classes in sorted order).
    sex_codes = {'female': 0, 'male': 1}
    port_codes = {'C': 0, 'Q': 1, 'S': 2}

    # Reject answers that have no sensible default instead of silently
    # predicting for a different passenger than the one described.
    if pclass not in (1, 2, 3):
        raise ValueError(f'Passenger class must be 1, 2 or 3, got {pclass}.')
    if sex not in sex_codes:
        raise ValueError(f"Sex must be 'male' or 'female', got {sex!r}.")
    if family_size < 1:
        raise ValueError(f'Family size includes the passenger, so it must be at least 1, got {family_size}.')

    # An unknown port still falls back to 'S', but the fallback is now reported.
    if embarked not in port_codes:
        print(f"Unrecognised port {embarked!r}; defaulting to 'S'.")
        embarked = 'S'

    # Build a named, single-row frame: the scaler and the model were fitted on
    # DataFrames, so passing bare lists/arrays triggers feature-name warnings
    # and relies on positional column order.
    passenger = pd.DataFrame([{
        'Pclass': pclass,
        'Sex': sex_codes[sex],
        'Age': age,
        'Fare': fare,
        'Embarked': port_codes[embarked],
        'FamilySize': family_size,
    }])

    # Scale numerical inputs with the scaler fitted on the training data.
    passenger[['Age', 'Fare']] = scaler.transform(passenger[['Age', 'Fare']])

    # Predict survival, presenting the columns in the order used for training.
    prediction = best_model.predict(passenger[selected_features])
    result = 'Survived' if prediction[0] == 1 else 'Not Survived'
    print(f'\nPrediction: The passenger would have {result}.')

# Calling the prediction function
predict_survival()