In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Location of the Titanic CSV on disk
dataset_path = r"C:\Users\xavie\OneDrive\Desktop\codsoft_internship\Titanic_Dataset.csv"

# Read the raw table, then impute and integer-encode it in one chained pipeline:
#   - Age:      gaps filled with the column median
#   - Embarked: gaps filled with "S", then S/C/Q encoded as 0/1/2
#   - Sex:      male/female encoded as 0/1
#   - a "Passenger_class" column, when present, is renamed to "Pclass"
#     (rename ignores keys that are not in the frame, so no guard is needed)
data = (
    pd.read_csv(dataset_path)
    .assign(
        Age=lambda frame: frame["Age"].fillna(frame["Age"].median()),
        Embarked=lambda frame: frame["Embarked"].fillna("S").map({"S": 0, "C": 1, "Q": 2}),
        Sex=lambda frame: frame["Sex"].map({"male": 0, "female": 1}),
    )
    .rename(columns={"Passenger_class": "Pclass"})
)

# Model inputs (five encoded columns) and the label column
features = data.loc[:, ["Pclass", "Sex", "Age", "Fare", "Embarked"]]
target = data.loc[:, "Survived"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Build the logistic-regression classifier (iteration cap of 200) and fit it
# on the training split; fit() hands back the estimator itself
model = LogisticRegression(max_iter=200).fit(X_train, y_train)

# Score the held-out split and report overall accuracy to two decimals
predictions = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print(f"Model Accuracy: {accuracy:.2f}")

# Per-class precision/recall/F1, then the confusion matrix; each heading and
# its body are emitted on separate lines by a single print call
print(
    "\nClassification Report:",
    classification_report(y_true=y_test, y_pred=predictions),
    sep="\n",
)
print(
    "\nConfusion Matrix:",
    confusion_matrix(y_true=y_test, y_pred=predictions),
    sep="\n",
)

# Interactive survival prediction for one passenger, with a best-effort name
# lookup against the loaded dataset.
def _read_passenger_details():
    """Prompt for the five model features and return them validated.

    Each answer is checked immediately after it is entered, so prompting
    stops at the first bad value.

    Returns:
        tuple: ``(pclass, sex, age, fare, embarked)`` where ``pclass``, ``sex``
        and ``embarked`` are ints using the encodings named in the prompts and
        ``age`` / ``fare`` are finite, non-negative floats.

    Raises:
        ValueError: If an answer cannot be parsed as a number, is outside the
            set of codes offered by its prompt, or is a negative / NaN /
            infinite age or fare.
    """
    print("\nEnter passenger details for survival prediction:")

    pclass = int(input("Enter Passenger class (1, 2, or 3): "))
    if pclass not in (1, 2, 3):
        raise ValueError(f"Passenger class must be 1, 2 or 3, got {pclass}")

    sex = int(input("Enter Sex (0 for male, 1 for female): "))
    if sex not in (0, 1):
        raise ValueError(f"Sex must be 0 or 1, got {sex}")

    # float() happily parses "nan" and "inf"; the chained comparison is False
    # for NaN and for +/-inf, so those are rejected together with negatives.
    age = float(input("Enter Age: "))
    if not (0 <= age < float("inf")):
        raise ValueError(f"Age must be a finite, non-negative number, got {age}")

    fare = float(input("Enter Fare: "))
    if not (0 <= fare < float("inf")):
        raise ValueError(f"Fare must be a finite, non-negative number, got {fare}")

    embarked = int(input("Enter Embarked (0 for S, 1 for C, 2 for Q): "))
    if embarked not in (0, 1, 2):
        raise ValueError(f"Embarked must be 0, 1 or 2, got {embarked}")

    return pclass, sex, age, fare, embarked


def _find_matching_passengers(frame, pclass, sex, age, fare, embarked, tol=1e-6):
    """Return the rows of ``frame`` whose five feature columns match the inputs.

    The integer-coded columns are compared exactly. ``Age`` and ``Fare`` are
    compared within the absolute tolerance ``tol`` so that a last-bit
    difference between a value parsed from the CSV and the value typed by the
    user does not prevent a match. Rows with NaN in ``Age`` or ``Fare`` never
    match.
    """
    is_match = (
        (frame["Pclass"] == pclass)
        & (frame["Sex"] == sex)
        & ((frame["Age"] - age).abs() <= tol)
        & ((frame["Fare"] - fare).abs() <= tol)
        & (frame["Embarked"] == embarked)
    )
    return frame[is_match]


def predict_passenger_survival():
    """Prompt for a passenger's details and print the model's survival prediction.

    Reads the module-level ``model`` (must expose ``predict``) and ``data``
    (must contain the five feature columns plus ``Name``). The typed values are
    validated, scored by the model, and looked up in ``data``:

    * when at least one row matches, the first match's ``Name`` is used in the
      printed prediction;
    * when no row matches, that fact is reported and the prediction is still
      printed, since the model does not need the passenger to be in the
      dataset.

    Invalid answers print ``"Invalid input. Please enter valid details."`` and
    the function returns without predicting. Only input parsing/validation is
    covered by that handler; errors raised by the model itself propagate
    instead of being reported as bad user input.

    Returns:
        None. All results are written to stdout.
    """
    try:
        pclass, sex, age, fare, embarked = _read_passenger_details()
    except ValueError:
        print("Invalid input. Please enter valid details.")
        return

    # Single-row frame with the same column names the model was trained on
    passenger_data = pd.DataFrame(
        [[pclass, sex, age, fare, embarked]],
        columns=["Pclass", "Sex", "Age", "Fare", "Embarked"],
    )
    prediction = model.predict(passenger_data)
    survival_status = "Survived" if prediction[0] == 1 else "Did not survive"

    matching_passenger = _find_matching_passengers(
        data, pclass, sex, age, fare, embarked
    )

    if matching_passenger.empty:
        print("No matching passenger found in the dataset.")
        print(f"\nPrediction for the entered passenger: {survival_status}")
    else:
        # Several passengers can share the same five values; report the first
        passenger_name = matching_passenger["Name"].iloc[0]
        print(f"\nPrediction for {passenger_name}: {survival_status}")

# Run the interactive prediction once. The function asks for the five feature
# values through input(), so execution waits here until they have been entered.
predict_passenger_survival()


Model Accuracy: 0.79

Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.84      0.83       105
           1       0.76      0.73      0.74        74

    accuracy                           0.79       179
   macro avg       0.79      0.78      0.79       179
weighted avg       0.79      0.79      0.79       179


Confusion Matrix:
[[88 17]
 [20 54]]

Enter passenger details for survival prediction:


Enter Passenger class (1, 2, or 3):  3
Enter Sex (0 for male, 1 for female):  0
Enter Age:  22
Enter Fare:  7.25
Enter Embarked (0 for S, 1 for C, 2 for Q):  0



Prediction for Braund, Mr. Owen Harris: Did not survive
