In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Load the Titanic dataset
file_path = "Titanic-Dataset.csv"  # Update path if needed
df = pd.read_csv(r'C:\Users\MANSI\Documents\task\Titanic-Dataset.csv')

In [3]:
# Data Preprocessing
print("Initial dataset shape:", df.shape)

Initial dataset shape: (891, 12)


In [4]:
# Drop unnecessary columns
df = df.drop(columns=["PassengerId", "Name", "Ticket", "Cabin"])

In [5]:
# Fill missing values
df["Age"].fillna(df["Age"].median(), inplace=True)
df["Embarked"].fillna(df["Embarked"].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Age"].fillna(df["Age"].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Embarked"].fillna(df["Embarked"].mode()[0], inplace=True)


In [6]:
# Encode categorical variables
label_encoder = LabelEncoder()
df["Sex"] = label_encoder.fit_transform(df["Sex"])
df["Embarked"] = label_encoder.fit_transform(df["Embarked"])

In [7]:
# Split dataset into features and target variable
X = df.drop(columns=["Survived"])
y = df["Survived"]

In [8]:
# Standardize numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [9]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
print("Training set size:", X_train.shape)
print("Testing set size:", X_test.shape)

Training set size: (712, 7)
Testing set size: (179, 7)


In [10]:
# Train K-Nearest Neighbors (KNN) model
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train, y_train)
y_pred_knn = knn_model.predict(X_test)
knn_accuracy = accuracy_score(y_test, y_pred_knn)
print(f"KNN Model Accuracy: {knn_accuracy:.2f}")

KNN Model Accuracy: 0.80


In [11]:
# Manual Prediction Example
example_passenger = np.array([[3, 1, 22.0, 1, 0, 7.25, 2]])  # Example: 3rd class, male, 22 years, 1 sibling, 0 parents, low fare, embarked from Cherbourg
example_passenger_scaled = scaler.transform(example_passenger)
predicted_survival = knn_model.predict(example_passenger_scaled)[0]
print("Predicted Survival for Example Passenger:", "Survived" if predicted_survival == 1 else "Did not Survive")

Predicted Survival for Example Passenger: Did not Survive




In [12]:
# Conclusion
print("\nProject Summary:")
print("- The model was trained using Titanic dataset.")
print("- Features like age, gender, fare, and embarkation were used for prediction.")
print(f"- KNN achieved an accuracy of {knn_accuracy:.2f}.")
print("- A manual test case was run for a passenger, and the survival prediction was displayed.")
print("- Further improvements can be made with feature engineering and hyperparameter tuning.")


Project Summary:
- The model was trained using Titanic dataset.
- Features like age, gender, fare, and embarkation were used for prediction.
- KNN achieved an accuracy of 0.80.
- A manual test case was run for a passenger, and the survival prediction was displayed.
- Further improvements can be made with feature engineering and hyperparameter tuning.
