**Step 1: Import Required Packages**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score


**Step 2: Load and Explore the Data**

In [None]:
from google.colab import drive

# Attach Google Drive at /content/drive so the CSV stored there can be read.
drive.mount('/content/drive')

# Location of the dataset inside the mounted Drive.
path = "/content/drive/MyDrive/Titanic_Dataset.csv"
df = pd.read_csv(filepath_or_buffer=path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the dataset.
# Read from the Drive location stored in `path` (set in the previous cell)
# rather than a bare relative filename: a relative 'Titanic_Dataset.csv' only
# resolves if a copy happens to sit in the current working directory, which
# breaks Restart & Run All. Re-reading here also gives this cell a fresh,
# unmodified frame every time it is re-run.
df = pd.read_csv(path)

# Explore the data
print(df.head())  # View the first few rows
print(df.describe())  # Statistical summary of the data
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the report.
df.info()  # Information about the columns and data types


   Passengerid   Age     Fare  Sex  sibsp  zero  zero.1  zero.2  zero.3  \
0            1  22.0   7.2500    0      1     0       0       0       0   
1            2  38.0  71.2833    1      1     0       0       0       0   
2            3  26.0   7.9250    1      0     0       0       0       0   
3            4  35.0  53.1000    1      1     0       0       0       0   
4            5  35.0   8.0500    0      0     0       0       0       0   

   zero.4  ...  zero.12  zero.13  zero.14  Pclass  zero.15  zero.16  Embarked  \
0       0  ...        0        0        0       3        0        0       2.0   
1       0  ...        0        0        0       1        0        0       0.0   
2       0  ...        0        0        0       3        0        0       2.0   
3       0  ...        0        0        0       1        0        0       2.0   
4       0  ...        0        0        0       3        0        0       2.0   

   zero.17  zero.18  2urvived  
0        0        0         0 

**Step 3: Preprocess the Data**

In [None]:
# Handle missing values.
# 'Embarked' is a category stored as numeric codes (see the df.head() output),
# so it has to be filled with its mode BEFORE the blanket mean-fill. In the
# opposite order the mean (a fractional code that matches no real category) is
# written into the gaps first and the mode fill never has anything left to do.
# The result is assigned back instead of calling fillna(inplace=True) on
# df['Embarked']: that chained in-place form is deprecated in pandas 2.x and
# does not update df at all when copy-on-write is enabled.
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])

# Fill the remaining gaps in numeric columns with each column's mean.
# numeric_only=True keeps this from raising if a non-numeric column is present.
df = df.fillna(df.mean(numeric_only=True))

# Convert categorical variables into numerical form.
# Each column gets its own encoder so its fitted mapping (classes_) is kept;
# refitting one shared encoder would discard the 'Sex' mapping.
encoders = {}
for column in ('Sex', 'Embarked'):
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Kept for backward compatibility: the original cell left `le` fitted on 'Embarked'.
le = encoders['Embarked']

# Select relevant features
features = ['Pclass', 'Sex', 'Age', 'sibsp', 'Parch']
X = df[features]

# The target column is spelled '2urvived' in this CSV (see the df.head() output).
y = df['2urvived']



**Step 4: Split the Data into Training and Testing Sets**

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


**Step 5: Train the Model**

In [None]:
# Pin the seed so the fitted tree, and the accuracy reported below, are the
# same on every run. Without it the tree can differ between runs because
# scikit-learn shuffles the candidate features at each split. 42 matches the
# seed already used for the train/test split.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)


**Step 6: Evaluate the Model**

In [None]:
# Predict the held-out rows and report the fraction of labels predicted correctly.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.7442748091603053


**Step 7: Make Predictions**

In [None]:
# One hypothetical passenger; keys are listed in the same order as the
# training feature columns.
passenger = {'Pclass': 3, 'Sex': 1, 'Age': 25, 'sibsp': 0, 'Parch': 1}
new_data = pd.DataFrame([passenger])

prediction = model.predict(new_data)
print("Survival Prediction:", prediction)


Survival Prediction: [0]


**Model 2: XGBoost Classifier**

In [None]:
from xgboost import XGBClassifier

# XGBoost classifier with library defaults, fitted on the training split.
model = XGBClassifier()
model.fit(X=X_train, y=y_train)


In [None]:
import xgboost as xgb

# Gradient-boosted classifier with the seed pinned to 42, fitted on the training split.
model = xgb.XGBClassifier(random_state=42)
model.fit(X=X_train, y=y_train)

# Predict the held-out rows and report the fraction of labels predicted correctly.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.7595419847328244


In [None]:
# Same hypothetical passenger as before, this time scored by whichever model
# is currently bound to `model`.
feature_names = ['Pclass', 'Sex', 'Age', 'sibsp', 'Parch']
feature_values = [3, 1, 25, 0, 1]
new_data = pd.DataFrame([feature_values], columns=feature_names)

prediction = model.predict(new_data)
print("Survival Prediction:", prediction)


Survival Prediction: [1]
