In [1]:
import os

import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the Titanic training data from a local CSV file.
# The location can be overridden with the TITANIC_CSV environment variable so the
# notebook is not tied to one machine; when the variable is unset, the original
# path is used unchanged.
# Unused remote alternative, kept for reference:
# data_url = "https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv"
DATA_PATH = os.environ.get("TITANIC_CSV", r"C:\Users\kamba\Desktop\train.csv")
data = pd.read_csv(DATA_PATH)

In [3]:
# List the column labels of the loaded frame to see which fields are available
print(data.columns)

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')


In [4]:
# Preview the first five rows of the loaded frame
data.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:

# Data preprocessing: drop the free-text columns, encode the categorical
# columns as integers, then impute missing values.
data.drop(["Name", "Cabin", "Ticket"], axis=1, inplace=True)
data["Sex"] = data["Sex"].map({"male": 0, "female": 1})
data["Embarked"] = data["Embarked"].map({"C": 0, "Q": 1, "S": 2})

# Sex and Embarked now hold category codes; a column mean would invent a
# fractional code that matches no real category, so fill their gaps with the
# most frequent code instead.
for coded_column in ("Sex", "Embarked"):
    most_common = data[coded_column].mode()
    if not most_common.empty:  # mode() is empty when the whole column is missing
        data[coded_column] = data[coded_column].fillna(most_common.iloc[0])

# Any remaining numeric gaps keep the original mean imputation.
data.fillna(data.mean(numeric_only=True), inplace=True)

In [6]:
# Separate the label column from the predictors
y = data["Survived"]
X = data.drop(columns=["Survived"])
# NOTE(review): PassengerId stays in X, so the tree can split on a row
# identifier — consider dropping it here and in the example prediction cell.

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Create and train the decision tree classifier.
# The tree permutes candidate features at random while searching for splits, so
# without a seed the fitted model (and the reported accuracy) can change between
# runs. Reusing the seed of the train/test split makes the run repeatable.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

In [8]:
# Make predictions on the test set.
# clf is the tree fitted on X_train; X_test is the held-out part of the split.
y_pred = clf.predict(X_test)

In [9]:

# Score the predictions against the held-out labels and report the
# fraction that match
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7430167597765364


In [10]:
# Preview the first five training rows to confirm the feature columns and their order
X_train.head(5)

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
331,332,1,0,45.5,0,0,28.5,2.0
733,734,2,0,23.0,0,0,13.0,2.0
382,383,3,0,32.0,0,0,7.925,2.0
704,705,3,0,26.0,1,0,7.8542,2.0
813,814,3,1,6.0,4,2,31.275,2.0


In [11]:
# Example usage: score one hypothetical passenger with the fitted tree.
# The record's keys follow the column order of the training features.
example_person = pd.DataFrame([
    {
        "PassengerId": 230,
        "Pclass": 3,
        "Sex": 0,
        "Age": 30,
        "SibSp": 0,
        "Parch": 0,
        "Fare": 8.05,
        "Embarked": 2,
    }
])

# predict returns one label per input row; label 1 means "survived"
predicted_survival = clf.predict(example_person)
print(
    "The person is predicted to be safe from sinking."
    if predicted_survival[0] == 1
    else "The person is predicted not to be safe from sinking."
)

The person is predicted not to be safe from sinking.
