In [25]:
# Import necessary libraries
import io

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

In [26]:
# Colab file-transfer helper; this cell only works inside a Google Colab runtime.
from google.colab import files


# Prompt for a file upload and keep the returned object in `uploaded`.
# NOTE(review): the recorded output of this cell shows the upload being saved as
# 'tested (1).csv', i.e. under a different name than the one that was uploaded.
uploaded = files.upload()

Saving tested.csv to tested (1).csv


In [27]:
# Load the passenger data from the file that was just uploaded.
# The bytes returned by files.upload() are parsed directly instead of re-reading a
# hard-coded path: Colab saves an upload under a new name when the name is already
# taken (the recorded run saved 'tested.csv' as 'tested (1).csv'), so reading
# 'tested.csv' from disk could silently load an older copy of the data.
if uploaded:
    # Use the first uploaded entry; this notebook expects a single CSV file.
    uploaded_name, uploaded_bytes = next(iter(uploaded.items()))
    titanic_data = pd.read_csv(io.BytesIO(uploaded_bytes))
else:
    # Nothing was uploaded in this session: fall back to a copy already on disk.
    titanic_data = pd.read_csv('tested.csv')

# Display the first few rows of the DataFrame
titanic_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,0,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,1,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,0,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,0,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,1,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [28]:
# Preprocessing, step 1: discard the columns that are left out of the model.
unused_columns = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'Embarked']
titanic_data = titanic_data.drop(columns=unused_columns)

In [29]:
# One-hot encode the categorical columns. drop_first=True omits the first level of
# each column, so every category is represented by one fewer indicator column.
categorical_columns = ['Sex', 'Pclass']
titanic_data = pd.get_dummies(
    titanic_data,
    columns=categorical_columns,
    drop_first=True,
)

In [30]:
# Impute missing ages with the median age, then drop any row that still has a
# missing value in some other column.
# The column is reassigned instead of being filled with inplace=True: calling
# fillna(inplace=True) on a column selection is a chained assignment, which newer
# pandas versions warn about and which leaves the parent frame unchanged when
# Copy-on-Write is enabled.
age_median = titanic_data['Age'].median()
titanic_data['Age'] = titanic_data['Age'].fillna(age_median)
titanic_data = titanic_data.dropna()

In [31]:
# Separate the label column (y) from the predictor columns (X).
y = titanic_data['Survived']
X = titanic_data.drop(columns=['Survived'])

In [32]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [33]:
# Build a Random Forest Classifier model
# random_state=42 seeds the estimator (the same value is passed to the train/test
# split), so repeated runs on the same data build the same forest.
model = RandomForestClassifier(random_state=42)
# Train on the training split only. The bare call is the cell's last expression,
# so its return value is what the notebook shows as this cell's output.
model.fit(X_train, y_train)


In [34]:
# Make predictions on the test set
# y_pred holds the model's predicted labels for the held-out rows in X_test; the
# evaluation cells compare it against y_test.
y_pred = model.predict(X_test)


In [35]:
# Accuracy on the held-out rows: the share of test labels predicted correctly.
# NOTE(review): the recorded run reports 100.00%. In the rows this notebook displays,
# every survivor has Sex_male == 0, so the label may be fully determined by Sex in
# this file -- confirm where the labels come from before trusting this score.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Model Accuracy: {accuracy * 100:.2f}%')


Model Accuracy: 100.00%


In [36]:
# Per-class precision / recall / F1 for the held-out rows.
# The heading and the report are printed with separate calls: passing both to a
# single print() inserts the default separator (one space) right after the '\n',
# which pushes the report's header row one column out of line with its data rows.
print('Classification Report:')
print(classification_report(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       1.00      1.00      1.00        34

    accuracy                           1.00        84
   macro avg       1.00      1.00      1.00        84
weighted avg       1.00      1.00      1.00        84



In [37]:
# List every row of the preprocessed frame whose label is Survived == 1.
# These are the labels stored in the data, not the model's predictions.
is_survivor = titanic_data['Survived'].eq(1)
survived_passengers = titanic_data.loc[is_survivor]
print('\nPassengers who survived:')
print(survived_passengers)


Passengers who survived:
     Survived   Age  SibSp  Parch      Fare  Sex_male  Pclass_2  Pclass_3
1           1  47.0      1      0    7.0000         0         0         1
4           1  22.0      1      1   12.2875         0         0         1
6           1  30.0      0      0    7.6292         0         0         1
8           1  18.0      0      0    7.2292         0         0         1
12          1  23.0      1      0   82.2667         0         0         0
..        ...   ...    ...    ...       ...       ...       ...       ...
409         1   3.0      1      1   13.7750         0         0         1
410         1  27.0      0      0    7.7500         0         0         1
411         1  37.0      1      0   90.0000         0         0         0
412         1  28.0      0      0    7.7750         0         0         1
414         1  39.0      0      0  108.9000         0         0         0

[152 rows x 8 columns]
