In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the training data
train_data = pd.read_csv("titanictrain.csv")

# Separate the target column from the features, dropping the columns that are
# not used as model inputs
X = train_data.drop(['Survived', 'PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)
y = train_data['Survived']

# Handle missing values (you may need to customize this based on your specific dataset)
# The fill is done with one DataFrame-level fillna whose result is reassigned.
# Calling fillna(inplace=True) on a column selection such as X['Age'] is chained
# assignment: pandas 2.x warns about it, and with Copy-on-Write enabled it
# operates on a temporary object and leaves X untouched.
# NOTE: the medians/mode are computed over the whole dataset, i.e. before any
# train/test split, so held-out rows contribute to the imputation values.
X = X.fillna({
    'Age': X['Age'].median(),
    'Embarked': X['Embarked'].mode()[0],
    'Fare': X['Fare'].median(),
})

# Convert categorical variables to numerical (drop_first removes one dummy
# level per category)
X = pd.get_dummies(X, columns=['Sex', 'Embarked'], drop_first=True)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaler is fitted on the training rows only and
# the same fitted transformation is then applied to both partitions
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train a RandomForestClassifier (seeded) on the scaled training rows
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = clf.predict(X_test)

# Print the classification results
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Evaluate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Print the first few rows of X_test, y_test, and y_pred.
# X_test is rebuilt as a DataFrame here; y_test's index is attached so the
# feature rows carry the same row labels as the y_test values printed below
# (X_test and y_test come from the same split, in the same row order).
print("X_test:")
print(pd.DataFrame(X_test, columns=X.columns, index=y_test.index).head())

print("y_test:")
print(y_test.head())

# Only the first five predictions are shown, matching the five rows that
# .head() prints above, instead of dumping the whole prediction array.
print("y_pred:")
print(y_pred[:5])


Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.87      0.85       105
           1       0.80      0.76      0.78        74

    accuracy                           0.82       179
   macro avg       0.82      0.81      0.81       179
weighted avg       0.82      0.82      0.82       179

Accuracy: 0.8212290502793296
X_test:
     Pclass       Age     SibSp     Parch      Fare  Sex_male  Embarked_Q  \
0  0.813034 -0.092634  0.379923  0.784700 -0.333901  0.724310   -0.303355   
1 -0.400551  0.138156 -0.470722 -0.479342 -0.425284  0.724310   -0.303355   
2  0.813034 -0.708074 -0.470722 -0.479342 -0.474867  0.724310   -0.303355   
3 -0.400551 -1.785093 -0.470722  0.784700  0.007966 -1.380624   -0.303355   
4  0.813034 -1.169653  0.379923 -0.479342 -0.411002 -1.380624   -0.303355   

   Embarked_S  
0   -1.687794  
1    0.592489  
2    0.592489  
3    0.592489  
4   -1.687794  
y_test:
709    1
439    0
840    0
720    1
39     1
N