# Titanic Competition File

### Importing Required Libraries 

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

###  Load the training data 

In [2]:
# Read train.csv (path is relative to the notebook's working directory) into a DataFrame.
train_data = pd.read_csv(filepath_or_buffer='train.csv')

### Load the test data

In [3]:
# Read test.csv (path is relative to the notebook's working directory) into a DataFrame.
test_data = pd.read_csv(filepath_or_buffer='test.csv')

### Combine training and test data for encoding

In [4]:
# Stack the training rows on top of the test rows so that imputation and
# encoding are applied to both in one pass. ignore_index=True discards the
# original row labels and renumbers the result 0..n-1, which the later
# positional split back into train/test relies on.
combined_data = pd.concat(
    objs=(train_data, test_data),
    axis=0,
    ignore_index=True,
)

### Preprocess the combined data

In [5]:
# Fill missing values column by column and assign the result back to the frame.
# Calling `combined_data[col].fillna(..., inplace=True)` operates on a temporary
# Series: pandas 2.x emits a FutureWarning for it, and with Copy-on-Write enabled
# (the default from pandas 3.0) the parent DataFrame is left unmodified.
#
# NOTE(review): the median/mode are computed over the combined train + test rows,
# so test-set information influences the training features. Confirm that this is
# acceptable, or compute the statistics from the training rows only.
combined_data['Age'] = combined_data['Age'].fillna(combined_data['Age'].median())
# mode() returns a Series (there may be ties); [0] takes the first modal value.
combined_data['Embarked'] = combined_data['Embarked'].fillna(combined_data['Embarked'].mode()[0])
combined_data['Fare'] = combined_data['Fare'].fillna(combined_data['Fare'].median())

##### Note: the `Fare` line above handles missing 'Fare' values

### Encode categorical variables

In [6]:
# Replace each categorical text column with integer codes.
# The same LabelEncoder instance is refit for every column, so once this cell
# has run `encoder` only holds the classes of the last column ('Embarked').
encoder = LabelEncoder()
for column in ('Sex', 'Embarked'):
    combined_data[column] = encoder.fit_transform(combined_data[column])

### Split the combined data back into training and test sets

In [7]:
# Split the preprocessed frame back into its training and test portions.
# The training rows were concatenated first, so they occupy the first
# len(train_data) positions of combined_data.
# The row count is captured once, before `train_data` is rebound, so the
# second slice does not depend on the length of the freshly reassigned frame.
n_train_rows = len(train_data)
# .iloc makes the positional intent explicit; .copy() gives each frame its own
# data so later modifications cannot trigger SettingWithCopyWarning or leak
# back into combined_data.
train_data = combined_data.iloc[:n_train_rows].copy()
test_data = combined_data.iloc[n_train_rows:].copy()

### Select relevant features

In [8]:
# Columns used as model inputs; the same list is reused for the test set.
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
X = train_data[features]
# Cast the target to integer class labels. After the train/test concat the
# 'Survived' column is presumably float (test rows carry no label and become
# NaN -- TODO confirm against test.csv), which would make the classifier learn
# and predict 0.0/1.0 instead of 0/1. astype(int) raises if a training row has
# a missing label, which surfaces bad input early.
y = train_data['Survived'].astype(int)

### Split data into train and validation sets

In [9]:
# Hold out 20% of the labelled rows for validation; the fixed random_state
# makes the shuffle (and therefore the split) reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

### Create and train a Random Forest Classifier

In [10]:
# Random forest with default hyperparameters; random_state pins the seed so
# repeated runs build the same trees.
model = RandomForestClassifier(random_state=42)
# Fit on the training portion only -- the validation rows stay unseen.
model.fit(X=X_train, y=y_train)

RandomForestClassifier(random_state=42)

### Select relevant features for the test data

In [11]:
# Take the same feature columns, in the same order, that the model was fit on.
X_test = test_data.loc[:, features]

### Make predictions on the test data

In [13]:
# Predict a class label for every row of the test feature matrix.
predictions = model.predict(X=X_test)

### Create a submission file

In [14]:
# Build the two-column submission frame and write it to disk.
# The predictions are cast to int because the model may return float labels
# (0.0/1.0) when it was fit on a float 'Survived' column; writing them as
# 0/1 keeps the submission file in integer form.
submission = pd.DataFrame({
    'PassengerId': test_data['PassengerId'],
    'Survived': predictions.astype(int),
})
# index=False: the DataFrame index is not part of the submission.
submission.to_csv('titanic_submission.csv', index=False)

### Evaluate the model on the validation data (optional)

In [15]:
# Score the fitted model on the held-out validation rows and report the
# fraction of correct predictions as a percentage.
val_predictions = model.predict(X=X_val)
accuracy = accuracy_score(y_true=y_val, y_pred=val_predictions)
print(f'Validation Accuracy: {accuracy * 100:.2f}%')

Validation Accuracy: 82.12%
