# In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for input_path in (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
):
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

from sklearn.ensemble import RandomForestClassifier

# Read the training and test CSVs from the Kaggle input directory (in that order).
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/titanic/train.csv",
        "/kaggle/input/titanic/test.csv",
    )
)

# Titles treated as rare; every one of them is encoded with the shared "Rare" code (4).
rare_titles = ["Dr", "Rev", "Major", "Col", "Countess", "Capt", "Sir", "Lady", "Don", "Jonkheer"]


def fill_missing(df):
    """Impute, encode and feature-engineer a passenger frame IN PLACE.

    Despite the name, this does more than fill gaps:

    * ``Age`` / ``Fare`` NaNs are replaced by the mean of that column in
      ``df`` itself; ``Embarked`` NaNs become ``"S"``.
    * ``Embarked`` is encoded S=0, C=1, Q=2 and ``Sex`` male=0, female=1
      (any other value becomes NaN, as ``Series.map`` does for missing keys).
    * ``Cabin`` is dropped when present.
    * ``FamilySize`` = ``SibSp + Parch + 1`` is added.
    * ``Title`` is added: the first run of letters followed by a dot in
      ``Name``, encoded Mr=0, Miss/Mlle/Ms=1, Mrs/Mme=2, Master=3 and
      4 for every title in ``rare_titles``, any unrecognised title, or a
      name with no title at all.

    NOTE(review): the means are computed from whichever frame is passed, so
    separate frames are imputed with different values -- confirm this is
    intended.

    Args:
        df: DataFrame with columns ``Age``, ``Fare``, ``Embarked``, ``Sex``,
            ``SibSp``, ``Parch`` and ``Name``; ``Cabin`` is optional.

    Returns:
        The same ``df`` object, mutated in place.
    """
    df.fillna({"Age": df["Age"].mean(), "Fare": df["Fare"].mean(), "Embarked": "S"}, inplace=True)
    df["Embarked"] = df["Embarked"].map({"S": 0, "C": 1, "Q": 2})
    df["Sex"] = df["Sex"].map({"male": 0, "female": 1})
    # errors="ignore": a frame that has no Cabin column should not raise KeyError.
    df.drop(columns=["Cabin"], errors="ignore", inplace=True)
    df["FamilySize"] = df["SibSp"] + df["Parch"] + 1

    # One lookup table covering the canonical titles, their French/alternate
    # spellings, and every rare title.
    title_codes = {
        "Mr": 0,
        "Miss": 1, "Mlle": 1, "Ms": 1,
        "Mrs": 2, "Mme": 2,
        "Master": 3,
        "Rare": 4,
    }
    title_codes.update(dict.fromkeys(rare_titles, 4))

    # Raw string: "\." in a normal string literal is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+).
    titles = df["Name"].str.extract(r"([A-Za-z]+)\.", expand=False)

    # Unknown or missing titles fall into the "Rare" bucket (4). The cast keeps
    # the dtype integer whether or not any title was unmapped.
    df["Title"] = titles.map(title_codes).fillna(4).astype("int64")
    return df

# Run the same preprocessing over both splits (training frame first).
train_data, test_data = fill_missing(train_data), fill_missing(test_data)

# Columns fed to the model; "Survived" is the prediction target.
features = ["Pclass", "Sex", "Age", "Fare", "Embarked", "FamilySize", "Title"]
X, y = train_data[features], train_data["Survived"]
X_test = test_data[features]

# Fixed seed so repeated runs build the same forest; fit() returns the
# estimator itself, so the prediction can be chained onto it.
model = RandomForestClassifier(random_state=42)
predictions = model.fit(X, y).predict(X_test)

# Two-column submission: passenger id alongside the predicted label.
output = test_data[["PassengerId"]].assign(Survived=predictions)

output.to_csv("submission.csv", index=False)



# Output:
# /kaggle/input/titanic/train.csv
# /kaggle/input/titanic/test.csv
# /kaggle/input/titanic/gender_submission.csv
