## Imports

In [1]:
import pandas as pd

from sklearn.linear_model import LogisticRegression

## Load the Data

In [2]:
# Read both competition CSVs in one pass; the first path populates `train`
# and the second populates `test`.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/titanic/train.csv",
        "/kaggle/input/titanic/test.csv",
    )
)

## Look at the Data

In [3]:
# Show the first five rows of the training frame as the cell output.
train.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## Preparing Data for the Model

### Select Features & Target

In [4]:
# Columns fed to the model. "FamilySize" and "IsAlone" are not read from the
# CSVs; they are added to both frames in the cells that follow.
features = [
    "Pclass",
    "Sex",
    "Age",
    "Fare",
    "FamilySize",
    "IsAlone",
]

# Name of the label column in the training frame.
target = "Survived"

In [5]:
# FamilySize = SibSp + Parch + 1 (the +1 presumably counts the passenger
# themselves). The same formula is applied to both splits.
train["FamilySize"] = train["SibSp"].add(train["Parch"]).add(1)
test["FamilySize"] = test["SibSp"].add(test["Parch"]).add(1)

In [6]:
# Integer flag: 1 where FamilySize is exactly 1, otherwise 0.
train["IsAlone"] = train["FamilySize"].eq(1).astype(int)
test["IsAlone"] = test["FamilySize"].eq(1).astype(int)

## Convert Text → Numbers

In [7]:
"""
male = 0

female = 1
"""

# Label -> integer code used for the "Sex" feature.
SEX_CODES = {"male": 0, "female": 1}


def encode_sex(sex: pd.Series) -> pd.Series:
    """Return the Sex column encoded with SEX_CODES.

    The column is overwritten in place by this cell, so the cell must be safe
    to run more than once. ``Series.map`` turns every value that is not a key
    of the mapping into NaN; mapping an already-encoded (numeric) column a
    second time would therefore wipe it out. A numeric column is returned
    unchanged instead.

    Parameters
    ----------
    sex : pd.Series
        Either the raw string labels or the already-encoded numeric column.

    Returns
    -------
    pd.Series
        The numeric encoding of ``sex``.
    """
    if pd.api.types.is_numeric_dtype(sex):
        # Already encoded by a previous run of this cell.
        return sex
    return sex.map(SEX_CODES)


train["Sex"] = encode_sex(train["Sex"])
test["Sex"] = encode_sex(test["Sex"])

## Handle Missing Values (Age & Fare)

In [8]:
# Both fill values are computed from the training split only and then reused
# for the test split.
median_age, median_fare = train["Age"].median(), train["Fare"].median()

# Age is filled in both splits.
train["Age"] = train["Age"].fillna(value=median_age)
test["Age"] = test["Age"].fillna(value=median_age)

# Fare is filled in the test split only; the training Fare column is left as-is.
test["Fare"] = test["Fare"].fillna(value=median_fare)

In [9]:
# Per-column count of missing values across the selected training features.
train[features].isna().sum()

Pclass        0
Sex           0
Age           0
Fare          0
FamilySize    0
IsAlone       0
dtype: int64

## Train Model

### Prepare Training Data

In [10]:
# Design matrix: the selected feature columns of the training frame.
X = train.loc[:, features]

# Label vector: the target column of the training frame.
y = train.loc[:, target]

### Logistic Regression

In [11]:
# Build the classifier with an explicit solver iteration cap.
model = LogisticRegression(
    max_iter=1000,
)

# Fit on the training features and labels; as the last expression of the cell,
# the fitted estimator is also displayed.
model.fit(X=X, y=y)

### Make Predictions on Test Data

In [12]:
# Feature matrix for the test split, using the same columns the model was fit on.
X_test = test[features]

# Validate before predicting: the null-count cell that follows only runs after
# the prediction has already been made, so it cannot guard this call. Fail here
# with the offending columns named rather than deep inside the estimator.
test_null_counts = X_test.isnull().sum()
if test_null_counts.any():
    raise ValueError(
        "Test features contain missing values: "
        f"{test_null_counts[test_null_counts > 0].to_dict()}"
    )

# Predicted class label for every test row.
predictions = model.predict(X_test)

In [13]:
# Per-column count of missing values in the test feature matrix.
X_test.isna().sum()

Pclass        0
Sex           0
Age           0
Fare          0
FamilySize    0
IsAlone       0
dtype: int64

### Create Submission File

In [14]:
# Two-column frame pairing each test PassengerId with its predicted label.
submission = pd.DataFrame(
    data={"PassengerId": test["PassengerId"], "Survived": predictions},
)

In [15]:
# Write the submission to the working directory without the index column.
submission.to_csv(path_or_buf="submission.csv", index=False)