In [2]:
import pandas as pd 
import numpy as np
import torch
import matplotlib.pyplot as plt 
import seaborn as sns 

In [3]:
df=pd.read_csv('train.csv')
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [4]:
# fill nan from columns
df['Embarked']=df['Embarked'].apply(lambda x:np.random.choice(['C','S','Q']) if pd.isna(x) else x)
df["Age"]=df["Age"].apply(lambda x:np.mean(df['Age']) if pd.isna(x) else x).astype(int)

In [5]:
df['C']=(df['Embarked'] == "C" ).astype(int)
df['S']=(df['Embarked'] == "S" ).astype(int)
df['Q']=(df['Embarked'] == "Q" ).astype(int)
df['male']=(df['Sex'] == 'male').astype(int)


In [6]:
#delete useless columns
df.drop(['Name'],axis=1,inplace=True)
df.drop(['Ticket'],axis=1,inplace=True)
df.drop(['Cabin'],axis=1,inplace=True)
df.drop(['Embarked'],axis=1,inplace=True)
df.drop(['Sex'],axis=1,inplace=True)


In [7]:
df#now we have a clean dataframe

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare,C,S,Q,male
0,1,0,3,22,1,0,7.2500,0,1,0,1
1,2,1,1,38,1,0,71.2833,1,0,0,0
2,3,1,3,26,0,0,7.9250,0,1,0,0
3,4,1,1,35,1,0,53.1000,0,1,0,0
4,5,0,3,35,0,0,8.0500,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,27,0,0,13.0000,0,1,0,1
887,888,1,1,19,0,0,30.0000,0,1,0,0
888,889,0,3,29,1,2,23.4500,0,1,0,0
889,890,1,1,26,0,0,30.0000,1,0,0,1


## Spliting our data frame to train,test X,y  and totensor

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
X=df[['Pclass','Age','SibSp','Parch','Fare','C','S','Q','male']].values
y=df[['Survived']].values

scaler=StandardScaler()
X=scaler.fit_transform(X)

X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.2,random_state=42)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

## Lets create our model

In [9]:
from torch import nn
class TitanicModel(nn.Module):
    def __init__(self,input_shape,output_shape,hidden_units):
        super().__init__()
        self.net=nn.Sequential(
            nn.Linear(in_features=input_shape,out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units,out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units,out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units,out_features=output_shape),
            nn.Sigmoid()
        )
    def forward(self,x):
        return self.net(x)
model_T=TitanicModel(input_shape=9,hidden_units=10,output_shape=1)        

In [10]:
logits=model_T(X_test)
preds=(logits>0.5).int()


## Lets train our model_T

In [11]:
loss_fn=nn.BCELoss()
optimizer=torch.optim.SGD(lr=0.1,params=model_T.parameters())

In [12]:
def accuracy_fn(y_true: torch.Tensor, y_pred: torch.Tensor) -> float:

    y_pred_labels = (y_pred > 0.5).float()

    correct = (y_pred_labels == y_true).float().sum()
    acc = correct / y_true.numel()  
    return acc.item()              

In [13]:
from tqdm.auto import tqdm
epochs=2000

for epoch in tqdm(range(epochs)):
    model_T.train()

    y_pred=model_T(X_train)
    
    train_loss=loss_fn(y_pred,y_train)
    train_acc=accuracy_fn(y_train,y_pred)
    
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    with torch.inference_mode():
        model_T.eval()
        y_pred_test=model_T(X_test)
        test_loss=loss_fn(y_pred_test,y_test)
        test_acc=accuracy_fn(y_test,y_pred_test)
    if epoch%500==0:
        print(f"Epoch : {epoch} | Train_Loss : {train_loss:0.1f} | Train_Accuracy : {train_acc:0.1f} | Test_Loss : {test_loss:0.1f} | Test_Accuracy : {test_acc:0.1f}")


  0%|          | 0/2000 [00:00<?, ?it/s]

Epoch : 0 | Train_Loss : 0.7 | Train_Accuracy : 0.6 | Test_Loss : 0.7 | Test_Accuracy : 0.6
Epoch : 500 | Train_Loss : 0.4 | Train_Accuracy : 0.8 | Test_Loss : 0.4 | Test_Accuracy : 0.8
Epoch : 1000 | Train_Loss : 0.4 | Train_Accuracy : 0.9 | Test_Loss : 0.4 | Test_Accuracy : 0.8
Epoch : 1500 | Train_Loss : 0.4 | Train_Accuracy : 0.9 | Test_Loss : 0.4 | Test_Accuracy : 0.8


## Use our model on test.csv

In [14]:
test_df=pd.read_csv('test.csv')

In [15]:
test_df

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


In [16]:
# fill nan from columns
test_df['Embarked']=test_df['Embarked'].apply(lambda x:np.random.choice(['C','S','Q']) if pd.isna(x) else x)
test_df["Age"]=test_df["Age"].apply(lambda x:np.mean(test_df['Age']) if pd.isna(x) else x).astype(int)

In [17]:
test_df['C']=(test_df['Embarked'] == "C" ).astype(int)
test_df['S']=(test_df['Embarked'] == "S" ).astype(int)
test_df['Q']=(test_df['Embarked'] == "Q" ).astype(int)
test_df['male']=(test_df['Sex'] == 'male').astype(int)


In [18]:
#delete useless columns
test_df.drop(['Name'],axis=1,inplace=True)
test_df.drop(['Ticket'],axis=1,inplace=True)
test_df.drop(['Cabin'],axis=1,inplace=True)
test_df.drop(['Embarked'],axis=1,inplace=True)
test_df.drop(['Sex'],axis=1,inplace=True)


In [19]:
test_df

Unnamed: 0,PassengerId,Pclass,Age,SibSp,Parch,Fare,C,S,Q,male
0,892,3,34,0,0,7.8292,0,0,1,1
1,893,3,47,1,0,7.0000,0,1,0,0
2,894,2,62,0,0,9.6875,0,0,1,1
3,895,3,27,0,0,8.6625,0,1,0,1
4,896,3,22,1,1,12.2875,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...
413,1305,3,30,0,0,8.0500,0,1,0,1
414,1306,1,39,0,0,108.9000,1,0,0,0
415,1307,3,38,0,0,7.2500,0,1,0,1
416,1308,3,30,0,0,8.0500,0,1,0,1


In [20]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
X=test_df[['Pclass','Age','SibSp','Parch','Fare','C','S','Q','male']].values

scaler=StandardScaler()
X=scaler.fit_transform(X)
X= torch.tensor(X, dtype=torch.float32)

test_logits=model_T(X)
test_logits.sigmoid()
test_preds=(test_logits>0.5).int()

In [21]:
test_preds.shape

torch.Size([418, 1])

In [22]:
my_prediction = pd.DataFrame({
    "PassengerId": test_df["PassengerId"],
    "Survived": test_preds.cpu().numpy().flatten()
})

my_prediction.to_csv("my_prediction.csv", index=False)

my_prediction['Survived'].value_counts()

Survived
0    283
1    135
Name: count, dtype: int64