# load and Clean Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("train.csv")
df["Sex"]=df["Sex"].replace({"male":str(1),"female":str(0)}).astype(int)
df["Embarked"].fillna("S",inplace=True)
df["Embarked"]=df["Embarked"].replace({"S":str(0),"Q":str(1),"C":str(2)}).astype(int)
y_label=np.array(df["Survived"].values)
Pclass=np.array(df["Pclass"].values)
gender = np.array(df["Sex"].values)

ages=np.array(df["Age"].values)
ages[np.isnan(ages)]=0
ages=np.nan_to_num(ages)
mean_age=np.mean(ages)
ages[ages==0] = int(mean_age)
gap = int((np.max(ages)-np.min(ages))/2)
ages = (ages-gap)/np.max(ages)*10

SibSp=np.array(df["SibSp"].values)
Parch=np.array(df["Parch"].values)
embarked = np.array(df["Embarked"].values)
X_train=np.vstack((Pclass,gender,ages,SibSp,Parch,embarked)).T
X_train

array([[ 3.   ,  1.   , -2.125,  1.   ,  0.   ,  0.   ],
       [ 1.   ,  0.   , -0.125,  1.   ,  0.   ,  2.   ],
       [ 3.   ,  0.   , -1.625,  0.   ,  0.   ,  0.   ],
       ...,
       [ 3.   ,  0.   , -2.   ,  1.   ,  2.   ,  0.   ],
       [ 1.   ,  1.   , -1.625,  0.   ,  0.   ,  2.   ],
       [ 3.   ,  1.   , -0.875,  0.   ,  0.   ,  1.   ]])

In [3]:
X_train.shape

(891, 6)

In [4]:
y_label = np.array(df["Survived"].values)
# y_add=[]
# for i in range(891):
#     if y_label[i]==1:
#         y_add.append(0)
#     else:
#         y_add.append(1)
# y_label=np.stack((y_label,y_add),axis=1)

In [5]:
from sklearn.model_selection import train_test_split
import torch

X_train,X_cv,y_train,y_cv=train_test_split(X_train,y_label,test_size = 0.3)
X_train=X_train.reshape(X_train.shape[0],6)
y_train=y_train.reshape(y_train.shape[0])

In [6]:
X_train,y_train = torch.from_numpy(X_train).float(), torch.from_numpy(y_train).long()
train_dataset = torch.utils.data.TensorDataset(X_train,y_train)
train_loader = torch.utils.data.DataLoader(train_dataset,batch_size=4)

In [7]:
print(X_train.shape)
print(y_train.shape)

torch.Size([623, 6])
torch.Size([623])


In [8]:
import torch.nn as nn
import torch.nn.functional as F

class FNN(nn.Module):
    def __init__(self):
        super(FNN,self).__init__()
        self.f1 = nn.Linear(1*1*6,30)
        self.f2 = nn.Linear(30,10)
        self.f3 = nn.Linear(10,2)
        
    def forward(self,x):
        x=F.relu(self.f1(x))
        x=F.relu(self.f2(x))
        x=self.f3(x)
        return x
classes=('0','1')

In [9]:
FNN()

FNN(
  (f1): Linear(in_features=6, out_features=30, bias=True)
  (f2): Linear(in_features=30, out_features=10, bias=True)
  (f3): Linear(in_features=10, out_features=2, bias=True)
)

In [20]:
import torch.optim as optim
net = FNN()
criterion=nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(),lr=0.001,momentum=0.9)

In [23]:
loss_record=[]
net_record = FNN()
for epoch in range(3):
    running_loss = 0.0
    for i,data in enumerate(train_loader,0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        loss=criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        loss_record.append(loss)
        if (i >=1 and np.min(loss_record)==loss):
            net_record = net
        
        running_loss += loss.item()
        if i % 10 == 9: 
            print('[%d,%3d] loss: %.3f'% (epoch+1,i+1,running_loss/10))
            running_loss=0.0
        
print("Finish!")

[1, 10] loss: 0.395
[1, 20] loss: 0.427
[1, 30] loss: 0.530
[1, 40] loss: 0.572
[1, 50] loss: 0.556
[1, 60] loss: 0.564
[1, 70] loss: 0.453
[1, 80] loss: 0.476
[1, 90] loss: 0.517
[1,100] loss: 0.452
[1,110] loss: 0.436
[1,120] loss: 0.467
[1,130] loss: 0.556
[1,140] loss: 0.465
[1,150] loss: 0.459
[2, 10] loss: 0.376
[2, 20] loss: 0.411
[2, 30] loss: 0.522
[2, 40] loss: 0.562
[2, 50] loss: 0.538
[2, 60] loss: 0.546
[2, 70] loss: 0.434
[2, 80] loss: 0.460
[2, 90] loss: 0.508
[2,100] loss: 0.432
[2,110] loss: 0.431
[2,120] loss: 0.451
[2,130] loss: 0.550
[2,140] loss: 0.441
[2,150] loss: 0.459
[3, 10] loss: 0.363
[3, 20] loss: 0.399
[3, 30] loss: 0.517
[3, 40] loss: 0.553
[3, 50] loss: 0.523
[3, 60] loss: 0.533
[3, 70] loss: 0.419
[3, 80] loss: 0.450
[3, 90] loss: 0.504
[3,100] loss: 0.416
[3,110] loss: 0.428
[3,120] loss: 0.440
[3,130] loss: 0.547
[3,140] loss: 0.422
[3,150] loss: 0.461
Finish!


In [25]:
PATH = './CNN.pth'
torch.save(net_record.state_dict(), PATH)

In [77]:
df = pd.read_csv("test.csv")
df["Sex"]=df["Sex"].replace({"male":str(1),"female":str(0)}).astype(int)
df["Embarked"].fillna("S",inplace=True)
df["Embarked"]=df["Embarked"].replace({"S":str(0),"Q":str(1),"C":str(2)}).astype(int)
Pclass=np.array(df["Pclass"].values)
gender = np.array(df["Sex"].values)

ages=np.array(df["Age"].values)
ages[np.isnan(ages)]=0
ages=np.nan_to_num(ages)
mean_age=np.mean(ages)
ages[ages==0] = int(mean_age)
gap = int((np.max(ages)-np.min(ages))/2)
ages = (ages-gap)/np.max(ages)*10

SibSp=np.array(df["SibSp"].values)
Parch=np.array(df["Parch"].values)
embarked = np.array(df["Embarked"].values)
X_test=np.vstack((Pclass,gender,ages,SibSp,Parch,embarked)).T

In [78]:
X_test = torch.Tensor(X_test).float()
X_test=X_test.reshape(X_test.shape[0], 6)

In [79]:
X_test.shape

torch.Size([418, 6])

In [81]:
pred=[]
with torch.no_grad():
    for data in X_test:
        inputs = data
#         imputs=imputs.unsqueeze(0) # 增加一个维度
        output = net(inputs)
        if output[0]>output[1]:
            i=0
        else:
            i=1
#         _,output=torch.max(output,1)
        pred.append(i)

In [82]:
list_num=np.array([])
list_num = np.array(range(len(pred))).T
list_num += 892
pred_ndarray=np.array(pred)
submission=np.vstack((list_num,pred_ndarray)).T
submission=submission.tolist()

In [84]:
import csv
for i in range(2):
    for j in range(list_num.shape[0]):
        submission[j][i]=str(submission[j][i])
        
with open('output.csv', 'w', newline='') as f:  
    writer = csv.writer(f) 
    for j in range(list_num.shape[0]):
        writer.writerow(submission[j]) 