In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# prepare dataset
class DiabetesDataset(Dataset):
    """Map-style dataset backed by a comma-separated numeric text file.

    Each row of the file is one sample: every column except the last is a
    feature and the last column is the target.

    Args:
        filepath: Path passed straight to ``np.loadtxt``.
    """

    def __init__(self, filepath):
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single row; without it loadtxt returns a 1-D array, shape[0] would
        # be the column count and the column slicing below raises IndexError.
        xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32, ndmin=2)
        self.len = xy.shape[0]  # number of samples (rows)
        self.x_data = torch.from_numpy(xy[:, :-1])
        # Indexing with the list [-1] keeps the target as an (N, 1) column.
        self.y_data = torch.from_numpy(xy[:, [-1]])

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples loaded from the file."""
        return self.len

    
# Load the diabetes table and serve it in shuffled mini-batches of 32 samples.
dataset = DiabetesDataset('diabetes.csv.gz')
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)


# design model using class
class Model(torch.nn.Module):
    """Fully connected 8 -> 6 -> 4 -> 1 network with a sigmoid output.

    The two hidden layers use ReLU; the final sigmoid maps the single output
    unit into (0, 1) so it can be fed to ``BCELoss``.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.activate = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` (last dimension of size 8) through the three layers."""
        hidden = self.activate(self.linear1(x))
        hidden = self.activate(self.linear2(hidden))
        logits = self.linear3(hidden)
        return self.sigmoid(logits)
    
model = Model()

# Loss and optimizer: mean binary cross-entropy on the sigmoid output, Adam updates.
# NOTE(review): lr=0.1 is well above Adam's default of 1e-3 - confirm it is intended.
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.1)

# Containers for a per-epoch loss history.
epoch_list, loss_list = [], []

In [None]:
# training cycle
# Start from an empty history so re-running this cell does not append to the
# values recorded by a previous run.
epoch_list.clear()
loss_list.clear()

for epoch in range(100):
    running_loss = 0.0
    num_batches = 0
    # train_loader shuffles the samples first, then splits them into mini-batches
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        y_pred = model(inputs)

        loss = criterion(y_pred, labels)
        print(epoch, i, loss.item())

        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Record the mean mini-batch loss of this epoch; epoch_list / loss_list
    # were declared up front but never filled, so the curve could not be plotted.
    if num_batches:
        epoch_list.append(epoch)
        loss_list.append(running_loss / num_batches)

# Practice: Titanic survival prediction

In [32]:
# titanic
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# data_train = pd.read_csv("data/titanic/train.csv")
# data_test = pd.read_csv("data/titanic/test.csv")

# prepare dataset
class TitanicDataset(Dataset):
    """Map-style dataset built from a Titanic-style CSV file.

    Reads the columns ``Pclass``, ``Sex``, ``SibSp``, ``Parch`` and ``Fare``
    as features (non-numeric columns are one-hot encoded by
    ``pd.get_dummies``) and ``Survived`` as the target.

    Args:
        filepath: Path of the CSV file passed to ``pd.read_csv``.
    """

    def __init__(self, filepath):
        features = ["Pclass", "Sex", "SibSp", "Parch", "Fare"]
        data = pd.read_csv(filepath)
        self.len = data.shape[0]  # number of samples (rows)

        # Cast the encoded frame to float64 before converting: recent pandas
        # versions emit bool dummy columns, and a frame mixing bool, int and
        # float columns turns into an object-dtype array that
        # torch.from_numpy rejects. float64 matches what the conversion
        # produced when the dummy columns were still integers.
        encoded = pd.get_dummies(data[features]).astype(np.float64)
        self.x_data = torch.from_numpy(np.array(encoded))
        self.y_data = torch.from_numpy(np.array(data["Survived"]))

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of rows read from the CSV file."""
        return self.len

    
# Load the Titanic training split and serve it in shuffled mini-batches of 32 samples.
dataset = TitanicDataset('data/titanic/train.csv')
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)


# design model using class
class Model(torch.nn.Module):
    """Fully connected 6 -> 4 -> 2 -> 1 network with a sigmoid output.

    The two hidden layers use ReLU; the final sigmoid maps the single output
    unit into (0, 1) so it can be fed to ``BCELoss``. Note that this class
    reuses the name ``Model`` of the 8-input network defined earlier in the
    notebook and replaces it once this cell runs.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.linear1 = torch.nn.Linear(6, 4)
        self.linear2 = torch.nn.Linear(4, 2)
        self.linear3 = torch.nn.Linear(2, 1)
        self.activate = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output for ``x`` (last dimension of size 6)."""
        x = self.activate(self.linear1(x))
        x = self.activate(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        return x

    def predict(self, x):
        """Return hard 0/1 predictions for a batch of samples.

        Runs the forward pass without gradient tracking and thresholds the
        output at 0.5 (strictly greater than 0.5 maps to 1).

        Args:
            x: Float tensor of shape ``(N, 6)``.

        Returns:
            A list of ``N`` Python ints, each 0 or 1.
        """
        # Reuse the forward pass instead of repeating the layer stack, so the
        # two code paths cannot drift apart.
        with torch.no_grad():
            probabilities = self(x)
        # Vectorised threshold; flattening turns the (N, 1) column into N labels.
        return (probabilities > 0.5).reshape(-1).long().tolist()
    
model = Model()

# Loss and optimizer: mean binary cross-entropy on the sigmoid output, Adam updates.
# NOTE(review): lr=0.1 is well above Adam's default of 1e-3, and the mini-batch
# losses logged by the training cell do not settle - a smaller rate may be worth trying.
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.1)


In [33]:
# training cycle
for epoch in range(10000):
    # train_loader shuffles the samples first, then splits them into mini-batches
    for i, (batch_inputs, batch_labels) in enumerate(train_loader):
        # The dataset yields float64 features and integer targets; the model
        # and BCELoss are fed float32 tensors.
        inputs = batch_inputs.float()
        labels = batch_labels.float()

        # Drop the trailing size-1 dimension so predictions line up with the
        # one-dimensional label vector.
        y_pred = model(inputs).squeeze(-1)
        loss = criterion(y_pred, labels)

        # Log every mini-batch, but only for every 200th epoch.
        if epoch % 200 == 0:
            print(epoch, i, loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

0 0 2.5958967208862305
0 1 0.8126197457313538
0 2 0.752801239490509
0 3 0.7830712795257568
0 4 0.6936473250389099
0 5 0.7039774060249329
0 6 0.6710496544837952
0 7 0.6604273319244385
0 8 0.6132091879844666
0 9 0.6892154216766357
0 10 0.6516841650009155
0 11 0.6953791379928589
0 12 0.6682546138763428
0 13 0.7040403485298157
0 14 0.6989466547966003
0 15 0.7851924300193787
0 16 0.9021596908569336
0 17 0.6432585716247559
0 18 0.6067265868186951
0 19 0.676828145980835
0 20 0.685175895690918
0 21 0.6355676651000977
0 22 0.6730715036392212
0 23 0.6876325011253357
0 24 0.6522570252418518
0 25 0.6775635480880737
0 26 0.6774240732192993
0 27 0.6409962773323059
200 0 0.40858379006385803
200 1 0.3697158694267273
200 2 0.38445162773132324
200 3 0.4783549904823303
200 4 0.32115042209625244
200 5 0.4744492769241333
200 6 0.6019500494003296
200 7 0.5475504994392395
200 8 0.6539498567581177
200 9 0.5386500954627991
200 10 0.40628379583358765
200 11 0.44430550932884216
200 12 0.42693576216697693
200 13 

2400 0 0.44054916501045227
2400 1 0.4568316340446472
2400 2 0.4146784842014313
2400 3 0.37903282046318054
2400 4 0.4956895709037781
2400 5 0.2925831079483032
2400 6 0.48660892248153687
2400 7 0.6225166320800781
2400 8 0.38934326171875
2400 9 0.49262735247612
2400 10 0.39287069439888
2400 11 0.402217835187912
2400 12 0.4709665775299072
2400 13 0.4169061779975891
2400 14 0.4654250144958496
2400 15 0.5165814161300659
2400 16 0.7311063408851624
2400 17 0.3257800340652466
2400 18 0.4950384497642517
2400 19 0.6582372188568115
2400 20 0.4649197459220886
2400 21 0.4440114200115204
2400 22 0.4901416599750519
2400 23 0.634307324886322
2400 24 0.40325820446014404
2400 25 0.5248196721076965
2400 26 0.6544187664985657
2400 27 0.5407052636146545
2600 0 0.5505704283714294
2600 1 0.4100929796695709
2600 2 0.2887745797634125
2600 3 0.45924097299575806
2600 4 0.5872353315353394
2600 5 0.3613382875919342
2600 6 0.781568169593811
2600 7 0.4838595688343048
2600 8 0.5894134640693665
2600 9 0.474665462970733

4600 0 0.4507945775985718
4600 1 0.4118753671646118
4600 2 0.6239346861839294
4600 3 0.7798731327056885
4600 4 0.4243992269039154
4600 5 0.5472896099090576
4600 6 0.5489082336425781
4600 7 0.46277284622192383
4600 8 0.5509013533592224
4600 9 0.3577403426170349
4600 10 0.5497213006019592
4600 11 0.5482264757156372
4600 12 0.45191290974617004
4600 13 0.4461452066898346
4600 14 0.3416578471660614
4600 15 0.6301451325416565
4600 16 0.46965333819389343
4600 17 0.4917897880077362
4600 18 0.5745563507080078
4600 19 0.4295068085193634
4600 20 0.5664388537406921
4600 21 0.4692048132419586
4600 22 0.46428319811820984
4600 23 0.5356431007385254
4600 24 0.6003036499023438
4600 25 0.5012972950935364
4600 26 0.3695986568927765
4600 27 0.4035394787788391
4800 0 0.601064145565033
4800 1 0.38399794697761536
4800 2 0.5568060278892517
4800 3 0.2655850946903229
4800 4 0.31996676325798035
4800 5 0.47685670852661133
4800 6 0.6340867280960083
4800 7 0.3480367660522461
4800 8 0.5111411213874817
4800 9 0.60520

6800 0 0.6732938885688782
6800 1 0.4353896379470825
6800 2 0.41629984974861145
6800 3 0.49761727452278137
6800 4 0.29881495237350464
6800 5 0.41332200169563293
6800 6 0.5849049091339111
6800 7 0.4889122247695923
6800 8 0.5489603281021118
6800 9 0.482219934463501
6800 10 0.5049343109130859
6800 11 0.46954143047332764
6800 12 0.6412652730941772
6800 13 0.470748633146286
6800 14 0.44020071625709534
6800 15 0.6098464131355286
6800 16 0.3541877269744873
6800 17 0.6203197836875916
6800 18 0.45286062359809875
6800 19 0.49920281767845154
6800 20 0.2918024957180023
6800 21 0.6042749285697937
6800 22 0.4540141820907593
6800 23 0.5534182786941528
6800 24 0.4987007677555084
6800 25 0.5287449955940247
6800 26 0.5770590901374817
6800 27 0.4618973135948181
7000 0 0.37744593620300293
7000 1 0.46106261014938354
7000 2 0.6770110130310059
7000 3 0.40277671813964844
7000 4 0.5068092346191406
7000 5 0.4640564024448395
7000 6 0.5372507572174072
7000 7 0.4133818745613098
7000 8 0.4918830692768097
7000 9 0.50

9000 0 0.5682702660560608
9000 1 0.681220293045044
9000 2 0.5308101773262024
9000 3 0.7019100189208984
9000 4 0.5058436393737793
9000 5 0.37288108468055725
9000 6 0.5814337134361267
9000 7 0.5174456238746643
9000 8 0.6304919123649597
9000 9 0.49904757738113403
9000 10 0.5870500206947327
9000 11 0.5281715393066406
9000 12 0.5182390809059143
9000 13 0.4782353341579437
9000 14 0.5543197989463806
9000 15 0.4106866121292114
9000 16 0.46859604120254517
9000 17 0.4491052031517029
9000 18 0.4548032283782959
9000 19 0.2855585217475891
9000 20 0.5842016339302063
9000 21 0.5267715454101562
9000 22 0.4408054053783417
9000 23 0.3705750107765198
9000 24 0.4495716691017151
9000 25 0.6271613836288452
9000 26 0.4955919682979584
9000 27 0.5531107783317566
9200 0 0.4257992208003998
9200 1 0.4869183599948883
9200 2 0.6039755940437317
9200 3 0.5819627642631531
9200 4 0.3524205684661865
9200 5 0.5863351821899414
9200 6 0.38812267780303955
9200 7 0.5752519369125366
9200 8 0.3986901640892029
9200 9 0.39380332

In [34]:
# Predict survival for the test split and write the result as a CSV file.
test_data = pd.read_csv('data/titanic/test.csv')
features = ["Pclass", "Sex", "SibSp", "Parch", "Fare"]
# Cast the encoded frame to float64 before converting: recent pandas versions
# emit bool dummy columns, and a frame mixing bool, int and float columns turns
# into an object-dtype array that torch.from_numpy rejects.
test = torch.from_numpy(np.array(pd.get_dummies(test_data[features]).astype(np.float64)))
# NOTE(review): a missing value in any feature column would presumably come out
# of the network as NaN, which predict() maps to 0 - confirm the test file has
# no gaps or impute them first.

y = model.predict(test.float())

output = pd.DataFrame({'PassengerId': test_data.PassengerId, 'Survived': y})
output.to_csv('data/titanic/my_predict.csv', index=False)

In [6]:
import torch

features = ["Pclass", "Sex", "SibSp", "Parch", "Fare"]
data = pd.read_csv('data/titanic/train.csv')
# Renamed from `len`: binding that name at cell level shadows the builtin
# len() for every later call made from the notebook.
num_samples = data.shape[0]  # shape is (number of rows, number of columns)

# data[features] is a DataFrame: one-hot encode it, convert it to an array and
# finally to a tensor for matrix computation.
# get_dummies leaves integer columns as they are (they are not one-hot encoded).
# The float64 cast is needed because recent pandas versions emit bool dummy
# columns, which would otherwise yield an object-dtype array that
# torch.from_numpy rejects.
x_data = torch.from_numpy(np.array(pd.get_dummies(data[features]).astype(np.float64)))
y_data = torch.from_numpy(np.array(data["Survived"]))



len: 891
tensor([ 1.0000,  1.0000,  0.0000, 71.2833,  1.0000,  0.0000],
       dtype=torch.float64)


tensor([3.0000, 0.0000, 0.0000, 7.9250, 1.0000, 0.0000], dtype=torch.float64)
