In [6]:
import matplotlib.pyplot as plt
import numpy as np
import pandas
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler

import torch
import torchvision
from torch import nn
from torch.nn import functional as F
from torch.utils import data

In [3]:
# Report whether PyTorch can see a CUDA device in this environment.
torch.cuda.is_available()

False

# Linear Regression

In [None]:
# Load the housing table and pull out one feature column and the target
# column as float32 column vectors of shape (n_samples, 1).
df_data = pd.read_csv("kc_house_data.csv")

x_data = df_data['sqft_living'].to_numpy().astype(np.float32).reshape(-1, 1)
y_data = df_data["price"].to_numpy().astype(np.float32).reshape(-1, 1)

# Quick look at the raw relationship between the two columns.
plt.scatter(x_data, y_data);

In [None]:
# standard scaler
# Features and target each get their own scaler instance: re-fitting a single
# instance on y discards the statistics learned from x, so x could no longer
# be transformed or inverse-transformed consistently afterwards.
std_scaler_x = StandardScaler()
std_scaler_y = StandardScaler()
# Backward-compatible alias: the single scaler used previously ended up
# fitted on y, so `std_scaler` keeps pointing at the target scaler.
std_scaler = std_scaler_y

x_data = std_scaler_x.fit_transform(x_data)
y_data = std_scaler_y.fit_transform(y_data)

plt.scatter(x_data, y_data);

In [None]:
# minmax scaler
# NOTE(review): x_data / y_data are reassigned in place, so running this cell
# after another scaler cell rescales already-scaled values — presumably only
# one scaler cell is meant to be executed; confirm.
# Features and target each get their own scaler instance: re-fitting a single
# instance on y discards the statistics learned from x.
minmax_scaler_x = MinMaxScaler()
minmax_scaler_y = MinMaxScaler()
# Backward-compatible alias: the single scaler used previously ended up
# fitted on y, so `minmax_scaler` keeps pointing at the target scaler.
minmax_scaler = minmax_scaler_y

x_data = minmax_scaler_x.fit_transform(x_data)
y_data = minmax_scaler_y.fit_transform(y_data)

plt.scatter(x_data, y_data);

In [None]:
# robust scaler
# NOTE(review): x_data / y_data are reassigned in place, so running this cell
# after another scaler cell rescales already-scaled values — presumably only
# one scaler cell is meant to be executed; confirm.
# Features and target each get their own scaler instance: re-fitting a single
# instance on y discards the statistics learned from x.
robust_scaler_x = RobustScaler()
robust_scaler_y = RobustScaler()
# Backward-compatible alias: the single scaler used previously ended up
# fitted on y, so `robust_scaler` keeps pointing at the target scaler.
robust_scaler = robust_scaler_y

x_data = robust_scaler_x.fit_transform(x_data)
y_data = robust_scaler_y.fit_transform(y_data)

plt.scatter(x_data, y_data);

### Data Preparation

In [None]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.25,
    random_state=42,
)

In [None]:
class DatasetLinearR(data.Dataset):
    """Map-style PyTorch dataset pairing each feature row with its target row."""

    def __init__(self, x_data, y_data):
        """Keep references to the feature and target containers (no copy)."""
        self.x_data, self.y_data = x_data, y_data

    def __len__(self):
        """Return the number of samples, taken from the feature container."""
        n_samples = len(self.x_data)
        return n_samples

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        features = self.x_data[index]
        target = self.y_data[index]
        return features, target
    
    
# DataLoader settings shared by the training and the test loader.
params = dict(batch_size=64, shuffle=True, num_workers=8)

# Generators
training_set = DatasetLinearR(x_train, y_train)
test_set = DatasetLinearR(x_test, y_test)

training_generator = data.DataLoader(training_set, **params)
test_generator = data.DataLoader(test_set, **params)

### Model

In [None]:
class LinearRegression(nn.Module):
    """Single affine layer trained with a mean-squared-error loss.

    Args:
        x_dim: Number of input features.
        y_dim: Number of output values.
    """

    def __init__(self, x_dim, y_dim):
        super(LinearRegression, self).__init__()
        self.i2o = nn.Linear(x_dim, y_dim, bias=True)
        self.loss = nn.MSELoss()

    def forward(self, x):
        """Apply the affine map to ``x`` and return the result."""
        return self.i2o(x)

    def train_(self, training_generator, epochs, lr=0.5):
        """Fit the layer with Adam.

        Args:
            training_generator: Iterable yielding ``(x, y)`` tensor batches.
            epochs: Number of passes over ``training_generator``.
            lr: Adam learning rate.
        """
        self.optim = torch.optim.Adam(self.parameters(), lr=lr)
        # test() leaves the module in eval mode; switch back before fitting.
        self.train()

        for epoch in range(epochs):
            loss = None
            for x_data, y_data in training_generator:
                y_pred = self(x_data)
                loss = self.loss(y_pred, y_data)
                self.optim.zero_grad()
                loss.backward()
                self.optim.step()

            # Progress line reports the loss of the last batch of the epoch;
            # skipped when the generator produced no batch at all.
            if epoch % 50 == 0 and loss is not None:
                print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, loss.item()))

    def test(self, test_generator):
        """Print and return the mean squared error over all test values.

        Args:
            test_generator: Iterable yielding ``(x, y)`` tensor batches.

        Returns:
            The mean squared error as a Python float.

        Raises:
            ValueError: If ``test_generator`` yields no values.
        """
        self.eval()
        squared_error = 0.0
        n_values = 0
        with torch.no_grad():
            for x_data, y_data in test_generator:
                y_pred = self(x_data)
                # Reduce every batch to a scalar so a smaller final batch
                # cannot cause a shape mismatch while accumulating.
                squared_error += ((y_pred - y_data) ** 2).sum().item()
                n_values += y_data.numel()

        if n_values == 0:
            raise ValueError('test_generator yielded no samples')

        mse = squared_error / n_values
        print('Test MSE of the model: {:.6f}'.format(mse))
        return mse
            
        

In [None]:
# Constructor and training keyword arguments, kept as plain dictionaries.
params_model = dict(x_dim=1, y_dim=1)
params_train = dict(
    training_generator=training_generator,
    epochs=200,
    lr=0.001,
)

# Build the regressor, then fit it on the training batches.
linear_regression = LinearRegression(**params_model)
linear_regression.train_(**params_train)

In [None]:
# Predict on the held-out inputs and convert the result back to NumPy.
y_pred = linear_regression.forward(torch.from_numpy(x_test))
y_pred = y_pred.detach().numpy()

# Overlay the model output on the true test targets.
plt.plot(x_test, y_test, 'ro', label='Original data')
plt.plot(x_test, y_pred, label='Fitted line')
plt.legend();

In [None]:
# Inspect the learned weight and bias of the linear layer.
linear_regression.i2o.state_dict()

# Logistic Regression

In [None]:
# Synthetic, linearly separable binary classification data.
# A locally seeded generator makes the points and the boundary reproducible
# across kernel restarts without touching NumPy's global random state.
rng = np.random.default_rng(42)

x_data = rng.uniform(-100, 100, size=(200, 2)).astype(np.float32)
# theta[0] acts as the slope and theta[1] as the intercept of the boundary.
theta = rng.uniform(-5, 5, size=(2, 1))
# A point is labelled 1 when its second coordinate lies above the line.
y_data = (x_data[:, 1] > (x_data[:, 0] * theta[0] + theta[1])).astype(np.float32)
y_data = y_data.reshape(-1, 1)

plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data.squeeze());

### Data Preparation

In [None]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.25,
    random_state=42,
)

In [None]:
class DatasetLogisticR(data.Dataset):
    """Map-style PyTorch dataset pairing each feature row with its label row."""

    def __init__(self, x_data, y_data):
        """Keep references to the feature and label containers (no copy)."""
        self.x_data, self.y_data = x_data, y_data

    def __len__(self):
        """Return the number of samples, taken from the feature container."""
        n_samples = len(self.x_data)
        return n_samples

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        features = self.x_data[index]
        label = self.y_data[index]
        return features, label
    
    
# DataLoader settings shared by the training and the test loader.
params = dict(batch_size=64, shuffle=True, num_workers=8)

# Generators
training_set = DatasetLogisticR(x_train, y_train)
test_set = DatasetLogisticR(x_test, y_test)

training_generator = data.DataLoader(training_set, **params)
test_generator = data.DataLoader(test_set, **params)

### Model

In [None]:
class LogisticRegression(nn.Module):
    """Affine layer followed by a sigmoid, trained with binary cross-entropy.

    Args:
        x_dim: Number of input features.
        y_dim: Number of outputs (defaults to 1).
    """

    def __init__(self, x_dim, y_dim=1):
        super(LogisticRegression, self).__init__()
        self.i2o = nn.Linear(x_dim, y_dim, bias=True)
        self.o_activation = nn.Sigmoid()
        self.loss = nn.BCELoss()

    def forward(self, x):
        """Return the sigmoid of the affine map applied to ``x``."""
        # Use the module's own activation layer rather than the deprecated
        # functional alias ``F.sigmoid``; the computed values are the same.
        return self.o_activation(self.i2o(x))

    def train_(self, training_generator, epochs, lr=0.5):
        """Fit the model with Adam.

        Args:
            training_generator: Iterable yielding ``(x, y)`` tensor batches.
            epochs: Number of passes over ``training_generator``.
            lr: Adam learning rate.
        """
        self.optim = torch.optim.Adam(self.parameters(), lr=lr)
        # Make sure the module is in training mode even if eval() was
        # called on it earlier.
        self.train()

        for epoch in range(epochs):
            loss = None
            for x_data, y_data in training_generator:
                y_pred = self(x_data)
                loss = self.loss(y_pred, y_data)
                self.optim.zero_grad()
                loss.backward()
                self.optim.step()

            # Progress line reports the loss of the last batch of the epoch;
            # skipped when the generator produced no batch at all.
            if epoch % 50 == 0 and loss is not None:
                print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, loss.item()))
        

In [None]:
# Constructor and training keyword arguments, kept as plain dictionaries.
params_model = dict(x_dim=2, y_dim=1)
params_train = dict(
    training_generator=training_generator,
    epochs=300,
    lr=0.001,
)

# Build the classifier, then fit it on the training batches.
logistic_regression = LogisticRegression(**params_model)
logistic_regression.train_(**params_train)

### Evaluation

In [None]:
from sklearn.metrics import roc_auc_score

In [None]:
# Model outputs for the held-out points, converted back to NumPy.
y_pred = logistic_regression.forward(torch.from_numpy(x_test))
y_pred = y_pred.detach().numpy()

# roc score
roc_auc_score(y_true=y_test, y_score=y_pred)

In [None]:
# Held-out points coloured by their true label.
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test.squeeze());

In [None]:
# The same held-out points coloured by the model output stored in y_pred.
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_pred.squeeze());

In [None]:
# Inspect the learned parameters of the logistic regression model.
logistic_regression.state_dict()

# Feed Forward Network

In [None]:
# Synthetic 1-D regression data: y = 10*sin(x) + x with x drawn from [-3, 7).
# A locally seeded generator keeps the sample reproducible across kernel
# restarts without touching NumPy's global random state.
rng = np.random.default_rng(42)

x_data = 10 * rng.random(size=(10000, 1)) - 3
x_data = x_data.astype(np.float32)
y_data = 10*np.sin(x_data) + x_data

plt.scatter(x_data, y_data);

In [None]:
# Synthetic 1-D regression data: y = 10*x^2 with x drawn from [-6, 9).
# NOTE(review): this reassigns x_data / y_data, replacing the sinusoidal
# data if that cell was run first — presumably only one of the two dataset
# cells is meant to be used; confirm.
# A locally seeded generator keeps the sample reproducible across kernel
# restarts without touching NumPy's global random state.
rng = np.random.default_rng(42)

x_data = 15 * rng.random(size=(1000, 1)) - 6
x_data = x_data.astype(np.float32)
y_data = 10*x_data**2

plt.scatter(x_data, y_data);

In [None]:
# minmax scaler
# Features and target each get their own scaler instance: re-fitting a single
# instance on y discards the statistics learned from x, so x could no longer
# be transformed or inverse-transformed consistently afterwards.
minmax_scaler_x = MinMaxScaler()
minmax_scaler_y = MinMaxScaler()
# Backward-compatible alias: the single scaler used previously ended up
# fitted on y, so `minmax_scaler` keeps pointing at the target scaler.
minmax_scaler = minmax_scaler_y

x_data = minmax_scaler_x.fit_transform(x_data)
y_data = minmax_scaler_y.fit_transform(y_data)

plt.scatter(x_data, y_data);

### Data Preparation

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=42,
)

In [None]:
class DatasetFFNN(data.Dataset):
    """Map-style PyTorch dataset pairing each feature row with its target row."""

    def __init__(self, x_data, y_data):
        """Keep references to the feature and target containers (no copy)."""
        self.x_data, self.y_data = x_data, y_data

    def __len__(self):
        """Return the number of samples, taken from the feature container."""
        n_samples = len(self.x_data)
        return n_samples

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        features = self.x_data[index]
        target = self.y_data[index]
        return features, target
    
    
# DataLoader settings shared by the training and the test loader.
params = dict(batch_size=64, shuffle=True, num_workers=8)

# Generators
training_set = DatasetFFNN(x_train, y_train)
test_set = DatasetFFNN(x_test, y_test)

train_generator = data.DataLoader(training_set, **params)
test_generator = data.DataLoader(test_set, **params)

### Model

In [None]:
class FFNN(nn.Module):
    """One-hidden-layer regressor: linear -> batch norm -> ReLU -> dropout -> linear.

    Args:
        x_dim: Number of input features.
        h_dim: Width of the hidden layer.
        y_dim: Number of output values.
        prob: Dropout probability applied after the hidden activation.
    """

    def __init__(self, x_dim, h_dim, y_dim, prob):
        super(FFNN, self).__init__()
        self.i2h = nn.Linear(x_dim, h_dim)
        self.h2o = nn.Linear(h_dim, y_dim)

        self.bn1 = nn.BatchNorm1d(h_dim)
        self.drop1 = nn.Dropout(p=prob)
        self.loss = nn.MSELoss()

    def forward(self, x):
        """Run ``x`` through the hidden block and the output layer."""
        x = self.i2h(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.drop1(x)
        x = self.h2o(x)
        return x

    def train_(self, train_generator, epochs, lr=0.5):
        """Fit the network with plain SGD.

        Args:
            train_generator: Iterable yielding ``(x, y)`` tensor batches.
            epochs: Number of passes over ``train_generator``.
            lr: SGD learning rate.
        """
        self.optim = torch.optim.SGD(self.parameters(), lr=lr)
        # Batch norm and dropout behave differently in eval mode, so force
        # training mode in case eval() was called on the module earlier.
        self.train()

        for epoch in range(epochs):
            loss = None
            for x_data, y_data in train_generator:
                y_pred = self(x_data)
                loss = self.loss(y_pred, y_data)
                self.optim.zero_grad()
                loss.backward()
                self.optim.step()

            # Progress line reports the loss of the last batch of the epoch;
            # skipped when the generator produced no batch at all.
            if epoch % 50 == 0 and loss is not None:
                print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, loss.item()))


In [None]:
# Constructor and training keyword arguments, kept as plain dictionaries.
params_model = dict(x_dim=1, h_dim=5, y_dim=1, prob=0.3)
params_train = dict(
    train_generator=train_generator,
    epochs=500,
    lr=0.001,
)

# Build the network, then fit it on the training batches.
ffnn = FFNN(**params_model)
ffnn.train_(**params_train)

### Evaluation

In [None]:
# Put the network in eval mode before predicting on the held-out inputs.
ffnn.eval()
y_pred = ffnn.forward(torch.from_numpy(x_test))
y_pred = y_pred.detach().numpy()

# Scatter of the model output against the test inputs.
plt.scatter(x_test, y_pred, label='Fitted line');

# Convolutional Neural Network

In [4]:
from torchvision.datasets import MNIST

In [7]:
# MNIST training split: converted to tensors, normalised with the given
# mean/std, and served in shuffled batches of 64.
train_generator = torch.utils.data.DataLoader(
    dataset=torchvision.datasets.MNIST(
        root='./mnist/',
        train=True,
        download=True,
        transform=torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    batch_size=64,
    shuffle=True,
)

In [None]:
# MNIST test split with the same tensor conversion and normalisation as the
# training loader.
test_dataset = torchvision.datasets.MNIST(
    './mnist/',
    train=False,
    download=True,
    transform=torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]),
)

# Fixed order for evaluation: batches of 64, no shuffling.
test_generator = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
class ConvNet(nn.Module):
    """Two conv -> batch norm -> ReLU -> max-pool stages and a linear classifier.

    Args:
        x_channel: Number of input channels.
        b1_channel: Output channels of the first convolution.
        b2_channel: Output channels of the second convolution.
        b3_dim: Accepted for interface compatibility; not used by the model.
        y_dim: Number of output classes.
        kernel_size_conv: Kernel size of both convolutions.
        kernel_size_mp: Kernel size of both max-pool layers.
        stride_conv: Stride of both convolutions.
        stride_mp: Stride of both max-pool layers.
        padding_conv: Padding of both convolutions.
        prob_dropout: Probability used to build ``self.dropout``.
    """

    def __init__(self, x_channel, b1_channel, b2_channel, b3_dim, y_dim,
                 kernel_size_conv, kernel_size_mp, stride_conv, stride_mp,
                 padding_conv, prob_dropout):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(x_channel, b1_channel, kernel_size=kernel_size_conv,
                      stride=stride_conv, padding=padding_conv),
            nn.BatchNorm2d(b1_channel),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=kernel_size_mp, stride=stride_mp))
        self.layer2 = nn.Sequential(
            nn.Conv2d(b1_channel, b2_channel, kernel_size=kernel_size_conv,
                      stride=stride_conv, padding=padding_conv),
            nn.BatchNorm2d(b2_channel),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=kernel_size_mp, stride=stride_mp))
        # NOTE(review): the dropout layer is built but never applied in
        # forward(); left untouched to preserve behaviour — confirm intent.
        self.dropout = nn.Dropout(p=prob_dropout)
        # The 7*7 factor fixes the spatial size reaching the classifier —
        # presumably 28x28 inputs halved by each pooling stage; verify before
        # changing the input size or the conv/pool settings.
        self.fc = nn.Linear(7*7*b2_channel, y_dim)
        self.loss = nn.CrossEntropyLoss()

    def _device(self):
        """Return the device that holds this module's parameters."""
        return next(self.parameters()).device

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.flatten(start_dim=1)
        return self.fc(x)

    def train_(self, train_generator, epochs, lr=0.01):
        """Fit the network with plain SGD.

        Args:
            train_generator: Iterable yielding ``(images, labels)`` batches.
            epochs: Number of passes over ``train_generator``.
            lr: SGD learning rate.
        """
        self.optim = torch.optim.SGD(self.parameters(), lr=lr)
        # Batches follow the model to whatever device it lives on, so the
        # same code runs on CPU and GPU.
        device = self._device()
        # test() leaves the module in eval mode; batch norm needs training
        # mode while fitting.
        self.train()

        for epoch in range(epochs):
            loss = None
            for x_data, y_data in train_generator:
                x_data, y_data = x_data.to(device), y_data.to(device)
                y_pred = self(x_data)
                loss = self.loss(y_pred, y_data)
                self.optim.zero_grad()
                loss.backward()
                self.optim.step()

            # Progress line reports the loss of the last batch of the epoch;
            # skipped when the generator produced no batch at all.
            if epoch % 10 == 0 and loss is not None:
                print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, loss.item()))

    def test(self, test_generator):
        """Print and return the classification accuracy in percent.

        Args:
            test_generator: Iterable yielding ``(images, labels)`` batches.

        Returns:
            Accuracy over all yielded samples, as a percentage.

        Raises:
            ValueError: If ``test_generator`` yields no samples.
        """
        self.eval()
        device = self._device()
        correct = 0
        total = 0
        with torch.no_grad():
            for x_data, y_data in test_generator:
                x_data, y_data = x_data.to(device), y_data.to(device)
                y_pred = self(x_data)
                _, labels_pred = torch.max(y_pred, 1)
                correct += (labels_pred == y_data).sum().item()
                # Count the samples actually seen so any iterable of batches
                # works, not only loaders exposing a `.dataset`.
                total += y_data.size(0)

        if total == 0:
            raise ValueError('test_generator yielded no samples')

        accuracy = 100 * correct / total
        print('Test Accuracy of the model on the {} test images: {} %'.format(total, accuracy))
        return accuracy



In [None]:
# Architecture settings for the CNN.
params_model = {
    "x_channel": 1,
    "b1_channel": 16,
    "b2_channel": 32,
    "b3_dim": 128,
    "y_dim": 10,

    "kernel_size_conv": 5,
    "kernel_size_mp": 2,

    "stride_conv": 1,
    "stride_mp": 2,

    "padding_conv": 2,
    "prob_dropout": 0.2
}

# Training settings for the CNN.
params_train = {
    "train_generator": train_generator,
    "epochs": 20,
    "lr": 0.001
}

# Place the model on the GPU when one is available and keep it on the CPU
# otherwise, rather than calling .cuda() unconditionally.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cnn = ConvNet(**params_model).to(device)

# next(cnn.parameters()).is_cuda

cnn.train_(**params_train)

### Evaluation

In [None]:
# Evaluate the trained CNN on the test loader; the method prints the accuracy.
cnn.test(test_generator)

Test Accuracy of the model on the 10000 test images: 8.94 %


In [None]:
# NOTE(review): no free function named `test` is defined in the visible cells —
# presumably it comes from a deleted cell or leftover kernel state; confirm, or
# call `cnn.test(test_generator)` instead.
test(cnn, test_generator)

Test Accuracy of the model on the 10000 test images: 98.74 %
