https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data

In [55]:
#imports
from torch.utils.data import Dataset
from torch.utils.data import random_split
from torch.utils.data import DataLoader
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder
from torch.nn import Module
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.nn import BCELoss
from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_
from torch.optim import SGD
from numpy import vstack
from sklearn.metrics import accuracy_score
from torch import Tensor

### Dataset definition
PyTorch provides the Dataset class that you can extend and customize to load your dataset.
For example, the constructor of your dataset object can load your data file (e.g. a CSV file). You can then override the __len__() function that can be used to get the length of the dataset (number of rows or samples), and the __getitem__() function that is used to get a specific sample by index.

The random_split() function can be used to split a dataset into train and test sets. Here we are dividing 67 train and 33 test data.

##### Current CSVDataset
label is the last one in the csv. It is a string represented as 'g': good and 'b': bad.
We use LabelEncoder to encode string labels to 0 and 1's.
We convert X and y's as floats.

Why self.y.reshape?




In [46]:
class CSVDataset(Dataset):
    def __init__(self,path):
        dataframe = read_csv(path, header = None)
        
        self.X = dataframe.values[:,:-1]
        self.y = dataframe.values[:,-1]
        
        self.X = self.X.astype('float32')
        self.y = LabelEncoder().fit_transform(self.y)
        self.y = self.y.astype('float32')
        print(self.y.shape)
        self.y = self.y.reshape(len(self.y),1)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return [self.X[idx],self.y[idx]]
    
    def get_split_data(self,n_train = 0.67):
        train_len = round(len(self.X)*n_train)
        test_len = len(self.X) - train_len
        return random_split(self,[train_len,test_len])
        
        

### Model definition
Linear 

In [11]:
class MLP(Module):
    def __init__(self, inputs):
        super(MLP,self).__init__()
        
        self.hidden1 = Linear(inputs,10)
        kaiming_uniform_(self.hidden1.weight,nonlinearity = 'relu')
        self.act1 = ReLU()
        
        self.hidden2 = Linear(10,8)
        kaiming_uniform_(self.hidden2.weight, nonlinearity = 'relu')
        self.act2 = ReLU()
        
        self.hidden3 = Linear(8,1)
        xavier_uniform_(self.hidden3.weight)
        self.act3 = Sigmoid()
        
    def forward(self,X):
        X = self.hidden1(X)
        X = self.act1(X)
        
        X = self.hidden2(X)
        X = self.act2(X)
        
        X = self.hidden3(X)
        X = self.act3(X)
        return X

### Prepare Data

In [28]:
def prepare_data(path):
    dataset = CSVDataset(path)
    train_data, test_data = dataset.get_split_data()
    train_dl = DataLoader(train_data,batch_size = 32, shuffle = True)
    test_dl = DataLoader(test_data, batch_size = 1024, shuffle = False)
    
    return train_dl, test_dl

### Train the model

In [13]:
def train_model(model,train_dl):
    criterion = BCELoss()
    optimizer = SGD(model.parameters(),lr = 0.01, momentum =0.9)
    
    for epoch in range(100):
        for i,(inputs,target) in enumerate(train_dl):
            optimizer.zero_grad()
            yhat = model(inputs)
            loss = criterion(yhat,target)
            loss.backward()
            optimizer.step()

### Evaluate model

In [14]:
def evaluate_model(model,test_dl):
    prediction, actual = list(),list()
    for i,(inputs,target) in enumerate(test_dl):
        yhat = model(inputs)
        
        yhat = yhat.detach().numpy()
        yhat = yhat.round()
        
        y = target.numpy()
        y = y.reshape(len(y),1)
        
        prediction.append(yhat)
        actual.append(y)
    prediction, actual = vstack(prediction), vstack(actual)
    
    acc = accuracy_score(actual,prediction)
    return acc

### Make predictions

In [18]:
def predict(row,model):
    row = Tensor([row])
    yhat = model(row)
    yhat = yhat.detach().numpy()
    return yhat    

In [61]:
path = "https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data"
train_dl, test_dl = prepare_data(path)
print(len(train_dl.dataset), len(test_dl.dataset))

model = MLP(34)

train_model(model,train_dl)

acc = evaluate_model(model,test_dl)
print("Accuracy ", acc)

row = [1,0,1,0.06655,1,-0.18388,1,-0.27320,1,-0.43107,1,-0.41349,0.96232,-0.51874,0.90711,-0.59017,0.89230,-0.66474,0.69876,-0.70997,0.70645,-0.76320,0.63081,-0.80544,0.55867,-0.89128,0.47211,-0.86500,0.40303,-0.83675,0.30996,-0.89093,0.22995,-0.89158]
yhat = predict(row,model)
print('Predicted: %.3f (class=%d)' % (yhat, yhat.round()))

row = [1,0,0.71253,-0.02595,0.41287,-0.23067,0.98019,-0.09473,0.99709,-0.10236,1,-0.10951,0.58965,1,0.83726,-1,0.82270,-0.17863,0.80760,-0.28257,-0.25914,0.92730,0.51933,0.05456,0.65493,-0.20392,0.93124,-0.41307,0.63811,-0.21901,0.86136,-0.87354,-0.23186,-1]
yhat = predict(row,model)
print('Predicted: %.3f (class=%d)' % (yhat, yhat.round()))

(351,)
235 116
Accuracy  0.8448275862068966
Predicted: 0.989 (class=1)
Predicted: 0.014 (class=0)
