<a href="https://colab.research.google.com/github/Sirfowahid/Artificial-Neural-Network/blob/master/ANN_Overfitting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# import libraries
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# new!
from torch.utils.data import DataLoader

In [2]:
# load the iris dataframe that ships with seaborn
import seaborn as sns
iris = sns.load_dataset('iris')


# take the first four columns of the dataframe and store them as a float tensor
featureColumns = iris.columns[0:4]
data = torch.tensor( iris[featureColumns].values ).float()

# encode species as integer class labels, one per row of data;
# every label starts at 0, which is already the code for 'setosa',
# so only the remaining two species need to be written
labels = torch.zeros(len(data), dtype=torch.long)
for speciesCode, speciesName in enumerate(['versicolor', 'virginica'], start=1):
  labels[iris.species==speciesName] = speciesCode

In [3]:
# create our fake dataset: 10 samples x 4 features, where row r (1-based)
# holds the values 10*r+1 ... 10*r+4, so every sample stays recognizable
# after it has been shuffled or split

# broadcasting a (10,1) column of row offsets against the 4 feature offsets
# builds the same matrix as tiling both arrays and transposing one of them
fakedata = 10*np.arange(1,11)[:,None] + np.arange(1,5)

# boolean labels: False for the first five samples, True for the rest
fakelabels = np.arange(len(fakedata))>4

# one print per statement (joining calls with a comma only builds a throwaway tuple)
print(fakedata)
print(' ')
print(fakelabels)

[[ 11  12  13  14]
 [ 21  22  23  24]
 [ 31  32  33  34]
 [ 41  42  43  44]
 [ 51  52  53  54]
 [ 61  62  63  64]
 [ 71  72  73  74]
 [ 81  82  83  84]
 [ 91  92  93  94]
 [101 102 103 104]]
 
[False False False False False  True  True  True  True  True]


In [4]:
# wrap the raw data matrix (no labels yet) in a shuffling DataLoader
fakedataLdr = DataLoader(dataset=fakedata, shuffle=True)

# show the loader object itself, then the batch size it was created with
for loaderInfo in (fakedataLdr, fakedataLdr.batch_size):
  print( loaderInfo )

<torch.utils.data.dataloader.DataLoader object at 0x7fd3963d3cd0>
1


In [5]:
# step through the loader, printing the running index, the sample, and its shape
for i,oneSample in enumerate(fakedataLdr):
  print(f'{i} {oneSample} {oneSample.shape}')

# note: only the data come back -- the labels are nowhere to be found!

0 tensor([[91, 92, 93, 94]]) torch.Size([1, 4])
1 tensor([[101, 102, 103, 104]]) torch.Size([1, 4])
2 tensor([[61, 62, 63, 64]]) torch.Size([1, 4])
3 tensor([[11, 12, 13, 14]]) torch.Size([1, 4])
4 tensor([[41, 42, 43, 44]]) torch.Size([1, 4])
5 tensor([[21, 22, 23, 24]]) torch.Size([1, 4])
6 tensor([[31, 32, 33, 34]]) torch.Size([1, 4])
7 tensor([[51, 52, 53, 54]]) torch.Size([1, 4])
8 tensor([[71, 72, 73, 74]]) torch.Size([1, 4])
9 tensor([[81, 82, 83, 84]]) torch.Size([1, 4])


In [6]:
# a loader built from the bare data array has nothing to pair labels with,
# so bundle data and labels into a Dataset first.
# torch.tensor with an explicit dtype states the float32 conversion outright
# (the legacy torch.Tensor constructor performs the same cast implicitly)
fakeDataset = torch.utils.data.TensorDataset(
    torch.tensor(fakedata, dtype=torch.float32),
    torch.tensor(fakelabels, dtype=torch.float32))

# one print per statement (joining calls with a comma only builds a throwaway tuple)
print( fakeDataset.tensors )
print(' ')

# then create another DataLoader, this time on top of the Dataset
fakedataLdr = DataLoader(fakeDataset, shuffle=True)

# iterate through the data: each step now yields a (data, label) pair
for dat,lab in fakedataLdr:
  print(dat,lab)

(tensor([[ 11.,  12.,  13.,  14.],
        [ 21.,  22.,  23.,  24.],
        [ 31.,  32.,  33.,  34.],
        [ 41.,  42.,  43.,  44.],
        [ 51.,  52.,  53.,  54.],
        [ 61.,  62.,  63.,  64.],
        [ 71.,  72.,  73.,  74.],
        [ 81.,  82.,  83.,  84.],
        [ 91.,  92.,  93.,  94.],
        [101., 102., 103., 104.]]), tensor([0., 0., 0., 0., 0., 1., 1., 1., 1., 1.]))
 
tensor([[31., 32., 33., 34.]]) tensor([0.])
tensor([[21., 22., 23., 24.]]) tensor([0.])
tensor([[51., 52., 53., 54.]]) tensor([0.])
tensor([[101., 102., 103., 104.]]) tensor([1.])
tensor([[91., 92., 93., 94.]]) tensor([1.])
tensor([[81., 82., 83., 84.]]) tensor([1.])
tensor([[71., 72., 73., 74.]]) tensor([1.])
tensor([[11., 12., 13., 14.]]) tensor([0.])
tensor([[41., 42., 43., 44.]]) tensor([0.])
tensor([[61., 62., 63., 64.]]) tensor([1.])


In [8]:
# use scikitlearn to split the data, holding out 20% of the samples for testing.
# no random_state is passed, so the split changes from run to run.
# the split arrays get their own names (train_X/test_X) so that train_data and
# test_data only ever refer to the PyTorch Datasets built below
train_X,test_X, train_labels,test_labels = train_test_split(fakedata, fakelabels, test_size=.2)

# then convert them into PyTorch Datasets
# (torch.tensor with an explicit dtype states the float32 conversion outright)
train_data = torch.utils.data.TensorDataset(
     torch.tensor(train_X, dtype=torch.float32),
     torch.tensor(train_labels, dtype=torch.float32))

test_data = torch.utils.data.TensorDataset(
     torch.tensor(test_X, dtype=torch.float32),
     torch.tensor(test_labels, dtype=torch.float32))

# finally, translate into dataloader objects
# notice the batches (see next cell)!
# training samples are served four at a time; the test loader keeps the
# default batch size of one sample
train_loader = DataLoader(train_data,batch_size=4)
test_loader  = DataLoader(test_data)

In [9]:
# look inside both dataloaders (batching is an advantage of dataloader!)
loaderSections = ( ('TRAINING DATA',train_loader), ('TESTING DATA',test_loader) )

for sectionIdx,(sectionTitle,sectionLoader) in enumerate(loaderSections):

  # separator line between consecutive sections
  if sectionIdx>0:
    print(' ')

  print(sectionTitle)
  for batch,label in sectionLoader: # iterable
    print(batch,label)
    print(' ')

TRAINING DATA
tensor([[81., 82., 83., 84.],
        [71., 72., 73., 74.],
        [41., 42., 43., 44.],
        [31., 32., 33., 34.]]) tensor([1., 1., 0., 0.])
 
tensor([[11., 12., 13., 14.],
        [21., 22., 23., 24.],
        [91., 92., 93., 94.],
        [61., 62., 63., 64.]]) tensor([0., 0., 1., 1.])
 
 
TESTING DATA
tensor([[51., 52., 53., 54.]]) tensor([0.])
 
tensor([[101., 102., 103., 104.]]) tensor([1.])
 
