## Image Classification

The basic 7 steps for building models in general are listed so:

1. Load Dataset
2. Make Dataset Iterable
3. Create Model Class
4. Instantiate Model Class
5. Instantiate Loss Class
6. Instantiate Optimizer Class
7. Train Model

In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

from torch.utils.data import Dataset, DataLoader

In [2]:
all_df = pd.read_table('list_eval_partition.txt', delim_whitespace=True)
labels_df = pd.read_table('list_category_img.txt', delim_whitespace=True)


  """Entry point for launching an IPython kernel.
  


In [3]:
def func(image_name):
    category_label = labels_df[labels_df['image_name'].str.match(image_name)].iloc[0]['category_label']
    create_new = np.zeros(50)
    create_new[category_label - 1] = 1
    return create_new

In [4]:
x = labels_df["category_label"].value_counts()
# x.index.sort_values()
x

41    72158
18    36887
3     24557
32    19666
17    15429
33    14773
6     13311
16    13123
11    10467
19    10078
2      7495
48     7408
26     7076
42     6153
30     5013
29     4416
10     4048
34     3048
44     2294
39     2120
24     1669
35     1106
15      791
12      748
9       716
13      676
27      594
22      527
23      486
36      386
5       330
7       324
4       309
1       160
47      150
20      146
43      126
14       97
21       77
46       70
37       54
25       49
28       45
31       32
8        17
40       17
Name: category_label, dtype: int64

In [5]:
useless_attributes = [38,45,49,50]
low_count_attr = [1,4,5,7,8,14,20,21,22,23,27,13,9,12,15,25,28,31,36,46,37,40,43,46,47]

In [6]:
for low in low_count_attr:
    labels_df = labels_df[labels_df['category_label'] != low]
labels_df

Unnamed: 0,image_name,category_label
0,img/Sheer_Pleated-Front_Blouse/img_00000001.jpg,3
1,img/Sheer_Pleated-Front_Blouse/img_00000002.jpg,3
2,img/Sheer_Pleated-Front_Blouse/img_00000003.jpg,3
3,img/Sheer_Pleated-Front_Blouse/img_00000004.jpg,3
4,img/Sheer_Pleated-Front_Blouse/img_00000005.jpg,3
5,img/Sheer_Pleated-Front_Blouse/img_00000006.jpg,3
6,img/Sheer_Pleated-Front_Blouse/img_00000007.jpg,3
7,img/Sheer_Pleated-Front_Blouse/img_00000008.jpg,3
8,img/Sheer_Pleated-Front_Blouse/img_00000009.jpg,3
9,img/Sheer_Pleated-Front_Blouse/img_00000010.jpg,3


In [7]:
lower = [41,18,3,32,17,33,6,16,11,19]

for elem in lower:  
    jon = labels_df[labels_df['category_label'] == elem]
    drop_diff = len(labels_df[labels_df['category_label'] == elem]) - 10000
    # random_vector = np.random.choice(drop_diff + 10000, drop_diff)
    # labels_df = labels_df.drop(labels_df.[random_vector])
    # labels_df
    df_dropped = jon.drop(jon.sample(n=drop_diff).index)
    labels_df = labels_df[labels_df['category_label'] != elem]
    labels_df = labels_df.append(df_dropped)
labels_df

# duplicate = [35,24,39,44,34,10,29,30,42,26,48,2]

# for elem in duplicate:
#     snow = labels_df[labels_df['category_label'] == elem]
#     sample_diff = 10000 - len(snow)
#     df_dropped = snow.sample(n=10000, replace=True)
#     labels_df = labels_df[labels_df['category_label'] != elem]
#     labels_df = labels_df.append(df_dropped)

# indices = np.arange(len(labels_df))
# labels_df.set_index(indices)

Unnamed: 0,image_name,category_label
750,img/Single-Button_Blazer/img_00000001.jpg,2
751,img/Single-Button_Blazer/img_00000002.jpg,2
752,img/Single-Button_Blazer/img_00000003.jpg,2
753,img/Single-Button_Blazer/img_00000004.jpg,2
754,img/Single-Button_Blazer/img_00000005.jpg,2
755,img/Single-Button_Blazer/img_00000006.jpg,2
756,img/Single-Button_Blazer/img_00000007.jpg,2
757,img/Single-Button_Blazer/img_00000008.jpg,2
758,img/Single-Button_Blazer/img_00000009.jpg,2
759,img/Single-Button_Blazer/img_00000010.jpg,2


In [8]:
all_df = all_df[all_df.isin(labels_df)['image_name'] == True]
# print(len(all_df))
# all_df = labels_df[['image_name']].merge(all_df)
# all_df

In [9]:
'''
STEP 1: LOAD DATASET
'''
# test_df = pd.read_csv('fashionmnist/fashion-mnist_test.csv')
# test_df_labels = test_df['label']
# test_pixels_df = test_df.drop('label', axis=1)
validation_df = all_df[all_df['evaluation_status'].str.contains('val')]
# np.random.shuffle(validation_df)
validation_df = validation_df.drop(['evaluation_status'], axis=1)

train_df = all_df[all_df['evaluation_status'].str.contains('train')]
# np.random.shuffle(train_df)
train_df = train_df.drop(['evaluation_status'], axis=1)

test_df = all_df[all_df['evaluation_status'].str.contains('test')]
# np.random.shuffle(test_df)
test_df = test_df.drop(['evaluation_status'], axis=1)

# train_df = pd.read_csv('fashionmnist/fashion-mnist_train.csv')
# train_pixels_df = train_df.drop('label', axis=1)
# train_df_labels = train_df['label']
train_df

Unnamed: 0,image_name
0,img/Sheer_Pleated-Front_Blouse/img_00000001.jpg
9,img/Sheer_Pleated-Front_Blouse/img_00000010.jpg
11,img/Sheer_Pleated-Front_Blouse/img_00000012.jpg
24,img/Sheer_Pleated-Front_Blouse/img_00000025.jpg
25,img/Sheer_Pleated-Front_Blouse/img_00000026.jpg
31,img/Sheer_Pleated-Front_Blouse/img_00000032.jpg
33,img/Sheer_Pleated-Front_Blouse/img_00000034.jpg
42,img/Sheer_Pleated-Front_Blouse/img_00000043.jpg
43,img/Sheer_Pleated-Front_Blouse/img_00000044.jpg
48,img/Sheer_Pleated-Front_Blouse/img_00000049.jpg


In [10]:
# #np.unique(np.argmax(labelvalidation, axis=1), return_counts=True)

# validation_df.iloc[1334]['image_name']
# func('img/Open-Back_Knit_Blouse/img_00000020.jpg')

In [11]:
# np.unique(np.argmax(labeltrain, axis=1), return_counts=True)

In [13]:
allimagesvalidation = []
labelvalidation = []
# fiftyimagesArray = np.zeros(50)
for index in range(50):
#     create random number
    x = np.random.choice(20969)
    img_filepath = validation_df.iloc[x]['image_name']
    im = Image.open(img_filepath)
#     print(x)
    labelvalidation.append(func(img_filepath))
    imarr = np.uint8(np.asarray(im.convert('RGB').resize((224,224))))
    imarr = (imarr - imarr.mean())/imarr.std()
    allimagesvalidation.append(imarr)

allimagestrain = []
labeltrain = []
for index in range(500):
    x = np.random.choice(109826)
    img_filepath = train_df.iloc[x]['image_name']
    im = Image.open(img_filepath)
    labeltrain.append(func(img_filepath))
    imarr = np.uint8(np.asarray(im.convert('RGB').resize((224,224))))
    imarr = (imarr - imarr.mean())/imarr.std()
    allimagestrain.append(imarr)

# allimagestest = []
# labeltest = []
# for index in range(10000):
#     img_filepath = test_df.iloc[index]['image_name']
#     im = Image.open(img_filepath)
#     labeltest.append(func(img_filepath))
#     imarr = np.uint8(np.asarray(im.convert('RGB').resize((224,224))))
#     #imarr = np.round((imarr - imarr.mean())/imarr.std())
#     allimagestest.append(imarr)
# im = Image.open('414m1dOolTL._SX342_.jpg')
# imarr = np.uint8(np.asarray(im.convert('RGB').resize((224,224))))

# fiftyimagesArray

In [14]:
# '''
# STEP 1.5: defining and instantiating Dataset subclass 
# '''

# '''
# This is our custom Dataset class. Remember from 1st meeting that we need this to pipeline our data into training our model.

# The pipeline is important!!! At larger scale, machine learning can get bottlenecked at disk reads (in image classification for example)
# so understanding the various stages is important. We don't have to worry about that kind of stuff now since we're just creating small
# project models as opposed to complex production models.

# NOTE: this is not the only way to create a dataset. An alternative is to simply pass in a dataframe that contains both pixel and label data.
# Then we can index the label and pixel data inside of __getitem__ as opposed to separating labels and pixel data before hand like I did.
# '''
# class FashionDataset(Dataset):
#     def __init__(self, dataframe, labels):
#         self.labels = torch.LongTensor(labels)
#         self.df = dataframe
        
#     def __getitem__(self, index):
#         # I'm using .loc to access the row of the dataframe by index
#         a = self.df.loc[index]
# #         a = (a - np.mean(a))/np.std(a)
#         img = torch.Tensor(a)
#         label = self.labels[index]
#         return img, label

#     def __len__(self):
#         return len(self.labels)
    
# '''
# This class is for providing image data as (1, 28, 28) tensor as opposed to a (784) tensor. You
# use these for conv2d layers which are powerful for image recognition!

# NOTE: Please note that I normalized the data VERY INCORRECTLY. Here I am normalizing the data across 
# each sample individually which is not good. I should be normalizing across the ENTIRE training data set.

# Also, when I create the test dataset I should normalize it based on the TRAINING set's mean and standard deviation.
# Since the model is trained on the training data, we want to make sure that we transform the test data the same way we
# transform the training data. Otherwise it's like training a model to do one job and then testing it by on another job.
# '''
# class Fashion2DDataset(Dataset):
#     def __init__(self, dataframe, labels):
#         self.labels = torch.LongTensor(labels)
#         self.df = dataframe
        
#     def __getitem__(self, index):
#         # I'm using .loc to access the row of the dataframe by index
#         a = self.df.loc[index]
#         a = (a - np.mean(a))/np.std(a)
#         a = np.split(a, 28)
#         a = np.array([a])
#         img = torch.Tensor(a)
        
#         label = self.labels[index]
#         return img, label

#     def __len__(self):
#         return len(self.labels)

class ClothingDataset(Dataset):
    def __init__(self, data, labels):
        self.labels = labels
        self.data = data
        
    def __getitem__(self, index):
        img = self.data[index]
        
        trans = transforms.ToTensor()
        img = trans(img).float()
        
        label = self.labels[index]
        label = torch.LongTensor(label)
        
        return img, label

    def __len__(self):
        return len(self.labels)

In [15]:
'''
STEP 2: MAKING DATASET ITERABLE
'''
# train_dataset = Fashion2DDataset(train_pixels_df, train_df_labels.values)
# test_dataset = Fashion2DDataset(test_pixels_df, test_df_labels.values)

# batch_size = 100

# train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
#                                            batch_size=batch_size, 
#                                            shuffle=True)

# test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
#                                           batch_size=batch_size, 
#  shuffle=False)

batch_size = 25

train_dataset = ClothingDataset(allimagestrain, labeltrain)
validation_dataset = ClothingDataset(allimagesvalidation, labelvalidation) 
# test_dataset = ClothingDataset(allimagestest, labeltest)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size,                                             
                                           shuffle=True)
# test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
#                                           batch_size=batch_size,
#                                           shuffle=True)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, 
                                          batch_size=batch_size,
                                          shuffle=True)




In [46]:
'''
STEP 3: CREATE MODEL CLASS
'''
# class FeedforwardNeuralNetModel(nn.Module):
#     def __init__(self, input_dim, hidden_dim, output_dim):
#         super(FeedforwardNeuralNetModel, self).__init__()
#         self.fc1 = nn.Linear(input_dim, hidden_dim) 
#         self.relu = nn.ReLU()
#         self.fc2 = nn.Linear(hidden_dim, output_dim)  

#     def forward(self, x):
#         out = self.fc1(x)
#         out = self.relu(out)
#         out = self.fc2(out)
#         return out
    
class ConvolutionalNeuralNetModel(nn.Module):
    def __init__(self, output_dim):
        super(ConvolutionalNeuralNetModel, self).__init__()
#         self.conv1 = nn.Conv2d(3, 10, kernel_size=56, stride=2, padding=2, bias=False)
#         self.bn1 = nn.BatchNorm2d(10)
#         self.relu = nn.ReLU(inplace=True)
#         self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
#         self.fc = nn.Linear(19360, output_dim)
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 25, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(25, 3, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(9408, 9408)
        self.fc2 = nn.Linear(9408, output_dim)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [47]:
'''
STEP 4: INSTANTIATE MODEL CLASS
'''
#input_dim = 224*224
#hidden_dim = 5000
output_dim = 50 

# model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
model = ConvolutionalNeuralNetModel(output_dim)

In [48]:
'''
STEP 5: INSTANTIATE LOSS CLASS
'''
criterion = nn.CrossEntropyLoss()

In [49]:

'''
STEP 6: INSTANTIATE OPTIMIZER CLASS
'''
"""
Most of the time I use SGD. Feel free to use another optimizer if you wish.
What hyperparameters would you use/set here?
"""
learning_rate = .1

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [50]:
'''
STEP 7: TRAIN THE MODEL
'''
iter = 0
losses = []
accuracies = []
for epoch in range(5):
    for i, (images, labels) in enumerate(train_loader):
        # Load images with gradient accumulation capabilities
#         images = images.view(-1, 28*28).requires_grad_()

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = model(images)
        labels = torch.max(labels,1)[1]
        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iter += 1

        if iter % 1 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in validation_loader:
                # Load images with gradient accumulation capabilities
                # images = images.view(-1, 28*28).requires_grad_()

                # Forward pass only to get logits/output
                outputs = model(images)

                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)
                labels = torch.max(labels,1)[1]

                # Total number of labels
                total += labels.size(0)

                # Total correct predictions
                correct += (predicted == labels).sum()

            accuracy =  100 * correct / total
            
            accuracies.append(accuracy)
            losses.append(loss.item())
            
            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

Iteration: 1. Loss: 3.8959712982177734. Accuracy: 2
Iteration: 2. Loss: 3.663517951965332. Accuracy: 4
Iteration: 3. Loss: 3.812986135482788. Accuracy: 4
Iteration: 4. Loss: 5.973000526428223. Accuracy: 6
Iteration: 5. Loss: 3.904256820678711. Accuracy: 6
Iteration: 6. Loss: 3.6253392696380615. Accuracy: 6
Iteration: 7. Loss: 183.294921875. Accuracy: 6
Iteration: 8. Loss: 3.822131633758545. Accuracy: 6
Iteration: 9. Loss: 10.414481163024902. Accuracy: 8
Iteration: 10. Loss: 62.10285949707031. Accuracy: 8
Iteration: 11. Loss: 18.181743621826172. Accuracy: 4
Iteration: 12. Loss: 4.426313400268555. Accuracy: 4
Iteration: 13. Loss: 8.59347915649414. Accuracy: 4
Iteration: 14. Loss: 4.252591609954834. Accuracy: 4
Iteration: 15. Loss: 5.141870498657227. Accuracy: 6
Iteration: 16. Loss: 12.057385444641113. Accuracy: 8
Iteration: 17. Loss: 62.302276611328125. Accuracy: 8
Iteration: 18. Loss: 19.387571334838867. Accuracy: 4
Iteration: 19. Loss: 13.928518295288086. Accuracy: 8
Iteration: 20. Los

KeyboardInterrupt: 