# Neural Network Deforestation Recognition
By: Luis Diego Fernández

In [2]:
# Data management & others
import gc
import os 
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from collections import Counter, defaultdict

In [3]:
# Deep Learning
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, transforms

In [4]:
# Plotting
import matplotlib.pyplot as plt
import plotly.offline as py
import plotly.graph_objs as go
import plotly.tools as tls

### Training Folder Preparation

In [5]:
# Renaming dataset for easy import
for count, filename in enumerate(os.listdir("./train/train-jpg/")): 
    f_n = filename[6:]
    f_n = f_n.replace('.jpg','')
    
    if len(f_n) >= 5:
        continue

    zeros = "0" * (5 - len(str(f_n)))
    dst ="train_" + zeros + str(f_n) + ".jpg"
    src ='./train/train-jpg/'+ filename 
    dst ='./train/train-jpg/'+ dst 

    os.rename(src, dst) 


### Main DataFrame

In [6]:
df_train = pd.read_csv('train/train.csv')

In [7]:
df_train.head(9)

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road
5,train_5,haze primary water
6,train_6,agriculture clear cultivation primary water
7,train_7,haze primary
8,train_8,agriculture clear cultivation primary


Most common tags:

In [136]:
labels = df_train['tags'].apply(lambda x: x.split(' '))

counts = defaultdict(int)
for l in labels:
    for l2 in l:
        counts[l2] += 1

data=[go.Bar(x=list(counts.keys()), y=list(counts.values()))]
layout=dict(height=800, width=800, title='Distribution of training labels')
fig=dict(data=data, layout=layout)
py.iplot(data, filename='train-label-dist')

In [158]:
count = 0
for i in labels:
    if 'agriculture' == i[0]:
        count += 1
count

12314

All possible tags in dataset:

In [140]:
# Possible labels
classes = []

for key in counts:
    classes.append(key)
    
print("All classes:")
print(classes)

All clases:
['haze', 'primary', 'agriculture', 'clear', 'water', 'habitation', 'road', 'cultivation', 'slash_burn', 'cloudy', 'partly_cloudy', 'conventional_mine', 'bare_ground', 'artisinal_mine', 'blooming', 'selective_logging', 'blow_down']


Main tags to search for:

In [167]:
area_classes = ['primary','agriculture','water','habitation','cultivation', 'conventional_mine' ,'bare_ground', 'other']

print("Important classes")
print(area_classes)

Important classes
['primary', 'agriculture', 'water', 'habitation', 'cultivation', 'conventional_mine', 'bare_ground', 'other']


Getting the new tags:

In [173]:
new_labels = []

for i in labels:
    
    main_tag = 'other'
    
    for j in i:
        if j in area_classes:
            main_tag = j
            break
    
    indx = area_classes.index(main_tag)
    new_labels.append(indx)

labels = new_labels
len(labels)

40479

Distribución de los labels principales:

In [174]:
counts = defaultdict(int)
for l in new_labels:
    counts[area_classes[l]] += 1

data=[go.Bar(x=list(counts.keys()), y=list(counts.values()))]
layout=dict(height=800, width=800, title='Distribution of training labels')
fig=dict(data=data, layout=layout)
py.iplot(data, filename='train-label-dist')

Indexing labels with base on classes:

### Model definition and Training 

Image Loader:

In [351]:
BATCH_SIZE = 500

transform = transforms.Compose([transforms.Resize(64),
                                transforms.CenterCrop(64),
                                transforms.ToTensor()])

dataset = datasets.ImageFolder('./train/', transform=transform)

trainloader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, \
                                          drop_last=True)

Definition of Neural Network class:

In [364]:
import torch.nn as nn 
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*13*13, 300) # SqareRoot(Mistmatch size/batchsize/channels)
        self.fc2 = nn.Linear(300, 150)
        self.fc3 = nn.Linear(150, 100)
        self.fc4 = nn.Linear(100, 40)
        self.fc5 = nn.Linear(40, 8)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16*13*13)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x 

In [365]:
net = Net()

Createrion and optimization method selection

In [366]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.005, momentum=0.9)

In [367]:
tmp_labels = torch.from_numpy(np.asarray(labels))
tmp_labels = tmp_labels.long()

Training:

In [368]:
predictions_by_epoch = []
loss_by_epoch = []

for epoch in range(30):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, l = data
        
        # set up correct labels
        l = tmp_labels[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = net(inputs)
        
        loss = criterion(outputs, l)
        loss.backward(retain_graph=True)
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 80 == 9:    # print every 80 mini-batches
            print('Ep %d loss: %.3f' %
                  (epoch + 1, running_loss / 10))
            running_loss = 0.0
            

print('Finished Training')

Ep 1 loss: 2.098
Ep 2 loss: 1.244
Ep 3 loss: 1.118
Ep 4 loss: 1.092
Ep 5 loss: 1.029
Ep 6 loss: 0.928
Ep 7 loss: 0.907
Ep 8 loss: 0.896
Ep 9 loss: 0.887
Ep 10 loss: 0.880
Ep 11 loss: 0.873
Ep 12 loss: 0.866
Ep 13 loss: 0.856
Ep 14 loss: 0.845
Ep 15 loss: 0.830
Ep 16 loss: 0.815
Ep 17 loss: 0.799
Ep 18 loss: 0.784
Ep 19 loss: 0.772
Ep 20 loss: 0.761
Ep 21 loss: 0.755
Ep 22 loss: 0.749
Ep 23 loss: 0.743
Ep 24 loss: 0.740
Ep 25 loss: 0.738
Ep 26 loss: 0.733
Ep 27 loss: 0.724
Ep 28 loss: 0.717
Ep 29 loss: 0.710
Ep 30 loss: 0.699
Finished Training


Save model to pc:

In [369]:
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)

### Results

Load model from pc if need it:

In [370]:
net = Net()
net.load_state_dict(torch.load(PATH))

<All keys matched successfully>

Prediction:

In [371]:
# Checks for zeros in an array, returns the percentage of zeros in it
def count_results(array):
    correct = 0
    
    for i in array:
        if i == 0:
            correct += 1
    
    return correct/len(array)

Load image to test and predict:

In [372]:
# Prediction
predictions = []
old_data = None
for i, data in enumerate(trainloader, 0):
    images,_ = data
    outputs = net(images)

    _, predicted = torch.max(outputs, 1)
    predictions += predicted
    
predictions = torch.Tensor(predictions)
predictions = predictions.type(torch.IntTensor)

Accuaracy in based of the trainning data:

In [374]:
print('Accuracy of the network on the test images: %d %%' % (
    100 * count_results(predictions - tmp_labels[:40000])))

Accuracy of the network on the test images: 76 %
