In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm


REBUILD_DATA = True # set to true to one once, then back to false unless you want to change something in your training data.

class NoiseVSIdeal():
    IMG_SIZE = 50
    NOISE = "./noise"
    IDEAL = "./single"
    #TESTING = "PetImages/Testing"
    LABELS = {NOISE: 0, IDEAL: 1}
    training_data = []

    noisecount = 0
    idealcount = 0

    def make_training_data(self):
        for label in self.LABELS:
            print(label)
            for f in tqdm(os.listdir(label)):
                if "png" or "jpg" in f:
                    try:
                        path = os.path.join(label, f)
                        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                        img = cv2.resize(img, (self.IMG_SIZE, self.IMG_SIZE))
                        self.training_data.append([np.array(img), np.eye(2)[self.LABELS[label]]])  # do something like print(np.eye(2)[1]), just makes one_hot 
                        #print(np.eye(2)[self.LABELS[label]])

                        if label == self.NOISE:
                            self.noisecount += 1
                        elif label == self.IDEAL:
                            self.idealcount += 1

                    except Exception as e:
                        pass
                        #print(label, f, str(e))

        np.random.shuffle(self.training_data)
        np.save("training_data.npy", self.training_data)
        print('Noise:',self.noisecount)
        print('Ideal:',self.idealcount)

if REBUILD_DATA:
    noisevideal = NoiseVSIdeal()
    noisevideal.make_training_data()


training_data = np.load("training_data.npy", allow_pickle=True)
print(len(training_data))

  0%|          | 0/1501 [00:00<?, ?it/s]

./noise


100%|██████████| 1501/1501 [00:14<00:00, 105.78it/s]


./single


100%|██████████| 1500/1500 [00:12<00:00, 121.73it/s]


Noise: 1500
Ideal: 1500
3000


In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__() # just run the init of parent class (nn.Module)
        self.conv1 = nn.Conv2d(1, 8, 3) # input is 1 image, 32 output channels, 5x5 kernel / window
        self.conv2 = nn.Conv2d(8, 16, 3) # input is 32, bc the first layer output 32. Then we say the output will be 64 channels, 5x5 kernel / window
        self.conv3 = nn.Conv2d(16, 32, 3)

        x = torch.randn(50,50).view(-1,1,50,50)
        self._to_linear = None
        self.convs(x)

        self.fc1 = nn.Linear(self._to_linear, 32) #flattening.
        self.fc2 = nn.Linear(32, 2) # 512 in, 2 out bc we're doing 2 classes (dog vs cat).

    def convs(self, x): 
        # max pooling over 2x2
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))

        if self._to_linear is None:
            self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
        return x

    def forward(self, x):# activation-layer = try sigmoid function
        x = self.convs(x)
        x = x.view(-1, self._to_linear)  # .view is reshape ... this flattens X before 
        x = F.sigmoid(self.fc1(x))
        x = self.fc2(x) # bc this is our output layer. No activation here.
        return F.sigmoid(x)


net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=2, bias=True)
)


In [16]:
import torch.optim as optim

optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()

In [17]:
X = torch.Tensor([i[0] for i in training_data]).view(-1,50,50)
X = X/255.0
y = torch.Tensor([i[1] for i in training_data])

In [18]:
VAL_PCT = 0.1  # lets reserve 10% of our data for validation
val_size = int(len(X)*VAL_PCT)
print(val_size)

300


In [19]:
train_X = X[:-val_size]
train_y = y[:-val_size]

test_X = X[-val_size:]
test_y = y[-val_size:]
print(len(train_X), len(test_X))

2700 300


In [26]:
BATCH_SIZE = 10
EPOCHS = 15

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_X), BATCH_SIZE)): # from 0, to the len of x, stepping BATCH_SIZE at a time. [:50] ..for now just to dev
        #print(f"{i}:{i+BATCH_SIZE}")
        batch_X = train_X[i:i+BATCH_SIZE].view(-1, 1, 50, 50)
        batch_y = train_y[i:i+BATCH_SIZE]

        net.zero_grad()

        outputs = net(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()    # Does the update

    print(f"Epoch: {epoch}. Loss: {loss}")

100%|██████████| 270/270 [00:11<00:00, 22.68it/s]
  1%|          | 3/270 [00:00<00:10, 24.35it/s]

Epoch: 0. Loss: 0.07124777138233185


100%|██████████| 270/270 [00:09<00:00, 28.13it/s]
  1%|          | 3/270 [00:00<00:10, 25.20it/s]

Epoch: 1. Loss: 0.057139430195093155


100%|██████████| 270/270 [00:09<00:00, 28.23it/s]
  1%|          | 3/270 [00:00<00:08, 29.99it/s]

Epoch: 2. Loss: 0.04916022717952728


100%|██████████| 270/270 [00:08<00:00, 30.46it/s]
  1%|▏         | 4/270 [00:00<00:09, 29.49it/s]

Epoch: 3. Loss: 0.0381368063390255


100%|██████████| 270/270 [00:08<00:00, 31.08it/s]
  1%|▏         | 4/270 [00:00<00:08, 33.15it/s]

Epoch: 4. Loss: 0.03404545783996582


100%|██████████| 270/270 [00:10<00:00, 25.71it/s]
  1%|          | 3/270 [00:00<00:12, 21.95it/s]

Epoch: 5. Loss: 0.03247779607772827


100%|██████████| 270/270 [00:18<00:00, 14.42it/s]
  0%|          | 1/270 [00:00<00:30,  8.70it/s]

Epoch: 6. Loss: 0.028898175805807114


100%|██████████| 270/270 [00:42<00:00,  6.37it/s]
  1%|          | 3/270 [00:00<00:14, 17.84it/s]

Epoch: 7. Loss: 0.025471359491348267


100%|██████████| 270/270 [00:41<00:00,  6.50it/s]
  1%|          | 3/270 [00:00<00:11, 23.23it/s]

Epoch: 8. Loss: 0.028762774541974068


100%|██████████| 270/270 [00:18<00:00, 14.22it/s]
  1%|          | 3/270 [00:00<00:09, 27.60it/s]

Epoch: 9. Loss: 0.020729245617985725


100%|██████████| 270/270 [00:18<00:00, 14.85it/s]
  1%|▏         | 4/270 [00:00<00:08, 32.09it/s]

Epoch: 10. Loss: 0.017555981874465942


100%|██████████| 270/270 [00:16<00:00, 16.36it/s]
  0%|          | 1/270 [00:00<00:38,  7.07it/s]

Epoch: 11. Loss: 0.019137876108288765


100%|██████████| 270/270 [00:14<00:00, 18.70it/s]
  1%|          | 3/270 [00:00<00:09, 29.20it/s]

Epoch: 12. Loss: 0.04893597960472107


100%|██████████| 270/270 [00:14<00:00, 18.96it/s]
  1%|          | 3/270 [00:00<00:08, 29.70it/s]

Epoch: 13. Loss: 0.00827743299305439


100%|██████████| 270/270 [00:14<00:00, 18.10it/s]

Epoch: 14. Loss: 0.0082036592066288





In [27]:
correct = 0
total = 0
with torch.no_grad():
    for i in tqdm(range(len(test_X))):
        real_class = torch.argmax(test_y[i])
        net_out = net(test_X[i].view(-1, 1, 50, 50))[0]  # returns a list, 
        predicted_class = torch.argmax(net_out)

        if predicted_class == real_class:
            correct += 1
        total += 1
print("Accuracy: ", round(correct/total, 3))

100%|██████████| 300/300 [00:01<00:00, 155.22it/s]

Accuracy:  0.893





In [28]:
net_out = net(test_X.view(-1, 1, 50, 50))

In [29]:
net_out

tensor([[9.7600e-01, 2.7595e-02],
        [2.1908e-04, 9.9980e-01],
        [5.4373e-04, 9.9952e-01],
        [1.3085e-02, 9.8650e-01],
        [9.6155e-04, 9.9904e-01],
        [1.0173e-01, 8.9724e-01],
        [9.0081e-01, 1.0064e-01],
        [2.3165e-01, 7.7067e-01],
        [1.0898e-02, 9.8866e-01],
        [1.6104e-03, 9.9838e-01],
        [7.0055e-03, 9.9305e-01],
        [4.3215e-03, 9.9550e-01],
        [2.0515e-02, 9.7672e-01],
        [4.6021e-03, 9.9531e-01],
        [3.4596e-04, 9.9964e-01],
        [9.9880e-01, 1.2880e-03],
        [3.8270e-03, 9.9664e-01],
        [8.0451e-02, 9.1983e-01],
        [9.9773e-01, 2.3651e-03],
        [8.2095e-02, 9.1602e-01],
        [1.4379e-01, 8.6116e-01],
        [7.3420e-01, 2.7565e-01],
        [9.9878e-01, 1.3051e-03],
        [1.0209e-02, 9.9078e-01],
        [9.8024e-04, 9.9899e-01],
        [9.9875e-01, 1.3365e-03],
        [9.9880e-01, 1.2872e-03],
        [5.3124e-01, 4.8129e-01],
        [5.8658e-02, 9.4191e-01],
        [7.680

tensor(598)

### Random Forest Classifier

In [30]:
import os
import cv2
import numpy as np
from tqdm import tqdm


REBUILD_DATA = True # set to true to one once, then back to false unless you want to change something in your training data.

class NoiseVSIdeal():
    IMG_SIZE = 50
    NOISE = "./noise"
    IDEAL = "./single"
    #TESTING = "PetImages/Testing"
    LABELS = {NOISE: 0, IDEAL: 1}
    training_data = []

    noisecount = 0
    idealcount = 0

    def make_training_data(self):
        for label in self.LABELS:
            print(label)
            for f in tqdm(os.listdir(label)):
                if "png" or "jpg" in f:
                    try:
                        path = os.path.join(label, f)
                        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                        img = cv2.resize(img, (self.IMG_SIZE, self.IMG_SIZE))
                        self.training_data.append([np.array(img), [self.LABELS[label]]])  # do something like print(np.eye(2)[1]), just makes one_hot 
                        #print(np.eye(2)[self.LABELS[label]])

                        if label == self.NOISE:
                            self.noisecount += 1
                        elif label == self.IDEAL:
                            self.idealcount += 1

                    except Exception as e:
                        pass
                        #print(label, f, str(e))

        np.random.shuffle(self.training_data)
        np.save("training_data.npy", self.training_data)
        print('Noise:',self.noisecount)
        print('Ideal:',self.idealcount)

if REBUILD_DATA:
    noisevideal = NoiseVSIdeal()
    noisevideal.make_training_data()


training_data = np.load("training_data.npy", allow_pickle=True)
print(len(training_data))

  0%|          | 0/1501 [00:00<?, ?it/s]

./noise


100%|██████████| 1501/1501 [00:07<00:00, 205.12it/s]
  0%|          | 0/1500 [00:00<?, ?it/s]

./single


100%|██████████| 1500/1500 [00:08<00:00, 187.30it/s]


Noise: 1500
Ideal: 1500
3000


In [31]:
from sklearn.ensemble import RandomForestClassifier

In [32]:
from sklearn.model_selection import train_test_split

In [33]:
training_data

array([[array([[255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       ...,
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255]], dtype=uint8),
        list([0])],
       [array([[255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       ...,
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255,  99, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255]], dtype=uint8),
        list([0])],
       [array([[255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       ...,
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255]], dtype=uint8),
        list([0])],
       ...,
       [array([[255, 255, 255, 

In [34]:
df_x = [training_data[i][0][0] for i in range(len(training_data))]

In [35]:
df_x

[array([255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255], dtype=uint8),
 array([255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255], dtype=uint8),
 array([255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255], dtype=uint8),
 array([255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255

In [36]:
np.shape(df_x)

(3000, 50)

In [37]:
df_y = [training_data[i][:][1] for i in range(len(training_data))]

In [38]:
df_y

[[0],
 [0],
 [0],
 [1],
 [1],
 [1],
 [1],
 [1],
 [1],
 [1],
 [1],
 [0],
 [1],
 [1],
 [0],
 [1],
 [0],
 [0],
 [1],
 [0],
 [0],
 [0],
 [0],
 [1],
 [1],
 [0],
 [0],
 [0],
 [1],
 [0],
 [0],
 [1],
 [0],
 [1],
 [0],
 [0],
 [0],
 [1],
 [0],
 [0],
 [0],
 [1],
 [0],
 [0],
 [0],
 [1],
 [0],
 [1],
 [0],
 [1],
 [1],
 [0],
 [0],
 [1],
 [1],
 [1],
 [0],
 [1],
 [1],
 [1],
 [0],
 [0],
 [1],
 [0],
 [0],
 [1],
 [0],
 [1],
 [0],
 [0],
 [0],
 [0],
 [1],
 [0],
 [1],
 [0],
 [0],
 [1],
 [0],
 [0],
 [1],
 [1],
 [1],
 [0],
 [0],
 [0],
 [0],
 [1],
 [1],
 [1],
 [1],
 [1],
 [0],
 [0],
 [1],
 [1],
 [1],
 [1],
 [0],
 [1],
 [1],
 [1],
 [0],
 [0],
 [1],
 [1],
 [1],
 [1],
 [0],
 [1],
 [0],
 [0],
 [0],
 [0],
 [1],
 [1],
 [1],
 [0],
 [0],
 [0],
 [0],
 [1],
 [0],
 [1],
 [0],
 [1],
 [0],
 [1],
 [1],
 [1],
 [1],
 [1],
 [1],
 [0],
 [0],
 [0],
 [0],
 [0],
 [0],
 [1],
 [1],
 [0],
 [1],
 [0],
 [1],
 [0],
 [0],
 [0],
 [1],
 [1],
 [0],
 [1],
 [1],
 [1],
 [0],
 [0],
 [0],
 [0],
 [1],
 [0],
 [0],
 [0],
 [0],
 [1],
 [1],
 [0],
 [0]

In [39]:
np.shape(df_y)

(3000, 1)

In [40]:
x_train, x_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=5)

In [41]:
rf = RandomForestClassifier(n_estimators=1000, criterion="gini")

In [42]:
rf.fit(x_train, np.ravel(y_train))

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [43]:
pred = rf.predict(x_test)

In [44]:
pred

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [45]:
np.ravel(y_test)

array([0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0,
       1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1,
       1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,

In [46]:
count =0

In [47]:
for i in range(len(pred)):
    if pred[i]==np.ravel(y_test)[i]:
        count += 1

In [48]:
count/len(pred)

0.49166666666666664