In [None]:
## This notebook is meant to be run in a Kaggle Kernel, both to avoid downloading the
## 1GB of training/testing data and to use the Kaggle servers for the computations,
## which keeps my computer from overheating.


# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily walk the Kaggle input tree and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# Any results you write to the current directory are saved as output.
import matplotlib.pyplot as plt
%matplotlib inline

import cv2
import random
import gc


In [None]:
# Locations of the competition images inside the Kaggle input mount.
train_dir = '../input/dogs-vs-cats-redux-kernels-edition/train'
test_dir = '../input/dogs-vs-cats-redux-kernels-edition/test'

# os.listdir() returns entries in arbitrary order; sort them so the 2000-image
# slices below select the same files on every run.
train_files = sorted(os.listdir(train_dir))

# Build the paths from train_dir/test_dir instead of repeating the literal, and
# match on the file-name prefix so the selection agrees with the 'train/dog' /
# 'train/cat' substring test used later to derive the labels.
train_dogs = ['{}/{}'.format(train_dir, name) for name in train_files if name.startswith('dog')] #get dog images
train_cats = ['{}/{}'.format(train_dir, name) for name in train_files if name.startswith('cat')] #get cat images

test_imgs = ['{}/{}'.format(test_dir, name) for name in sorted(os.listdir(test_dir))] #get test images

train_imgs = train_dogs[:2000] + train_cats[:2000] # slice the dataset and use 2000 in each class
# Shuffle with a dedicated, seeded generator so the order is reproducible
# without touching the global `random` state.
random.Random(0).shuffle(train_imgs)

# Free the intermediate lists; only train_imgs and test_imgs are needed from here on.
del train_files
del train_dogs
del train_cats
gc.collect()

In [None]:
import matplotlib.image as mpimg
# Preview the first three training images at their native resolution.
# mpimg.imread is used here (rather than cv2) so the arrays can be passed
# straight to imshow; no handle to the plot is kept because nothing reuses it.
for image_path in train_imgs[:3]:
    plt.imshow(mpimg.imread(image_path))
    plt.show()

In [None]:
# Target size every image is resized to (rows x columns), plus the number of
# colour channels kept per pixel (colour images, so three).
nrows, ncolumns = 150, 150
channels = 3

In [None]:
def read_and_process_image(list_of_images):
    '''
    Load every image path in `list_of_images`, resize it to the notebook-wide
    (nrows, ncolumns) size and derive a label from the path.

    Returns two lists:
        X is a list of resized images (arrays as returned by OpenCV, i.e. in
          OpenCV's channel order)
        Y is a list of labels: 1 when the path contains 'train/dog',
          0 when it contains 'train/cat'

    Paths matching neither pattern still contribute an image to X but add no
    label, so Y can be shorter than X for unlabelled inputs.

    Raises:
        ValueError: if an image file cannot be read.
    '''
    X = [] # Images
    Y = [] # Labels

    for image in list_of_images:
        pixels = cv2.imread(image, cv2.IMREAD_COLOR) #Read the image
        if pixels is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cv2.resize.
            raise ValueError('Could not read image: {}'.format(image))

        # cv2.resize expects dsize as (width, height), i.e. (columns, rows).
        X.append(cv2.resize(pixels, (ncolumns, nrows), interpolation = cv2.INTER_CUBIC))

        if 'train/dog' in image:
            Y.append(1)
        elif 'train/cat' in image:
            Y.append(0)

    return X, Y

In [None]:
# Load and resize the selected training images; X receives the image arrays and
# Y the matching labels (1 = dog, 0 = cat) derived from each file path.
X, Y = read_and_process_image(train_imgs)

In [None]:
# Inspect the pixel array of the first processed image.
X[0]

In [None]:
# Labels are 0/1, so their mean is the fraction of images labelled 1 (dog).
np.mean(Y)

In [None]:
# Peek at the first five labels.
Y[:5]

In [None]:
# Show the first five processed images side by side.
plt.figure(figsize = (20,10))
columns = 5
for i in range(columns):
    # subplot() requires integer grid dimensions; `/` is true division in
    # Python 3 and would pass a float, so use floor division instead.
    plt.subplot(5 // columns + 1, columns, i + 1)
    # The images were loaded with cv2.imread, which stores channels as BGR;
    # convert to RGB so imshow renders the colours correctly.
    plt.imshow(cv2.cvtColor(X[i], cv2.COLOR_BGR2RGB))

In [None]:
import seaborn as sns
# The path list is no longer needed once the images are in memory.
del train_imgs
gc.collect()

#Convert list to np.array()
X = np.array(X)
Y = np.array(Y)

#Lets plot the label to be sure we just have two classes
# Pass the labels by keyword: recent seaborn releases treat the first
# positional argument as `data`, not as the `x` variable to count.
sns.countplot(x=Y)
plt.title('Labels for Cats and Dogs')

In [None]:
# Report the dimensions of the image array and of the label array.
for caption, array in (('Shape of train images is:', X),
                       ('Shape of labels is:', Y)):
    print(caption, array.shape)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for validation; the fixed random_state keeps the
# split reproducible.
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    Y,
    random_state = 2,
    test_size = 0.20,
)

# Report the dimensions of each resulting array.
for caption, split in (
    ('Shape of train images is:', X_train),
    ('Shape of validation images is:', X_val),
    ('Shape of train labels is:', Y_train),
    ('Shape of validation labels is:', Y_val),
):
    print(caption, split.shape)

In [None]:
# The full arrays have been split into train/validation parts; release them.
del X, Y
gc.collect()

#get the length of the train and validation data
ntrain, nval = len(X_train), len(X_val)

#We will use a batch size of 32. Note: batch sizes are conventionally a power of 2 (4, 8, 16, 32, etc.)
batch_size = 32

In [None]:
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn import linear_model, metrics

# Logistic-regression classifier intended to sit on top of the RBM features.
logistic = linear_model.LogisticRegression(solver='newton-cg', tol =1, C=6000)

# Restricted Boltzmann Machine used as an unsupervised feature extractor.
rbm = BernoulliRBM(random_state=0, verbose = True, n_iter = 20,
                  learning_rate = 0.01, n_components = 50)

# Pipeline chaining both estimators; it is only assembled here, not fitted.
steps = [('rbm', rbm), ('logistic', logistic)]
rbm_features_classifier = Pipeline(steps)

# Fit the RBM alone. Each image is flattened to one row, with the sample count
# taken from the data instead of a hard-coded 3200 x 67500 so the cell keeps
# working if the subset or image size changes. BernoulliRBM assumes inputs in
# [0, 1], so the 0-255 pixel values are rescaled; computing directly in float32
# avoids an extra full-size temporary.
rbm.fit(np.true_divide(X_train.reshape(len(X_train), -1), 255, dtype=np.float32), Y_train)

In [None]:
# Inspect the hidden-unit bias vector learned by the fitted RBM.
rbm.intercept_hidden_

In [None]:
# Draw every learned RBM component as a 150x150 colour image on a 5x10 grid.
plt.figure(figsize=(20,10))
for i, comp in enumerate(rbm.components_):
    plt.subplot(5,10, i +1)
    weights = comp.reshape(150,150,3)
    # Component weights are unbounded floats, while imshow clips float RGB data
    # to [0, 1]; min-max rescale each component so its structure is visible.
    # `or 1.0` guards against division by zero for a constant component.
    plt.imshow((weights - weights.min()) / (np.ptp(weights) or 1.0))
    plt.xticks(())
    plt.yticks(())
plt.suptitle('50 components extracted by RBM', fontsize=10)

In [None]:
# ndarray.reshape returns a new array rather than modifying in place, so the
# result must be assigned; otherwise feats stays flat (one row per component).
# -1 lets NumPy infer the number of components.
feats = rbm.components_.reshape(-1,150,150,3)
feats[0].shape

In [None]:
# Show the third RBM component as an image. The weights are unbounded floats
# and imshow clips float RGB data to [0, 1], so min-max rescale them first
# (`or 1.0` avoids dividing by zero for a constant component).
component = feats[2].reshape(150,150,3)
plt.imshow((component - component.min()) / (np.ptp(component) or 1.0))

In [None]:
# Display the pixel array of the first training image.
X_train[0]