# Image Preprocessing

In [13]:
import numpy as np
import pandas as pd
from skimage.io import imread_collection
from skimage.transform import resize
from skimage.color import rgb2gray

### Setting up the Structure of our Image Data

Our data needs to be in a particular format in order to solve an image classification problem. This notebook takes our picture data and transforms it into greyscaled, 1 dimensional arrays that we can then use to model.

### Loading & Pre-processing Data

Thanks to scikit-image (Imagine Hero Music!), This gives you a dataframe of bishops as grayscale, nicely resized (158x256) and each row as an image (thanks to ravel). 

Then all we need to do is replicate the following cell for the other chess pieces. 

After creating a dataframe from these images, I then save it as an csv and upload it into the other notebooks for modeling. 

In [14]:
# /* selects everything from that folder
bishops = imread_collection(r"C:\Users\latee\Downloads\chessman-image-dataset\Chessman-image-dataset\Chess\Bishop/*")

# instantiate list of imgs
big_list_of_bishops = []

# loop through the folder bishops
for img in bishops:
    image_resized = resize(img, (158, 256))
    image_grayscaled = rgb2gray(image_resized)
    image_as_1d_vector = image_grayscaled.ravel()
    big_list_of_bishops.append(image_as_1d_vector)

bishops_df = pd.DataFrame(big_list_of_bishops)
bishops_df['kind'] = 'bishop'

In [16]:
kings = imread_collection(r"C:\Users\latee\Downloads\chessman-image-dataset\Chessman-image-dataset\Chess\King/*")
big_list_of_kings = []
for img in kings:
    image_resized = resize(img, (158, 256))
    image_grayscaled = rgb2gray(image_resized)
    image_as_1d_vector = image_grayscaled.ravel()
    big_list_of_kings.append(image_as_1d_vector)
kings_df = pd.DataFrame(big_list_of_kings)
kings_df['kind'] = 'king'

In [18]:
queens = imread_collection(r"C:\Users\latee\Downloads\chessman-image-dataset\Chessman-image-dataset\Chess\Queen/*")
big_list_of_queens = []
for img in queens:
    image_resized = resize(img, (158, 256))
    image_grayscaled = rgb2gray(image_resized)
    image_as_1d_vector = image_grayscaled.ravel()
    big_list_of_queens.append(image_as_1d_vector)
queens_df = pd.DataFrame(big_list_of_queens)
queens_df['kind'] = 'queen'

In [19]:
knights = imread_collection(r"C:\Users\latee\Downloads\chessman-image-dataset\Chessman-image-dataset\Chess\Knight/*")
big_list_of_knights = []
for img in knights:
    image_resized = resize(img, (158, 256))
    image_grayscaled = rgb2gray(image_resized)
    image_as_1d_vector = image_grayscaled.ravel()
    big_list_of_knights.append(image_as_1d_vector)
knights_df = pd.DataFrame(big_list_of_knights)
knights_df['kind'] = 'knight'

In [20]:
pawns = imread_collection(r"C:\Users\latee\Downloads\chessman-image-dataset\Chessman-image-dataset\Chess\Pawn/*")
big_list_of_pawns = []
for img in pawns:
    image_resized = resize(img, (158, 256))
    image_grayscaled = rgb2gray(image_resized)
    image_as_1d_vector = image_grayscaled.ravel()
    big_list_of_pawns.append(image_as_1d_vector)
pawns_df = pd.DataFrame(big_list_of_pawns)
pawns_df['kind'] = 'pawn'

In [21]:
rooks = imread_collection(r"C:\Users\latee\Downloads\chessman-image-dataset\Chessman-image-dataset\Chess\Rook/*")
big_list_of_rooks = []
for img in rooks:
    image_resized = resize(img, (158, 256))
    image_grayscaled = rgb2gray(image_resized)
    image_as_1d_vector = image_grayscaled.ravel()
    big_list_of_rooks.append(image_as_1d_vector)
rooks_df = pd.DataFrame(big_list_of_rooks)
rooks_df['kind'] = 'rook'

In [22]:
img_df = pd.concat([kings_df, queens_df, rooks_df, pawns_df, bishops_df, knights_df])

In [23]:
img_df2 = img_df.sample(frac=1).reset_index(drop=True)

In [25]:
img_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,65527,65528,65529,65530,65531,65532,65533,65534,65535,kind
0,0.224723,0.217918,0.228842,0.214555,0.226845,0.225818,0.22589,0.22589,0.22589,0.22589,...,0.228016,0.228016,0.228016,0.224463,0.21948,0.230376,0.220251,0.228628,0.222184,king
1,0.131955,0.131955,0.131955,0.131955,0.12932,0.120037,0.123943,0.126798,0.121073,0.110477,...,0.177426,0.187653,0.193352,0.192474,0.185692,0.195679,0.202022,0.199869,0.196888,king
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,king
3,0.996078,0.996078,0.996078,0.996078,0.996078,0.996078,0.996078,0.996078,0.996078,0.996078,...,0.071756,0.071756,0.071756,0.071756,0.071756,0.071756,0.071756,0.071756,0.071756,king
4,0.388068,0.387814,0.384436,0.379877,0.374571,0.371564,0.3696,0.36682,0.363788,0.362322,...,0.788236,0.795794,0.80184,0.803012,0.798857,0.78296,0.767002,0.750948,0.749741,king


In [14]:
img_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 555 entries, 0 to 104
Columns: 40449 entries, 0 to kind
dtypes: float64(40448), object(1)
memory usage: 171.3+ MB


In [24]:
img_df2.to_csv(r'C:\Users\latee\Documents\GitHub\Identifying Chess Pieces\img_df_256_256.csv')