<a href="https://www.kaggle.com/code/jyothirocky/cert-proj-1-intel-image-classification?scriptVersionId=137394050" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Certificate Project - 1 : [Intel image classification](https://www.kaggle.com/puneet6060/intel-image-classification)

# Replace all '????' with correct function name/values.

# Import libraries:

In [1]:
import os
import glob
import numpy as np

from PIL import Image,ImageOps
import matplotlib.pyplot as plt
%matplotlib inline

# List out directories

In [2]:
base_dir = '../input/intel-image-classification/'
directory = os.listdir(base_dir)
directory

['seg_train', 'seg_pred', 'seg_test']

# Prepare training & testing directory paths

In [3]:
train_dir = base_dir + 'seg_train/seg_train/'
test_dir = base_dir + 'seg_test/seg_test/'

# Prepare CLASSES array

In [4]:
# Hint: 
#   You are free to use api available in 'os' library, which returns array of directories inside given path.
#   Or, you can manually prepare the array, as CLASSES = ['folder name1', 'folder name 2' ...]
CLASSES = os.listdir(train_dir)
CLASSES

['mountain', 'street', 'buildings', 'sea', 'forest', 'glacier']

# Total no. of images per class in training dataset

In [5]:
# TRAINING DIR: For each class, lets see how many images are there..
for imgType in CLASSES:
    imgTypePath = train_dir + "/" + imgType + "/"
    print("CLASS: " + imgType + ", Total images: " + str(len(os.listdir(imgTypePath)))) 

CLASS: mountain, Total images: 2512
CLASS: street, Total images: 2382
CLASS: buildings, Total images: 2191
CLASS: sea, Total images: 2274
CLASS: forest, Total images: 2271
CLASS: glacier, Total images: 2404


# Total no. of images per class in testing dataset

In [6]:
# TESTING DIR: For each class, lets see how many images are there..
for imgType in CLASSES:
    imgTypePath = test_dir + "/" + imgType + "/"
    print("CLASS: " + imgType + ", Total images: " + str(len(os.listdir(imgTypePath))))

CLASS: mountain, Total images: 525
CLASS: street, Total images: 501
CLASS: buildings, Total images: 437
CLASS: sea, Total images: 510
CLASS: forest, Total images: 474
CLASS: glacier, Total images: 553


# Pipeline helper functions

In [7]:
def convert_to_grayscale(img):
    #Convert to grayscale
    return ImageOps.grayscale(img) #Return gray scale image object using ImageOps.

def reshape_img(img, target_size=(150,150)):
    #Reshape any image to a fixed shape
    return img.resize(target_size,Image.ANTIALIAS) #Hint: Use resize() API on image object and pass target size param.

def display_numpy_img(np_img, img_name="Transformed image"):
    plt.figure(figsize = (6,6))
    plt.imshow(np_img, cmap='gray')
    plt.title(img_name)
    
def transform_image(img_file_path):
    img_obj = Image.open(img_file_path)
    #print(img_obj.format)
    #print(np.array(img_obj).shape)
    #Perform transformations in series
    img_obj = convert_to_grayscale(img_obj)
    img_obj = reshape_img(img_obj, (150,150))
    np_arr_img = np.array(img_obj)
    return np_arr_img

def load_dir_to_numpy(dir_path, maxImgs=1500):
    file_list = glob.glob(dir_path+'/*')
    imgs = []
    #Load image by image
    imgCount=0
    for fname in file_list:
        if imgCount>=maxImgs:
            break
        img_np = transform_image(fname)
        imgs.append(img_np)
        imgCount = imgCount + 1
    np_imgs = np.array(imgs)
    return np_imgs

def prepare_image_data(dir_path, MAX_IMGS):
    imgs_arr_X = []
    data_arr_y = []
    classIdx = 0;
    for imgType in CLASSES:
        IMG_DIR = dir_path + "/" + imgType + "/"
        #  print("IMG_DIR: " + IMG_DIR)
        imgs_arr = load_dir_to_numpy(IMG_DIR, MAX_IMGS)
        #print(imgType + ": " + str(imgs_arr.shape))
        imgs_arr_X.extend(imgs_arr)
        data_y = np.full((imgs_arr.shape[0],1), classIdx)
        data_arr_y.extend(data_y)
        classIdx += 1
    np_img_arr_X = np.array(imgs_arr_X)
    np_data_arr_y = np.array(data_arr_y)
    return np_img_arr_X,np_data_arr_y

# Prepare training dataset

In [8]:
train_np_x,train_np_y = prepare_image_data(train_dir, 500)
print('train_np_x.shape:', train_np_x.shape)
print('train_np_y.shape:', train_np_y.shape)

train_np_x.shape: (3000, 150, 150)
train_np_y.shape: (3000, 1)


# Flatten out the 2D image data into 1D vector

In [9]:

train_size = train_np_x.shape[0]
train_np_x = train_np_x.reshape((train_size, -1))
print('After reshaping, train_np_x.shape:', train_np_x.shape)

After reshaping, train_np_x.shape: (3000, 22500)


# Import & Prepare the model object

In [10]:
# Import a model
from sklearn.linear_model import SGDClassifier
model = SGDClassifier() #Hint: Any algorithm say sklearn.linear_model.SGDClassifier OR sklearn.tree.DecisionTreeClassifier() etc..

# Train the model

In [11]:
model.fit(train_np_x, train_np_y.reshape(-1))

SGDClassifier()

# Prepare testing data

In [12]:
test_np_x,test_np_y = prepare_image_data(test_dir, 200)

test_size = test_np_x.shape[0]
test_np_x = test_np_x.reshape((test_size, -1))
print('Test shape:', test_np_x.shape)

Test shape: (1200, 22500)


# Predict using testing data

In [13]:
# Get predicted values for 'test_np_x' using trained 'model' 
predicted_y = model.predict(test_np_x)

# What's the trained model accuracy on test data?

In [14]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
actual_y = test_np_y.reshape(-1)
print('Accuracy Score:', accuracy_score(actual_y, predicted_y))

Accuracy Score: 0.2725


# Model training performance report

In [15]:
print(classification_report(actual_y, predicted_y))

              precision    recall  f1-score   support

           0       0.25      0.78      0.38       200
           1       0.43      0.34      0.38       200
           2       0.21      0.23      0.22       200
           3       0.27      0.06      0.10       200
           4       0.31      0.11      0.16       200
           5       0.28      0.12      0.17       200

    accuracy                           0.27      1200
   macro avg       0.29      0.27      0.23      1200
weighted avg       0.29      0.27      0.23      1200



# Confusion matrix

In [16]:
print(confusion_matrix(actual_y, predicted_y))

[[156   9  21   1   5   8]
 [ 55  67  42   6  10  20]
 [ 99  22  45   9  14  11]
 [128   9  37  12   6   8]
 [ 78  36  35  13  22  16]
 [108  12  38   3  14  25]]


**Exploring Dataset**

In [17]:
import numpy as np