In [None]:
# Mount Google Drive at /drive (Colab only); the dataset and output paths used
# later in this notebook all live under /drive/MyDrive.
from google.colab import drive
drive.mount('/drive')

Drive already mounted at /drive; to attempt to forcibly remount, call drive.mount("/drive", force_remount=True).


Install a newer, pinned version of opencv-contrib-python, which is needed to use SIFT.

In [None]:
# The version is pinned exactly so the environment is reproducible.
# NOTE(review): in Colab, `%pip` is preferred over `!pip` so the install targets the kernel's environment.
!pip install opencv-contrib-python==4.4.0.44



Import required Libraries 

In [None]:
import numpy as np
import cv2
from skimage.feature import local_binary_pattern
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, f1_score, precision_score, \
                            recall_score, accuracy_score, classification_report

Extract Features 
1. Hu moments
2. LBP
3. color histogram

Features tried that did not work well
4. HoG [decreased accuracy]
5. GLCM [decreased accuracy]
6. Haralick [could not get mahotas.features to run]

Local Features
7. SIFT [could not combine local and global features using K-means]

In [None]:
def extract_hu_moments(img):
    """Compute the Hu Moments (shape descriptors) of an image.

    The image is converted to grayscale, its raw image moments are computed,
    and the Hu Moments derived from them are flattened to one dimension.

    :param img: ndarray, BGR image
    :return feature: ndarray, contains 7 Hu Moments of the image
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    raw_moments = cv2.moments(grayscale)
    return cv2.HuMoments(raw_moments).flatten()

# feature-descriptor-2: LBP
def extract_lbp(img, numPoints=24, radius=8):
    """Extract Local Binary Pattern histogram of an image. Local Binary Pattern features are texture descriptors.

    :param img: ndarray, BGR image
    :param numPoints: int, number of circularly symmetric neighbour points used by LBP
    :param radius: number, radius of the circle on which the neighbour points lie
    :return feature: ndarray of length (numPoints+2), the density-normalised
                     histogram of the 'uniform' LBP codes of the image
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    lbp = local_binary_pattern(gray, numPoints, radius, method='uniform')

    # 'uniform' LBP codes lie in [0, numPoints + 1], i.e. numPoints + 2 distinct values.
    # The bin count is fixed from the parameters rather than from lbp.max(), so every
    # image yields a vector of the same length even if its highest code never occurs;
    # otherwise stacking per-image features can produce a ragged array.
    n_bins = int(numPoints) + 2
    feature, _ = np.histogram(lbp.ravel(), bins=n_bins, range=(0, n_bins), density=True)
    return feature

# feature-descriptor-3: Color histogram
def extract_color_histogram(img, n_bins=8):
    """Compute a normalised 3-D colour histogram of an image in HSV space.

    :param img: ndarray, BGR image
    :param n_bins: int, number of bins used for each of the three HSV channels
    :return feature: ndarray, contains n_bins*n_bins*n_bins HSV histogram features of the image
    """
    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    bins_per_channel = [n_bins, n_bins, n_bins]
    # Value ranges passed to calcHist: [0, 180) for the first channel, [0, 256) for the other two.
    channel_ranges = [0, 180, 0, 256, 0, 256]
    histogram = cv2.calcHist([hsv_img], [0, 1, 2], None, bins_per_channel, channel_ranges)

    # Normalise in place (source and destination are the same array).
    cv2.normalize(histogram, histogram)
    return histogram.flatten()

# feature-descriptor-4: Haralick Texture
def get_haralick(image):
    """Return the Haralick texture feature vector of an image.

    The image is converted to grayscale and the result of ``mahotas.haralick``
    is averaged over its first axis.

    NOTE(review): ``mahotas`` is not imported in any cell visible in this notebook,
    so calling this function as written raises NameError. The feature notes in this
    notebook list Haralick as a feature that could not be used, and
    extract_global_features does not call this helper.

    :param image: ndarray, converted with COLOR_BGR2GRAY (so presumably a BGR image)
    :return haralick: ``mahotas.haralick(gray).mean(axis=0)``
    """
    # convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # compute the haralick texture feature vector
    haralick = mahotas.haralick(gray).mean(axis=0)
    # return the result
    return haralick

# feature-descriptor-5: HOG (Histogram of Gradients)
def get_hog_features(image):
    """
    Gets the histogram of oriented gradients for an image

    :param image: ndarray, multichannel image (channels on the last axis)
    :return fd: 1-D ndarray, the HOG descriptor (8 orientations, 64x64-pixel
                cells, 1x1-cell blocks)
    """
    # Imported locally because the notebook's import cell never brings `hog`
    # into scope, so the original raised NameError when called.
    from skimage.feature import hog

    # Only the descriptor is returned, so the visualisation image is not
    # requested (visualize=False); the descriptor itself is unchanged.
    # NOTE(review): `multichannel` was replaced by `channel_axis` in newer
    # scikit-image releases; switch the keyword if the environment is upgraded.
    return hog(image,
               orientations=8,
               pixels_per_cell=(64, 64),
               cells_per_block=(1, 1),
               visualize=False,
               multichannel=True)

def get_hog_mean(image):
    """
    Mean of the HOG descriptor of an image; 0 when the descriptor is empty.
    """
    descriptor = get_hog_features(image)
    if len(descriptor):
        return np.mean(descriptor)
    return 0
    
def get_hog_std(image):
    """
    Standard deviation of the HOG descriptor of an image; 0 when the descriptor is empty.
    """
    descriptor = get_hog_features(image)
    if len(descriptor):
        return np.std(descriptor)
    return 0


# feature-descriptor-6: GLCM 
def get_GLCM(image, bit_depth=8):
    """Compute grey-level co-occurrence matrix (GLCM) texture features per channel.

    For each of the first three channels a GLCM is built at distance 1 and angles
    0, 45, 90 and 135 degrees, and four properties are averaged over the angles.

    :param image: ndarray of shape (H, W, >=3) with integer values below 2**bit_depth
    :param bit_depth: int, number of grey levels is 2**bit_depth
    :return: list of 12 values ordered as
             [contrast, dissimilarity, correlation, homogeneity] for channel 0,
             then the same four for channel 1, then for channel 2
    """
    # Imported locally because the notebook never imports these names, so the
    # original raised NameError when called. scikit-image renamed the grey*
    # functions to gray*; try the new names first and fall back to the old ones.
    try:
        from skimage.feature import graycomatrix, graycoprops
    except ImportError:
        from skimage.feature import greycomatrix as graycomatrix
        from skimage.feature import greycoprops as graycoprops

    distances = [1]
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    properties = ('contrast', 'dissimilarity', 'correlation', 'homogeneity')

    features = []
    for channel in range(3):
        glcm = graycomatrix(image[:, :, channel], distances, angles, levels=2**bit_depth)
        # Average each property over all distance/angle combinations.
        features.extend(graycoprops(glcm, prop).mean() for prop in properties)
    return features


def extract_global_features(img):
    """Build the global feature vector of an image by concatenating its
    shape (Hu Moments), texture (LBP histogram) and colour (HSV histogram) descriptors.

    Zernike-moment and Haralick descriptors are not included.

    :param img: ndarray, BGR image
    :return global_feature: ndarray, contains shape, texture and color features of the image
    """
    descriptors = [
        extract_hu_moments(img),       # shape
        extract_lbp(img),              # texture
        extract_color_histogram(img),  # colour
    ]
    return np.hstack(descriptors)


def extract_keypoints(keypoint_detector, image):
    """Return the keypoint descriptors of an image as an ndarray.

    :param keypoint_detector: object exposing ``detectAndCompute(image, mask)``
    :param image: image passed straight to the detector
    :return: ndarray of descriptors, or an empty ndarray when the detector
             returns no descriptors (None)
    """
    _, found_descriptors = keypoint_detector.detectAndCompute(image, None)
    if found_descriptors is None:
        return np.array([])
    return np.array(found_descriptors)


def flatten_keypoint_descriptors(X_train_local_features):
    """Stack per-image keypoint descriptors into one 2-D array.

    :param X_train_local_features: sequence with one descriptor array per image
           (rows are descriptors); entries may be empty for images without keypoints
    :return descriptor_list_train: ndarray with all descriptors stacked row-wise
            in input order, or an empty ndarray if there are no descriptors at all
    """
    # Images without keypoints yield empty arrays (see extract_keypoints); they
    # cannot be stacked with (k, d) arrays, so they are skipped.
    non_empty = [np.atleast_2d(descriptors)
                 for descriptors in X_train_local_features
                 if np.size(descriptors) > 0]
    if not non_empty:
        return np.array([])

    # A single vstack avoids re-copying the growing array on every iteration.
    return np.vstack(non_empty)

def cluster_local_features(descriptor_list_train, n_clusters=20, random_state=0):
    """Fit a K-means model on stacked keypoint descriptors.

    :param descriptor_list_train: 2-D array-like, one descriptor per row
    :param n_clusters: int, number of clusters
    :param random_state: seed for centroid initialisation; defaults to 0 like
           every other estimator in this notebook so repeated runs produce the
           same clusters. Pass None for an unseeded fit.
    :return kmeans: the fitted KMeans estimator
    """
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(descriptor_list_train)
    return kmeans
    

#### Different way to extract local features SIFT
# def extract_local_features(X_train_local_features, X_test_local_features):
    
#     k=20

#     # # Scaling descriptors
#     # scaler = MinMaxScaler()
#     # X_train_local_features = scaler.fit_transform(X_train_local_features)
#     # X_test_local_features = scaler.fit_transform(X_test_local_features)
    
#     km = KMeans(n_clusters=k)
#     kmeans = km.fit(X_train_local_features)
    
#     print("[INFO] Adding cluster features")
#     X_clustered_train = kmeans.predict(X_train_local_features)
#     X_clustered_test  = kmeans.predict(X_test_local_features)

#     return X_clustered_train, X_clustered_test


#### Another attempt to extract Local Features 

# def extract_local_features(X_train_local_features, X_test_local_features):
    
#     n_clusters=20

#     # flatten keypoint_descriptors
#     # descriptor_list_train = flatten_keypoint_descriptors(X_train_local_features)
#     # descriptor_list_test = flatten_keypoint_descriptors(X_test_local_features)
 
#     # cluster keypoint descriptors
#     kmeans = cluster_local_features(X_train_local_features, n_clusters=n_clusters)
#     descriptor_clustered_train = kmeans.predict(X_train_local_features)
#     descriptor_clustered_test = kmeans.predict(X_train_local_features)

#     # For each image, count number of keypoints in each cluster that the image has
#     X_clustered_train = np.array([np.zeros(n_clusters) for i in range(len(X_train_local_features))])
#     old_count = 0
#     for i in range(len(X_train_local_features)):
#     	nb_descriptors = len(X_train_local_features[i])
#     	for j in range(nb_descriptors):
#     		idx = descriptor_clustered_train[old_count+j]
#     		X_clustered_train[i][idx] += 1
#     	old_count += nb_descriptors
    
#     X_clustered_test = np.array([np.zeros(n_clusters) for i in range(len(X_test_local_features))])
#     old_count = 0
#     for i in range(len(X_test_local_features)):
#         nb_descriptors = len(X_test_local_features[i])
#         for j in range(nb_descriptors):
#             idx = descriptor_clustered_test[old_count+j]
#             X_clustered_test[i][idx] += 1
#         old_count += nb_descriptors
        
#     return X_clustered_train, X_clustered_test

Fitting PCA on our extended features did not give promising results, so it is not used in the final version of the classification.

In [None]:
#Fitting the PCA algorithm 
def show_variance_explained_pca(data):
    """
    Plot the cumulative explained-variance ratio of the principal components
    obtained by fitting a default PCA on ``data``.

    NOTE(review): ``PCA`` and ``plt`` are not imported in any cell visible in
    this notebook; they must be in scope for this function to run.

    :param data: array-like passed to ``PCA().fit``
    """
    fitted_pca = PCA().fit(data)
    cumulative_variance = np.cumsum(fitted_pca.explained_variance_ratio_)

    # Cumulative sum of the explained variance against the component index
    plt.figure()
    plt.plot(cumulative_variance)
    plt.xlabel('Number of Components')
    plt.ylabel('Variance (%)')
    plt.title('Explained Variance')
    plt.show()
    
def fit_pca(pca, train_x, val_x):
    """
    Returning the transformed datasets using the provided pca
    Fitting using only training data, transforms both

    :param pca: transformer exposing ``fit_transform`` and ``transform``
    :param train_x: training features; the transformer is fitted on these only
    :param val_x: validation features; transformed with the already-fitted transformer
    :return: tuple (train_x_pca, val_x_pca)
    """
    train_x_pca = pca.fit_transform(train_x)
    val_x_pca = pca.transform(val_x)
    # The original returned the undefined name `test_x_pca`, which raised NameError.
    return train_x_pca, val_x_pca


# pca = PCA().fit(train_data)
# plt.figure()
# plt.plot(np.cumsum(pca.explained_variance_ratio_))
# plt.xlabel('Number of Components')
# plt.ylabel('Variance (%)') #for each component
# plt.title('Explained Variance')
# plt.show()
# train_x_pca = pca.fit_transform(train_data)

In [None]:
import os
import sys
sys.path.insert(0, os.getcwd()) # add current working directory to pythonpath

import pandas as pd
import cv2
import numpy as np
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import make_scorer, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn import linear_model
import pickle
import warnings
import argparse

In [None]:
def resize_image(img, img_size):
    """Pad an image to a square with black borders, then resize it to (img_size, img_size).

    The padding is split between the two opposite sides, with any odd leftover
    pixel going to the bottom/right.

    :param img: ndarray, BGR image
    :param img_size: int, size to resize images.
    :return img_resized: padded and resized image
    """
    height, width = img.shape[:2]
    side = max(height, width)

    # Padding needed along each axis to reach the longer side
    pad_rows = side - height
    pad_cols = side - width
    top = pad_rows // 2
    bottom = pad_rows - top
    left = pad_cols // 2
    right = pad_cols - left

    squared = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    return cv2.resize(squared, (img_size, img_size))

### For removal of hair from the images 
def remove_hair(image):
    """Remove hair-like dark thin structures from an image by black-hat filtering and inpainting.

    :param image: ndarray, BGR image (as returned by cv2.imread)
    :return dst: ndarray, same colour layout as the input, with the detected
                 hair pixels inpainted
    """
    # Images in this notebook come from cv2.imread and are therefore BGR. The
    # original used COLOR_RGB2GRAY, which swaps the red and blue weights.
    gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Kernel for the morphological filtering (cv2.MORPH_CROSS == 1, the value
    # that was previously hard-coded).
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (17, 17))

    # Black-hat filtering highlights the hair contours.
    blackhat = cv2.morphologyEx(gray_scale, cv2.MORPH_BLACKHAT, kernel)

    # Binarise the black-hat response to obtain the inpainting mask.
    _, hair_mask = cv2.threshold(blackhat, 10, 255, cv2.THRESH_BINARY)

    # Inpaint the original image wherever the mask is set.
    return cv2.inpaint(image, hair_mask, 1, cv2.INPAINT_TELEA)


#### Cycle through the folders, get labels and read each image, resize > remove hair > extract local and global features 

def prepare_dataset(input_path, img_size=None, keypoint_detector=None):
    """Process images of different classes and extract labels and global features of images

    ``input_path`` must contain one sub-folder per class; the folder name is used
    as the label of every image inside it.

    :param input_path: str, directory containing one sub-folder per class
    :param img_size: int or None. When given, each image is padded/resized to
           (img_size, img_size) and hair removal is applied; when None, images
           are used as read.
    :param keypoint_detector: currently unused (local SIFT features are disabled)
    :return: tuple (global_features, local_features, labels):
             ndarray with one global feature vector per image,
             an empty list (local features are disabled),
             ndarray with the class label of each image
    """
    global_features = []
    local_features = []  # stays empty while keypoint extraction is disabled
    labels = []

    # os.listdir returns entries in arbitrary order; sort so the sample order
    # (and therefore the CV folds) is reproducible between runs.
    for folder in sorted(os.listdir(input_path)):
        folder_path = os.path.join(input_path, folder)
        if not os.path.isdir(folder_path):
            # Stray files next to the class folders are not classes.
            continue
        print('Processing: ' + folder)
        for filename in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, filename)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread returns None for files it cannot decode; skip them
                # instead of failing later inside feature extraction.
                print('Skipping unreadable file: ' + file_path)
                continue
            if img_size is not None:
                img = resize_image(img, img_size)
                img = remove_hair(img)
            global_features.append(extract_global_features(img))
            labels.append(folder)

            # Local (SIFT) features are disabled; to re-enable:
            # keypoint_descriptors = extract_keypoints(keypoint_detector, img)
            # local_features.append(keypoint_descriptors)

    return np.array(global_features), local_features, np.array(labels)
    

def train_model(model, X_train, y_train, parameters, n_splits=3):
    """Run a grid search with stratified, shuffled K-fold cross validation and
    return the best estimator found.

    :param model: Scikit-learn estimator
    :param X_train: trainset features
    :param y_train: trainset label
    :param parameters: dict, key is hyper parameter name and value is a list of hyper parameter values
    :param n_splits: int, number of cross-validation folds
    :return: tuple (best estimator, its mean cross-validated accuracy, its parameters)
    """
    folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)
    search = GridSearchCV(
        model,
        parameters,
        cv=folds.split(X=X_train, y=y_train),
        scoring=make_scorer(accuracy_score),
    )

    # Silence warnings raised while fitting (e.g. about default optimizers)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        search.fit(X_train, y_train)

    return search.best_estimator_, search.best_score_, search.best_params_


def evaluate_model(model, X_train, y_train, X_test, y_test):
    """Refit a model on the whole trainset, predict the testset and report metrics.

    :param model: Scikit-learn estimator
    :param X_train: trainset features
    :param y_train: trainset label
    :param X_test: testset features
    :param y_test: testset label, or None to skip the evaluation
    :return model: Scikit-learn estimator, fitted on the whole trainset
    :return y_predict: prediction on test set
    :return scores: float accuracy on the test set, or None when y_test is None
    """
    # Refit on the full train set, silencing fit-time warnings
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # Without ground truth there is nothing to score
    if y_test is None:
        return model, y_pred, None

    with warnings.catch_warnings():
        # Silences e.g. the f1-score warning raised when not all labels are predicted
        warnings.simplefilter('ignore')
        print('*** TEST SET PERFORMANCE EVALUATION + Feature Extraction ***')

        accuracy = accuracy_score(y_test, y_pred)
        val_kappa = cohen_kappa_score(y_test, y_pred, weights='quadratic')
        val_f1 = f1_score(y_test, y_pred, average='weighted')
        val_recall = recall_score(y_test, y_pred, average='weighted')
        val_precision = precision_score(y_test, y_pred, average='weighted')

        print('Accuracy: {:.3f}'.format(accuracy))
        print('Kappa: {:.3f}'.format(val_kappa))
        print('F1-score: {:.3f}'.format(val_f1))
        print('Recall: {:.3f}'.format(val_recall))
        print('Precision: {:.3f}'.format(val_precision))

    return model, y_pred, accuracy


def _define_base_models():
    """Return the candidate base models as a list of (estimator, param_grid, name) tuples.

    Each grid holds exactly one value per hyper parameter, so every tuple is one
    concrete configuration. The order is significant: saved models are numbered
    by their position in this list.
    """
    candidates = []

    # Random forest model
    for n_estimators in [500, 1000, 2000]:
        for max_depth in [3, 5, 7]:
            candidates.append((
                RandomForestClassifier(max_features='sqrt', class_weight='balanced', random_state=0),
                {'n_estimators': [n_estimators], 'max_depth': [max_depth]},
                'Random Forest',
            ))

    # Boosted Tree
    for n_estimators in [500, 1000, 2000]:
        for max_depth in [3, 5, 7]:
            for learning_rate in [0.01, 0.1]:
                candidates.append((
                    GradientBoostingClassifier(subsample=0.7, max_features='sqrt', random_state=0),
                    {'n_estimators': [n_estimators], 'max_depth': [max_depth],
                     'learning_rate': [learning_rate]},
                    'Gradient Boosting Machine',
                ))

    # SVM
    for kernel in ['linear', 'rbf']:
        for C in [1.0, 10.0, 100.0, 1000.0]:
            candidates.append((
                SVC(probability=True, gamma='auto', tol=0.001, cache_size=200, class_weight='balanced',
                    random_state=0,
                    decision_function_shape='ovr'),
                {'kernel': [kernel], 'C': [C]},
                'Support Vector Machine',
            ))

    # Logistic regression model
    for penalty in ['l1', 'l2']:
        for C in [1.0, 10.0, 100.0, 1000.0]:
            candidates.append((
                linear_model.LogisticRegression(max_iter=500, solver='liblinear', multi_class='ovr',
                                                class_weight='balanced', random_state=0),
                {'penalty': [penalty], 'C': [C]},
                'Logistic Regression',
            ))

    # KNN
    for n_neighbors in [5, 10, 15]:
        for weights in ['uniform', 'distance']:
            candidates.append((
                KNeighborsClassifier(),
                {'n_neighbors': [n_neighbors], 'weights': [weights]},
                'K Nearest Neighbour',
            ))

    return candidates


def build_base_models(X_train, y_train, X_test=None, y_test=None):
    """Train and evaluate different base models.

    :param X_train: trainset features
    :param y_train: trainset label
    :param X_test: testset features. Optional for backward compatibility: when
           omitted, the notebook-level globals ``X_test`` / ``y_test`` are used,
           which is what this function implicitly relied on before.
    :param y_test: testset label (may be None, in which case no score is computed)
    :return fitted_models: list, contains fitted Scikit-learn estimators
    :return model_names: list, names of fitted Scikit-learn estimators
    :return model_scores: list, test-set accuracy of each fitted estimator
                    (as returned by evaluate_model)
    :raises ValueError: if X_test is neither passed nor defined as a global
    """
    if X_test is None:
        # Backward-compatibility shim for callers that pass only the train set.
        module_globals = globals()
        if 'X_test' not in module_globals:
            raise ValueError('X_test was not passed and no global X_test is defined')
        X_test = module_globals['X_test']
        if y_test is None:
            y_test = module_globals.get('y_test')

    candidates = _define_base_models()

    ###### TRAIN AND EVALUATE BASE MODELS #
    fitted_models = []
    model_names = []
    model_scores = []
    for position, (model, params, name) in enumerate(candidates, start=1):
        print('Evaluating model {} of {}: {}'.format(position, len(candidates), name))
        fitted_cv, _, _ = train_model(model=model, X_train=X_train, y_train=y_train, parameters=params)
        fitted_whole_set, _, score = evaluate_model(model=fitted_cv, X_train=X_train, y_train=y_train,
                                                    X_test=X_test, y_test=y_test)
        fitted_models.append(fitted_whole_set)
        model_names.append(name)
        model_scores.append(score)
        print(name, score)

    return fitted_models, model_names, model_scores

Main code section

In [None]:
# Main pipeline: extract features for the train and validation folders, scale
# them, fit/evaluate every base model and pickle the fitted models.

# Paths for the data and output
# NOTE(review): data_path is assigned but not used anywhere in this cell.
data_path = "/drive/MyDrive/cad"
save_path = "/drive/MyDrive/cad/output1"

# img_size=None makes prepare_dataset skip both resizing and hair removal.
img_size = None
# NOTE(review): n_splits is assigned but never passed on in this cell;
# train_model falls back to its own default (also 3).
n_splits = 3

# Extract features and labels for train set and test set in the train and val path
# (the "test" set used in this cell is the validation folder).
train_path = "/drive/MyDrive/CADx project /Challenges/1/train"
test_path = "/drive/MyDrive/CADx project /Challenges/1/val"

# Extract global features; the second return value (keypoint features) is an
# empty list because local feature extraction is disabled in prepare_dataset.
X_train_global_features, keypoints_features_train, y_train = prepare_dataset(train_path, img_size=img_size)
X_test_global_features, keypoints_features_test, y_test = prepare_dataset(test_path, img_size=img_size)

# Get the final clustered train/test data [with global + local features]
#X_clustered_train, X_clustered_test = extract_local_features(keypoints_features_train,keypoints_features_test)
#X_train_all_features = np.hstack((X_train_global_features, X_clustered_train))
#X_test_all_features = np.hstack((X_test_global_features, X_clustered_test))
## Tried but could not achieve 

# Only the global features are used; the *_all_features names are kept so the
# rest of the cell reads the same as it would with local features added.
X_train_all_features = X_train_global_features
X_test_all_features = X_test_global_features

# Normalize features: the scaler is fitted on the training features only and
# then applied unchanged to the validation features.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train_all_features)
X_test = scaler.transform(X_test_all_features)

# Build base models
# NOTE(review): X_test / y_test are not passed here; build_base_models picks
# them up from the notebook's global namespace.
base_models, base_model_names, base_model_scores = build_base_models(X_train, y_train)
if save_path is not None:
    # Save base models as base_model_<k>.pkl, where k is the 1-based position
    # of the model in the list returned by build_base_models.
    os.makedirs(os.path.join(save_path, 'base_models'), exist_ok=True)
    for i in range(len(base_models)):
        with open(os.path.join(save_path, 'base_models', 'base_model_' + str(i+1) + '.pkl'), 'wb') as f:
            pickle.dump(base_models[i], f)
                    

Processing: les
Processing: nv
Processing: les
Processing: nv
Evaluating model 1 of 49: Random Forest
*** TEST SET PERFORMANCE EVALUATION + Feature Extraction ***
Accuracy: 0.763
Kappa: 0.526
F1-score: 0.757
Recall: 0.763
Precision: 0.794
Random Forest 0.7634854771784232
Evaluating model 2 of 49: Random Forest
*** TEST SET PERFORMANCE EVALUATION + Feature Extraction ***
Accuracy: 0.790
Kappa: 0.580
F1-score: 0.786
Recall: 0.790
Precision: 0.813
Random Forest 0.7900414937759336
Evaluating model 3 of 49: Random Forest
*** TEST SET PERFORMANCE EVALUATION + Feature Extraction ***
Accuracy: 0.810
Kappa: 0.620
F1-score: 0.808
Recall: 0.810
Precision: 0.823
Random Forest 0.8099585062240664
Evaluating model 4 of 49: Random Forest
*** TEST SET PERFORMANCE EVALUATION + Feature Extraction ***
Accuracy: 0.759
Kappa: 0.518
F1-score: 0.753
Recall: 0.759
Precision: 0.789
Random Forest 0.7593360995850622
Evaluating model 5 of 49: Random Forest
*** TEST SET PERFORMANCE EVALUATION + Feature Extraction *

In [None]:
import pickle as pkl
# Persist the fitted scaler, presumably so the same scaling can be re-applied later.
# NOTE(review): the path is relative, so the file is written to the current
# working directory rather than to save_path.
with open("scaler.pkl", "wb") as outfile:
    pkl.dump(scaler, outfile)

# NOTE(review): `files` is imported but not used in this cell.
from google.colab import files

Read the test images, extract features and predict the class from the predefined best model 

In [None]:
# Set test folder path 
test_path = "/drive/MyDrive/CADx project /Challenges/cad/test"

# Variable initialization
img_size = None  # None: use the images as read (no resizing / hair removal)
test_global_features = []

# Read the image names in sorted order so predictions line up with file names.
file_list = os.listdir(test_path)
sorted_files = sorted(file_list)
# Show a short preview instead of dumping the full listing twice.
print(len(sorted_files), 'test files; first few:', sorted_files[:5])

# Extract the same global features that were used for training. This step was
# previously commented out, which left X_test holding the validation features,
# so the prediction cell below silently predicted on the wrong data after a
# fresh "Restart & Run All".
for filename in sorted_files:
    img = cv2.imread(os.path.join(test_path, filename))
    if img is None:
        # Fail loudly: silently skipping a file would misalign predictions and file names.
        raise ValueError('Could not read test image: ' + os.path.join(test_path, filename))
    if img_size is not None:
        img = resize_image(img, img_size)
        img = remove_hair(img)
    test_global_features.append(extract_global_features(img))

# Convert to array and look at the feature shape
X_test_global_features = np.array(test_global_features)
print(X_test_global_features.shape)
print("Feature extraction complete")

# Normalize with the scaler already fitted on the training features (no refit).
# NOTE: this intentionally replaces the validation X_test from the training cell.
X_test = scaler.transform(X_test_global_features)
assert len(X_test) == len(sorted_files)



['xx0790.jpg', 'xx0507.jpg', 'xx0624.jpg', 'xx0673.jpg', 'xx0251.jpg', 'xx0967.jpg', 'xx0989.jpg', 'xx0074.jpg', 'xx0872.jpg', 'xx1004.jpg', 'xx0343.jpg', 'xx1008.jpg', 'xx0869.jpg', 'xx0230.jpg', 'xx0017.jpg', 'xx0223.jpg', 'xx0525.jpg', 'xx0716.jpg', 'xx0192.jpg', 'xx0124.jpg', 'xx0349.jpg', 'xx0024.jpg', 'xx0180.jpg', 'xx0224.jpg', 'xx0732.jpg', 'xx0088.jpg', 'xx0186.jpg', 'xx0653.jpg', 'xx0283.jpg', 'xx0367.jpg', 'xx0119.jpg', 'xx0003.jpg', 'xx0780.jpg', 'xx0558.jpg', 'xx0721.jpg', 'xx0468.jpg', 'xx0127.jpg', 'xx0315.jpg', 'xx0888.jpg', 'xx0357.jpg', 'xx0481.jpg', 'xx0578.jpg', 'xx0821.jpg', 'xx0005.jpg', 'xx0306.jpg', 'xx0311.jpg', 'xx0393.jpg', 'xx0930.jpg', 'xx0871.jpg', 'xx0724.jpg', 'xx0557.jpg', 'xx0370.jpg', 'xx0556.jpg', 'xx0143.jpg', 'xx0809.jpg', 'xx0564.jpg', 'xx0626.jpg', 'xx0232.jpg', 'xx0298.jpg', 'xx0361.jpg', 'xx0889.jpg', 'xx0978.jpg', 'xx0838.jpg', 'xx0794.jpg', 'xx0319.jpg', 'xx0245.jpg', 'xx0010.jpg', 'xx0478.jpg', 'xx0219.jpg', 'xx0631.jpg', 'xx0193.jpg', 'xx03

Load the pre-selected best model with pickle, predict on the test features, and save the predictions as CSV.

In [None]:
#Path to best model 
final_model = "/drive/MyDrive/cad/output1/base_models/base_model_25.pkl"

# Load the model. The context manager closes the file handle, which the
# previous pickle.load(open(...)) form leaked.
# NOTE: pickle.load can execute arbitrary code; only load model files you created yourself.
with open(final_model, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

#Predict test data on the model
y_pred = loaded_model.predict(X_test)

# Convert the class labels into the required encoding: 'nv' -> 0, 'les' -> 1.
# y_pred is an array of strings, so the encoded values are the strings '0' / '1'.
print(y_pred)
y_pred = np.where(y_pred=='nv', 0, y_pred)
y_pred = np.where(y_pred=='les',1,y_pred)
print(y_pred)

# Saving NumPy array as a csv file
pd.DataFrame(y_pred).to_csv("/drive/MyDrive/cad/output1/prediction")


['nv' 'les' 'les' ... 'nv' 'les' 'les']
['0' '1' '1' ... '0' '1' '1']


In [None]:
# Save the encoded predictions (y_pred from the prediction cell) as a CSV file,
# presumably the challenge submission.
# NOTE(review): the prediction cell already writes the same data to
# ".../output1/prediction" (no extension); one of the two writes is likely redundant.
pd.DataFrame(y_pred).to_csv("/drive/MyDrive/cad/output1/Challenge1_prediction.csv")
