<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Pipeline" data-toc-modified-id="Pipeline-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Pipeline</a></span></li><li><span><a href="#new-test" data-toc-modified-id="new-test-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>new test</a></span></li></ul></div>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline

In [11]:
import os
from skimage import io
from skimage import transform, feature

In [None]:
# Load the house-price training set and keep only numeric, fully populated rows.
data = pd.read_csv('Datasets/trainHousePrices.csv')
# Columns of dtype `object` are discarded instead of being encoded.
cat_cols = data.select_dtypes('object').columns
# Drop those columns, then every row that still contains a missing value.
data = data.drop(columns=cat_cols).dropna(axis=0, how='any')
data.shape

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(
    data.drop(columns=['SalePrice']),  # predictors: every column except the target
    data['SalePrice'],                 # target
    test_size=0.2,
    random_state=33,
)
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeRegressor
# Recursive feature elimination: repeatedly fit a decision tree and discard the
# weakest feature until five remain.
# The tree is seeded so that the selected features are the same after every
# "Restart & Run All"; 33 matches the seed used for the train/test split.
rfe = RFE(estimator=DecisionTreeRegressor(random_state=33), n_features_to_select=5)
rfe.fit(xtrain, ytrain)
# A ranking of 1 marks the features RFE kept.
feature_list = [col for col, rank in zip(xtrain.columns, rfe.ranking_) if rank == 1]
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
class Metrics:
    """Prints and returns regression metrics for a fitted model."""

    def evaluate(self, model, features, target):
        """Score ``model.predict(features)`` against ``target``.

        Prints MAE, MSE and R2 (R2 multiplied by 100) with four decimals and
        returns the same three values, rounded to four decimals, as the list
        ``[mae, mse, r2]``.
        """
        predictions = model.predict(features)
        # All three scores are computed before anything is printed.
        mae = mean_absolute_error(y_true=target, y_pred=predictions)
        mse = mean_squared_error(y_true=target, y_pred=predictions)
        r2 = r2_score(y_true=target, y_pred=predictions)*100
        print(f"MAE :: {mae: .4f}")
        print(f"MSE :: {mse: .4f}")
        print(f"R2 :: {r2: .4f}")
        return [np.round(score, 4) for score in (mae, mse, r2)]
evaluator = Metrics()

In [None]:
class OutlierTreatment:
    """IQR-based outlier detection and removal driven by a single column.

    The fences are ``Q1 - 1.5*IQR`` and ``Q3 + 1.5*IQR`` of the Series given to
    the constructor. The frame/target passed to the methods are indexed with a
    boolean mask built from that Series, so they are expected to share its index.
    """

    def __init__(self, dff):
        # Series whose distribution defines the outlier fences.
        self.dff = dff

    def outlier(self):
        """Return ``(lower_limit, upper_limit)`` computed from the column's quartiles."""
        Q1 = self.dff.quantile(0.25)
        Q3 = self.dff.quantile(0.75)
        IQR = Q3 - Q1
        lower_limit = Q1 - 1.5*IQR
        upper_limit = Q3 + 1.5*IQR
        return lower_limit, upper_limit

    def _outlier_mask(self):
        """Boolean Series that is True where the column lies strictly outside the fences.

        Strict comparisons matter when IQR is 0 (e.g. a mostly constant column):
        both fences then equal Q1, and an inclusive test would flag every row
        sitting exactly on the fence, i.e. the bulk of the data.
        """
        self.lower_limit, self.upper_limit = self.outlier()
        return (self.dff < self.lower_limit) | (self.dff > self.upper_limit)

    def countoutlier(self, dfx):
        """Return the number of rows of ``dfx`` flagged as outliers."""
        Total_outlier = len(dfx[self._outlier_mask()])
        return Total_outlier

    def cleanoutlier(self, dfx, dfy):
        """Drop the outlier rows from ``dfx`` and ``dfy`` IN PLACE and return both.

        Callers may ignore the return value; the objects passed in are mutated.
        """
        outliers = dfx[self._outlier_mask()]
        dfx.drop(outliers.index, inplace=True)
        dfy.drop(outliers.index, inplace=True)
        return dfx, dfy

In [None]:
# Shapes before pruning.
print(xtrain[feature_list].shape)
print(ytrain.shape)
print("After removing outlier")
# For each selected feature, count the flagged rows; features with fewer than
# five of them have those rows dropped from xtrain and ytrain (in place).
for column in feature_list:
    treatment = OutlierTreatment(xtrain[column])
    if treatment.countoutlier(xtrain) < 5:
        treatment.cleanoutlier(xtrain, ytrain)
# Shapes after pruning.
print(xtrain[feature_list].shape)
print(ytrain.shape)

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

In [None]:
# Keep unscaled copies of the selected features: a later cell overwrites these
# columns of xtrain/xtest with standardized values, while the Pipeline section
# applies its own StandardScaler and therefore needs the raw inputs.
xtrain1 = xtrain[feature_list].copy()
xtest1 = xtest[feature_list].copy()

In [None]:
scaler = StandardScaler()
# Fit the scaler on the training features only, then replace them with their scaled values.
xtrain[feature_list] = scaler.fit_transform(xtrain[feature_list])
# The test features are only transformed: they reuse the statistics learned from
# the training data, so no information from the test set leaks into the scaling.
xtest[feature_list] = scaler.transform(xtest[feature_list])

In [None]:
Linearmodel1 = LinearRegression()
Linearmodel1.fit(xtrain[feature_list], ytrain)

In [None]:
evaluator = Metrics()
evaluator.evaluate(Linearmodel1, xtrain[feature_list], ytrain)

In [None]:
evaluator.evaluate(Linearmodel1, xtest[feature_list], ytest)

# Pipeline

In [None]:
from sklearn.pipeline import Pipeline
# Chain scaling and regression into one estimator; it is fitted on the unscaled
# feature copies because the pipeline performs the scaling itself.
pipeline = Pipeline(
    steps=[
        ('ScalerObject', StandardScaler()),
        ('ModelName', LinearRegression()),
    ],
    verbose=True,
)
pipeline.fit(xtrain1[feature_list], ytrain)

In [None]:
evaluator.evaluate(model=pipeline, features=xtrain1[feature_list], target=ytrain)

In [None]:
evaluator.evaluate(model=pipeline, features=xtest1[feature_list], target=ytest)

In [None]:
xtrain1[feature_list].head()

In [None]:
xtest1[feature_list].head()

# new test

In [13]:
image_id = 'DSC_0365.JPG'
# Build the path from its components so it resolves on every OS; a literal
# backslash is only a path separator on Windows. On Windows the result is
# identical to the previous hard-coded string.
path = os.path.join('Datasets', 'crops_test', 'Bacterial_leaf_blight', image_id)
image = io.imread(path)
# Resize to the 64x64 resolution that the HOG step later reshapes to.
resized_img = transform.resize(image, output_shape=(64,64))
resized_img.shape

(64, 64, 3)

In [45]:
resized_img

array([[[0.89485325, 0.83851504, 0.81789253],
        [0.89398881, 0.83723754, 0.81727361],
        [0.8918832 , 0.8388547 , 0.81807434],
        ...,
        [0.89941971, 0.84357442, 0.82150739],
        [0.89724369, 0.84115237, 0.82120664],
        [0.8982708 , 0.84177405, 0.82190821]],

       [[0.89479781, 0.83600028, 0.8155353 ],
        [0.89183272, 0.83614189, 0.81640463],
        [0.89048233, 0.83706812, 0.81681097],
        ...,
        [0.9014021 , 0.84212211, 0.82090004],
        [0.89759691, 0.83970371, 0.82036974],
        [0.89620128, 0.84012427, 0.82038281]],

       [[0.89224508, 0.83681492, 0.81693112],
        [0.89059558, 0.83694467, 0.81798508],
        [0.89110041, 0.83751571, 0.81773497],
        ...,
        [0.90155452, 0.84327672, 0.82159582],
        [0.89962227, 0.84017274, 0.82048695],
        [0.89678722, 0.83935588, 0.81943701]],

       ...,

       [[0.89376435, 0.83924015, 0.818184  ],
        [0.89561038, 0.84141238, 0.81943547],
        [0.89086291, 0

In [47]:
resized_img = resized_img.flatten()

In [48]:
type(resized_img)

numpy.ndarray

In [49]:
feature_vector = feature.hog(resized_img.reshape(64,64,3), channel_axis=-1)

In [50]:
type(feature_vector)

numpy.ndarray

In [51]:
feature_vector.shape

(2916,)

In [21]:
from joblib import dump, load
# Load the previously trained crop-disease classifier from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model files that come from a trusted source.
image_classifier = load('Models/cropDiseaseFinalModel.pkl')

In [52]:
feature_vector 

array([0.12081085, 0.08550791, 0.02190302, ..., 0.01969084, 0.02721586,
       0.02180824])

In [36]:
# The classifier expects a 2-D (n_samples, n_features) array, so the single HOG
# vector is copied and reshaped into one row: array.reshape(1, -1).
feature_vector1 = feature_vector.copy().reshape(1, -1)
feature_vector1

array([[0.12081085, 0.08550791, 0.02190302, ..., 0.01969084, 0.02721586,
        0.02180824]])

In [37]:
type(feature_vector1)

numpy.ndarray

In [38]:
feature_vector1.shape

(1, 2916)

In [40]:
ypred_test = image_classifier.predict_proba(feature_vector1)

In [44]:
ypred_test[0].max()

0.9999831650836787

In [53]:
class PreProcessor:
    """Loads an image from disk and converts it into a flat 64x64 pixel vector."""

    def __init__(self, image_dir: str = os.path.join('Datasets', 'crops_test', 'Bacterial_leaf_blight')) -> None:
        """
        Parameters
        ----------
        image_dir : str, optional
            Directory that contains the images. The default is built from path
            components so it works on every OS (on Windows it equals the former
            hard-coded backslash path).
        """
        self.image_dir = image_dir

    def process_and_normalize(self, image_id: str):
        """Read ``image_id`` from ``image_dir``, resize it to 64x64 and flatten it.

        Returns
        -------
        numpy.ndarray
            The flattened resized image, when the file could be read.
        tuple
            ``('Image not found!', False)`` when the file does not exist.
            Callers must check for this sentinel before using the result.
        """
        path = os.path.join(self.image_dir, image_id)
        try:
            image = io.imread(path)
        except FileNotFoundError:
            return ('Image not found!', False)

        resized_img = transform.resize(image, output_shape=(64,64))
        return resized_img.flatten()

In [62]:
from sklearn.base import BaseEstimator, TransformerMixin

class hog_transformation(BaseEstimator, TransformerMixin):
    """Stateless scikit-learn transformer: flattened 64x64 RGB image(s) -> HOG descriptors."""

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` to satisfy the estimator API.

        X and y are intentionally not stored on the instance, so a pickled
        pipeline does not carry the training data with it.
        """
        return self

    def transform(self, X, y=None):
        """Compute one HOG feature vector per image.

        Parameters
        ----------
        X : array-like
            Either a single image (any shape holding 64*64*3 values, e.g. the
            flattened vector) or a batch of shape ``(n_samples, 64*64*3)``.

        Returns
        -------
        numpy.ndarray
            2-D array with one row of HOG features per image; a single image
            yields a one-row array.
        """
        # reshape(-1, ...) turns a single image into a batch of one and leaves
        # a real batch untouched, so both cases share the same code path.
        images = np.asarray(X).reshape(-1, 64, 64, 3)
        return np.array([feature.hog(image, channel_axis=-1) for image in images])

In [66]:
from sklearn.pipeline import Pipeline
# Inference pipeline: HOG feature extraction followed by the classifier loaded from disk.
pipeline = Pipeline(steps=[
    ('feature_extractor', hog_transformation()),
    ('MLPClassifier', image_classifier),
])

In [67]:
preprocessor = PreProcessor()

In [68]:
def get(image_id: str):
    """Return the preprocessor's output for ``image_id`` wrapped in a NumPy array."""
    return np.array(preprocessor.process_and_normalize(image_id=image_id))

In [59]:
output = get(image_id)

In [60]:
output

array([0.89485325, 0.83851504, 0.81789253, ..., 0.89312131, 0.84112241,
       0.82061944])

In [69]:
def get(image_id: str):
    """Predict the disease shown in ``image_id``.

    Returns
    -------
    dict
        ``{'disease': <label>, 'confidence': <highest class probability>}``.

    Raises
    ------
    FileNotFoundError
        If the preprocessor reports that the image does not exist.
    """
    vector = preprocessor.process_and_normalize(image_id=image_id)
    # The preprocessor signals a missing file with a ('Image not found!', False)
    # tuple. Passing that on would only surface later as an obscure reshape
    # error inside the pipeline, so fail here with a clear message instead.
    if isinstance(vector, tuple):
        raise FileNotFoundError(f"{vector[0]} (image_id={image_id!r})")
    images_vector = np.array(vector)
    # predict_proba returns one row per sample; a single image was passed in.
    ypred_test = pipeline.predict_proba(images_vector)[0]
    # assumes this order matches the classifier's class order; TODO confirm
    diseases = ['Bacterial_leaf_blight', 'Brown_spot', 'Leaf_smut']
    return {
            'disease': diseases[ypred_test.argmax()],
            'confidence': ypred_test.max()
        }

In [70]:
get(image_id)

{'disease': 'Bacterial_leaf_blight', 'confidence': 0.9999831650836787}

In [87]:
class PreProcessor:
    """Experimental variant of the image preprocessor that returns a placeholder dict.

    NOTE(review): this class re-uses the name ``PreProcessor`` and therefore
    shadows any earlier definition once the cell is executed.
    """

    def __init__(self, image_dir: str = os.path.join('Datasets', 'crops_test', 'Bacterial_leaf_blight')) -> None:
        """
        Parameters
        ----------
        image_dir : str, optional
            Directory that contains the images. The default is built from path
            components so it works on every OS (on Windows it equals the former
            hard-coded backslash path).
        """
        self.image_dir = image_dir

    def process_and_normalize(self, image_id: str):
        """Read and resize ``image_id``, then return a placeholder result.

        Returns
        -------
        dict
            ``{'disease': 'c', 'confidence': 'd'}`` when the file could be read.
        tuple
            ``('Image not found!', False)`` when the file does not exist.
        """
        path = os.path.join(self.image_dir, image_id)
        try:
            image = io.imread(path)
        except FileNotFoundError:
            return ('Image not found!', False)

        resized_img = transform.resize(image, output_shape=(64,64))
        resized_img = resized_img.flatten()
        # NOTE(review): the flattened image is computed but discarded; the
        # method returns a hard-coded placeholder instead of `resized_img`.
        return {
            'disease': 'c',
            'confidence': 'd'
        }

In [90]:
image_id = 'DSC_0365.JPG'
preprocessor = PreProcessor()
ok = preprocessor.process_and_normalize(image_id=image_id)

In [92]:
type(ok)

dict

In [94]:
result = {
            'disease': 's',
            'confidence': 'd'
        }

In [95]:
type(result)

dict