In [15]:
import numpy as np
import cv2
import os
import pandas as pd
np.set_printoptions(suppress=True)
from matplotlib import pyplot as plt
%matplotlib inline
import time
import re
from sqlalchemy import create_engine
from sqlalchemy import Table
from sqlalchemy import Column
from sqlalchemy import MetaData
from sqlalchemy.dialects.postgresql import BYTEA
from sqlalchemy import Integer,String
from sqlalchemy import select
import pickle
import random
import itertools
import seaborn as sns
from mlxtend.plotting import plot_decision_regions
from mlxtend.plotting import category_scatter
import mahotas
##Sklearn (Model Imports)
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import train_test_split,KFold,StratifiedKFold,RandomizedSearchCV,GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, precision_recall_curve, f1_score, roc_curve, auc
from sklearn.metrics import accuracy_score, classification_report,roc_auc_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC,SVC
import sklearn
from sklearn import metrics
import warnings
warnings.filterwarnings('ignore')



In [1]:
def crop_image(img,thresh=False):
    '''
    Crop a grayscale image in two passes: first trim a fixed border, then keep a horizontal band located
    around the vertical centroid of the thresholded image.

    Steps:
        1) Remove 1/8 of the height from the top and bottom and 1/20 of the width from the left and right.
        2) Binarize the result (pixels above 127 become 255, all others 0) and compute its image moments.
        3) Keep only the rows between the midpoint of (top edge, centroid row) and the midpoint of
           (centroid row, bottom edge). All remaining columns are kept.

    Parameters:
        img (numpy array): The single-channel medical image to be cropped.
        thresh (boolean): Set thresh to True if you want to return the cropped binary image. False will return
        the cropped regular image.

    Returns:
        img: The cropped image or binary image depending on parameter thresh.
        M: Moments calculated by using OpenCV's moments method on the binary image produced in step 2
        (i.e. after the border crop but before the centroid-based crop).

    Raises:
        ZeroDivisionError: If no pixel exceeds the threshold, because the zeroth moment M["m00"] is then 0
        and the centroid is undefined.

    '''
    rows, cols = img.shape[:2]
    y_crop = int(rows/8)
    x_crop = int(cols/20)
    # Use explicit end indices: a slice such as img[0:-0] would be empty when a crop size rounds down to 0.
    img = img[y_crop:rows-y_crop, x_crop:cols-x_crop]

    # Keep the binary image under its own name so the boolean `thresh` argument is not overwritten.
    _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
    M = cv2.moments(binary)

    # Only the vertical centroid coordinate is needed for the second crop.
    cY = int(M["m01"] / M["m00"])
    top_y = int(cY/2)
    bot_y = int(((img.shape[0]-cY)/2)+cY)

    source = binary if thresh else img
    return source[top_y:bot_y, 0:img.shape[1]], M

In [7]:
def horiz_structure(img):
    '''
    The function measures how much horizontally-extended structure exists within the image. It applies a
    morphological opening (erode, then dilate) with a one-pixel-high rectangle whose width is 1/20 of the image
    width, and returns the mean of the opened image divided by the number of pixels on the x axis.

    Parameters:
        img (numpy array): The medical image to be analysed.

    Returns:
        horizontal value (float): The mean of the horizontal structures found divided by the number of pixels
        on the x axis.

    '''
    width = img.shape[1]
    # Keep the kernel at least 1 px wide so images narrower than 20 px still get a valid horizontal element.
    kernel_width = max(1, int(width / 20))
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_width, 1))
    # cv2.erode / cv2.dilate return new arrays, so the input does not need to be copied first.
    opened = cv2.dilate(cv2.erode(img, kernel), kernel)
    return np.mean(opened) / width

# Set Up SQLAlchemy Engine

In [8]:
# The connection URL can be supplied through the PROJECT03_DB_URL environment variable so the real host and
# credentials never have to be written into the notebook; without it the placeholder URL below is used.
engine_aws = create_engine(os.environ.get('PROJECT03_DB_URL', 'postgresql://**ec2IP**/project03'),echo=False)


# Create Regex To Extract The Class Name From The Directory Name

In [9]:
# Pattern matching any non-word character (\W). Substituting its matches with '' strips the trailing
# forward slash from a directory entry, e.g. 'DRUSEN/' -> 'DRUSEN'.
dis_regex = re.compile(r'\W')

# Set Up Columns List

In [10]:
# Column names in the order the values are assembled for each image:
# 7 Hu moments, the horizontal-structure score, 25 Zernike moments (Znk1..Znk25) and the class label.
# The names go through a NumPy array so the tuple holds NumPy string scalars.
col = tuple(np.array(
    ['Hu1','Hu2','Hu3','Hu4','Hu5','Hu6','Hu7','Horiz_Structure']
    + ['Znk'+str(idx) for idx in range(1,26)]
    + ["Class"]
))

# Create a DataFrame from the Better-Cropped Images and Compute Moments on Them

In [16]:
test_val_split=['test/','train/','val/']
disease_dir=['DRUSEN/','NORMAL/']
dir_start='/home/ec2-user/data/OCT2017/'
count=0
temp_list=[]
# Walk every <split>/<class>/ directory and build one feature record (dict) per .jpeg image.
for test_dir in test_val_split:
    for dis_dir in disease_dir:
        folder=dir_start+test_dir+dis_dir
        # Strip the trailing slash from the directory name to get the class name (e.g. 'DRUSEN/' -> 'DRUSEN')
        dis_name=re.sub(dis_regex,'',dis_dir)
        for filename in os.listdir(folder):
            if not filename.endswith(".jpeg"):
                continue
            count+=1
            image_path=folder+str(filename)
            # Progress report every 1000 images
            if count%1000==0:
                print(image_path)
                print(count)

            # Read as single-channel grayscale (flag 0); cv2.imread returns None when a file cannot be read.
            img=cv2.imread(image_path,0)
            if img is None:
                print("Skipping unreadable image: "+image_path)
                continue

            ## Crop to the binary image; `moments` are the image moments returned by crop_image
            img,moments=crop_image(img,thresh=True)

            ## Get Zernike Moments
            # NOTE(review): radius=2 limits the computation to a very small disc -- confirm this is intended.
            Zernike_Moments = mahotas.features.zernike_moments(img, radius=2)

            ## Get Horizontal Structure
            horiz_count=horiz_structure(img)

            # Numeric features, in the same order as the leading entries of `col`:
            # Hu moments, horizontal-structure score, Zernike moments.
            image_list=np.concatenate((cv2.HuMoments(moments).flatten(),[horiz_count],Zernike_Moments))
            if len(image_list)!=len(col)-1:
                raise ValueError("Expected %d feature values for %s, got %d"
                                 % (len(col)-1,image_path,len(image_list)))

            # Keep the features numeric and add the label separately: appending the label string to the
            # float array would make NumPy convert every feature value to a string.
            dict_temp=dict(zip(col[:-1],image_list.tolist()))
            dict_temp[col[-1]]=dis_name.lower()
            temp_list.append(dict_temp)
print("Here")
# Passing the column list keeps the expected schema even if no image was found.
df=pd.DataFrame(temp_list,columns=list(col))

/home/ec2-user/data/OCT2017/train/DRUSEN/DRUSEN-2141150-28.jpeg
1000
/home/ec2-user/data/OCT2017/train/DRUSEN/DRUSEN-5969577-6.jpeg
2000
/home/ec2-user/data/OCT2017/train/DRUSEN/DRUSEN-2097858-2.jpeg
3000
/home/ec2-user/data/OCT2017/train/DRUSEN/DRUSEN-1283307-4.jpeg
4000
/home/ec2-user/data/OCT2017/train/DRUSEN/DRUSEN-1730592-8.jpeg
5000
/home/ec2-user/data/OCT2017/train/DRUSEN/DRUSEN-3276918-30.jpeg
6000
/home/ec2-user/data/OCT2017/train/DRUSEN/DRUSEN-6190971-15.jpeg
7000
/home/ec2-user/data/OCT2017/train/DRUSEN/DRUSEN-9547888-29.jpeg
8000
/home/ec2-user/data/OCT2017/train/DRUSEN/DRUSEN-3281144-8.jpeg
9000
/home/ec2-user/data/OCT2017/train/NORMAL/NORMAL-9362045-1.jpeg
10000
/home/ec2-user/data/OCT2017/train/NORMAL/NORMAL-5487005-10.jpeg
11000
/home/ec2-user/data/OCT2017/train/NORMAL/NORMAL-2618426-7.jpeg
12000
/home/ec2-user/data/OCT2017/train/NORMAL/NORMAL-8999584-3.jpeg
13000
/home/ec2-user/data/OCT2017/train/NORMAL/NORMAL-2316423-6.jpeg
14000
/home/ec2-user/data/OCT2017/train/NORM

### Export the DataFrame to the PostgreSQL Database on AWS

In [17]:
# Write the feature table to the 'moment_data' table through the AWS engine.
# if_exists='replace' replaces any existing table of that name with this DataFrame's contents.
df.to_sql(
    name='moment_data',
    con=engine_aws,
    if_exists='replace',
)
