# ***Some notes***

The notebook includes the steps of importing images, calculating morphological and textural features.
 

*   Initial images may be in any format, but the code may need changes. Currently, it is using cv's imread (see [documentation](https://docs.opencv.org/master/d4/da8/group__imgcodecs.html#ga288b8b3da0892bd651fce07b3bbd3a56)).
*   Images are processed as grayscale. If multi-channel, channels will be merged according to current code. If the analysis is intended to look at individual channels, split image into different files.
*   The code also includes the option to import labels from a CSV file (with image name and numerical label). This is, of course, not a mandatory step; feature extraction can be performed without classification.
*   GLCM features cannot be calculated on float images. As such, intensity values may have to be normalized prior to feature calculation.


# Import Images

In [None]:
#initial imports
import pandas as pd
import cv2
from os import listdir

In [None]:
#if data in drive (Google Colab only)
# google.colab is normally importable only inside a Colab runtime. Guard the
# import so this optional cell does not abort a Restart & Run All on a local
# Jupyter kernel.
try:
    from google.colab import drive
except ImportError:
    print('google.colab not available - skipping Drive mount (not running in Colab).')
else:
    drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#open images and create dataset with images
#
# Builds `df` with one row per readable image in `mypath`:
#   'Patch'      - the image read as a single-channel (grayscale) array
#   'Image Name' - the file name (not the full path)

import os  # cell-local import: only `listdir` is imported from os at the top

mypath = 'PATH_TO_IMAGE_FOLDER'

# listdir returns entries in arbitrary order; sort so the row order is reproducible
imglist = sorted(listdir(mypath))

patches = []  # grayscale arrays, one per readable image
names = []    # file names, parallel to `patches`

for img in imglist:
    path = os.path.join(mypath, img)
    patch = cv2.imread(path, cv2.IMREAD_GRAYSCALE) #read image as greyscale

    # cv2.imread does not raise on failure: it returns None for anything it
    # cannot decode (sub-folders, hidden files, unsupported formats). Storing
    # None would only fail later, during feature calculation.
    if patch is None:
        print(f'Skipping {img}: could not be read as an image')
        continue

    patches.append(patch)
    names.append(img)

# patch is the ROI to be imported
df = pd.DataFrame({'Patch': patches, 'Image Name': names})


df

In [None]:
#get labels
#
# Adds a 'Label' column to `df` by matching each row's 'Image Name' against the
# label file. The CSV must provide an 'Image' column (file name, spelled exactly
# as in the image folder) and a 'Label' column.

labels = pd.read_csv('PATH_TO_FILE.csv')

label_df = pd.DataFrame({'Image Name': labels['Image'], 'Label': labels['Label']})
label_df = label_df.sort_values(by = ['Image Name'])

# an image listed twice would make the name -> label lookup ambiguous
repeated = label_df.loc[label_df['Image Name'].duplicated(), 'Image Name']
if not repeated.empty:
    raise ValueError(
        'Label file lists these images more than once: '
        + ', '.join(map(str, repeated.unique()))
    )

# every imported image needs a label; fail loudly instead of mislabelling
unlabelled = df.loc[~df['Image Name'].isin(label_df['Image Name']), 'Image Name']
if not unlabelled.empty:
    raise ValueError(
        'No label found for these images: ' + ', '.join(map(str, unlabelled))
    )

#sort by image name (same row order as before) and look labels up by name.
#Matching on the name, rather than on row position after sorting, stays correct
#even when the label file contains extra images.
df = df.sort_values(by = ['Image Name'])
df['Label'] = df['Image Name'].map(label_df.set_index('Image Name')['Label'])

In [None]:
#save df into pickle database
#the "Calculate Features" section reads this same path back with pd.read_pickle,
#so keep both paths in sync when replacing the placeholder

df.to_pickle('OUTPUT_PATH/FILENAME.pickle')
df

# Calculate Features

In [None]:
#initial imports -- includes py files for feature calculation
#!pip install mahotas
import pandas as pd
import numpy as np
from RegionPropsMorph import RegionPropsMorph
from RegionPropsInt import RegionPropsInt
from FreqAnalysis import FreqAnalysis
from GLCM import GLCM

In [None]:
#if data in drive (Google Colab only)
# google.colab is normally importable only inside a Colab runtime. Guard the
# import so this optional cell does not abort a Restart & Run All on a local
# Jupyter kernel.
try:
    from google.colab import drive
except ImportError:
    print('google.colab not available - skipping Drive mount (not running in Colab).')
else:
    drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#read the pandas dataframe saved at the end of the "Import Images" section
#NOTE: unpickling can execute arbitrary code - only load pickle files you created yourself

df_patches = pd.read_pickle('OUTPUT_PATH/FILENAME.pickle')
df_patches

In [None]:
#calculate features
#
# For every patch, runs the four feature extractors and appends one column per
# feature to a copy of the patch dataframe. Result: `df` = original columns
# followed by morphological, intensity, GLCM and frequency features (in that order).

df_base = df_patches['Patch']

# reset_index keeps each column's dtype and gives a 0..n-1 index
# (rebuilding the frame from `.values` would turn every column into `object`)
df = df_patches.reset_index(drop = True)

# extractor classes, in the order their feature columns are appended
FEATURE_EXTRACTORS = (RegionPropsMorph, RegionPropsInt, GLCM, FreqAnalysis)


def extract_patch_features(patch):
    """Return {feature label: value} for one patch, across all extractors.

    Each extractor is built from the patch and queried through
    ``print_features``, which returns ``(labels, values)``. Labels are paired
    with values by position; if two extractors share a label, the later one wins.

    Raises:
        ValueError: if an extractor returns fewer values than labels.
    """
    features = {}
    for extractor in FEATURE_EXTRACTORS:
        #change print_values to True to see feature values
        feat_labels, feat_values = extractor(patch).print_features(print_values = False)
        if len(feat_values) < len(feat_labels):
            raise ValueError(
                f'{extractor.__name__} returned {len(feat_values)} values '
                f'for {len(feat_labels)} labels'
            )
        features.update(zip(feat_labels, feat_values))
    return features


# one dict per patch; the dataframe is built once at the end instead of
# growing per-feature lists inside the loop
feature_rows = []
for position, patch in enumerate(df_base):
    # a None patch means the image could not be read when it was imported
    if patch is None:
        raise ValueError(f'Patch at row {position} is empty (image could not be read)')
    feature_rows.append(extract_patch_features(patch))

#add features to dataframe and visualize

feature_df = pd.DataFrame(feature_rows, index = df.index)
df = pd.concat([df, feature_df], axis = 1)


df

In [None]:
#save the new dataframe (patches plus all calculated feature columns)
#the "Data Visualisation" section reads this same path back with pd.read_pickle

df.to_pickle('PATH_TO_OUTPUT/FEATURE_DATABASE.pickle')

#if wanting to save to xlsx file, uncomment the following:
#excel_name = 'all.xlsx'
#df_path =  excel_name
#df.to_excel(df_path)



# Data Visualisation (incomplete)

In [None]:
#initial imports
#!pip install statannot
import numpy as np
import pandas as pd
import os
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import scipy.stats as stats
from termcolor import colored
from statannot import add_stat_annotation
import pickle


In [None]:
#if data in drive (Google Colab only)
# google.colab is normally importable only inside a Colab runtime. Guard the
# import so this optional cell does not abort a Restart & Run All on a local
# Jupyter kernel.
try:
    from google.colab import drive
except ImportError:
    print('google.colab not available - skipping Drive mount (not running in Colab).')
else:
    drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#import dataframe (the feature database saved by the "Calculate Features" section)
#NOTE: unpickling can execute arbitrary code - only load pickle files you created yourself

df = pd.read_pickle('PATH_TO_OUTPUT/FEATURE_DATABASE.pickle')
df.index = np.arange(df.shape[0]) #renumber rows 0..n-1. not mandatory; only needed if any images were excluded in intermediate steps

df

In [None]:
#Getting labels: the 'Label' column of the feature dataframe
#(only present if the optional label cell was run during import)

df_labels = df['Label']

(3, 1)
(2, 1)
(3, 1)


Unnamed: 0,Label
0,0
1,0
2,1
3,0
4,1
5,2
6,2
7,2
8,3
9,3
