#   Overview
In this notebook we load the data and view different images to get a better idea of the challenge we are facing. This is always a very helpful first step. It is also important that you look at the data yourself and try to make some predictions of your own. If you cannot see differences between the groups, it is going to be difficult (but not necessarily impossible) for a biomarker to capture them.

Install the PyRadiomics package; you can read more about it here: https://pyradiomics.readthedocs.io/en/latest/

In [None]:
# SimpleITK / PyRadiomics helpers used for the feature extraction in this notebook
from SimpleITK import GetImageFromArray
import radiomics
from radiomics.featureextractor import RadiomicsFeatureExtractor # This module is used for interaction with pyradiomics
import logging
# A threshold one above CRITICAL means no standard-level record from the
# 'radiomics' logger can pass, which silences the library's output.
logging.getLogger('radiomics').setLevel(logging.CRITICAL + 1)  # this tool makes a whole TON of log noise

### Setup the PyRadiomics Code

In [None]:
# Instantiate the extractor
texture_extractor = RadiomicsFeatureExtractor(verbose=False)
# Switch everything off first, then re-enable feature classes explicitly by name.
texture_extractor.disableAllFeatures()
# One entry per feature class known to the extractor, each mapped to an empty list.
# NOTE(review): PyRadiomics documents an empty list as "every feature of this
# class" - confirm against the installed version.
_text_feat = {ckey: [] for ckey in texture_extractor.featureClassNames}
texture_extractor.enableFeaturesByName(**_text_feat)

# Echo the resulting configuration so the run documents its own settings
print('Extraction parameters:\n\t', texture_extractor.settings)
print('Enabled filters:\n\t', texture_extractor.enabledImagetypes) 
print('Enabled features:\n\t', texture_extractor.enabledFeatures) 

In [None]:
import numpy as np # for manipulating 3d images
import pandas as pd # for reading and writing tables
import h5py # for reading the image files
import skimage # for image processing and visualizations
import sklearn # for machine learning and statistical models
import os # help us load files and deal with paths
from pathlib import Path # help manage files
import glob
import cv2

### Plot Setup Code
Here we set up the defaults to make the plots look a bit nicer in the notebook.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
plt.rcParams["figure.figsize"] = (8, 8)
plt.rcParams["figure.dpi"] = 125
plt.rcParams["font.size"] = 14
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
plt.style.use('ggplot')
sns.set_style("whitegrid", {'axes.grid': False})

# Load the Training Data
We start with the training data since we have labels for them and can look in more detail

In [None]:
# Load the training table from the local dataset folder (absolute Windows path)
train_df=pd.read_csv('D:\\Final Semester Project\\Final Dataset\\Tomato Yellow Leaf Curl Virus\\Train.csv')
# Alternative relative location, kept for reference:
#train_df = pd.read_csv('../input/train.csv')
train_df.head(10) # show the first 10 rows

# Read Image

In [None]:
  def read_scan(in_filename, folder='Tomato Yellow Leaf Curl Virus',
                root='D:\\Final Semester Project\\Final Dataset'):
      """Load one image of the dataset from disk with OpenCV.

      Parameters
      ----------
      in_filename : str
          File name (or path relative to ``folder``) of the image.
      folder : str
          Sub-folder of ``root`` holding the images. Previously this argument
          was accepted but ignored; the default reproduces the old location.
      root : str
          Directory containing the dataset folders.

      Returns
      -------
      The array returned by ``cv2.imread`` for the image.

      Raises
      ------
      FileNotFoundError
          If OpenCV could not read the file. ``cv2.imread`` signals that by
          returning ``None`` instead of raising, which otherwise only shows up
          later as a confusing ``AttributeError`` on ``.shape``.
      """
      full_scan_path = os.path.join(root, folder, in_filename)
      images = cv2.imread(str(full_scan_path))
      if images is None:
          raise FileNotFoundError(
              'Could not read image: {}'.format(full_scan_path))
      return images

# Load a Scan 
- the data on kaggle are located in a parent folder called input. 
- Since the files have been organized into train and test we use the train folder

In [None]:
# Use the first row of the training table as a worked example
sample_scan = train_df.iloc[0] # just take the first row
print(sample_scan)
# Load the pixels of the file named in that row's 'image_path' column
image_data = read_scan(sample_scan['image_path'])
print('Image Shape:', image_data.shape)

In [None]:
# Inspect the Python type of the object that read_scan returned
type(image_data)

# Image Lavel
- Check each image for errors: every image must load and pass through the feature extractor


In [None]:
# Dry run: confirm that every training image can be loaded and pushed through
# the radiomics extractor before the slow full feature computation starts.
# Loop-local names are used on purpose so that `sample_scan` / `image_data`
# from the single-scan example are not replaced by the last image of the table.
failed_scans = []
for scan_no, scan_path in enumerate(train_df['image_path'], start=1):
    try:
        scan_pixels = read_scan(scan_path)
        if scan_pixels is None:
            # an unreadable file can come back as None rather than an exception
            raise IOError('image could not be read')
        print('Image Shape:', scan_pixels.shape)
        texture_extractor.execute(GetImageFromArray(scan_pixels),
                                  GetImageFromArray((scan_pixels>0).astype(np.uint8)))
    except Exception as scan_error:
        # Deliberately broad: the purpose of this cell is to list every image
        # that fails, whatever the reason. The failures are re-raised below.
        failed_scans.append((scan_path, scan_error))
        print("image No:"+str(scan_no)+" FAILED ("+str(scan_path)+"): "+repr(scan_error))
    else:
        print("image No:"+str(scan_no)+"ok")

# Stop the notebook here if anything failed, now that all culprits are known.
if failed_scans:
    raise RuntimeError('{} image(s) failed the check: {}'.format(
        len(failed_scans), [bad_path for bad_path, _ in failed_scans]))

# Calculate Radiomic Features
Calculate the radiomic features for the test scan

In [None]:
# we take a mask by just keeping the part of the image greater than 0
plt.imshow(np.sum((image_data>60).astype(float), -1))

In [None]:
%%time
results = texture_extractor.execute(GetImageFromArray(image_data),
                            GetImageFromArray((image_data>0).astype(np.uint8)))

In [None]:
# Wrap the extractor output in a one-row frame and transpose it, so that each
# entry of `results` is displayed on its own line
pd.DataFrame([results]).T

In [None]:
def calc_radiomics(in_image_data):
    """Run the shared `texture_extractor` on a single image array.

    The region of interest is every element of `in_image_data` that is
    greater than zero, encoded as a uint8 mask. Both the image and the mask
    are converted with `GetImageFromArray` before extraction.

    Returns whatever `texture_extractor.execute` returns for the pair.
    """
    sitk_image = GetImageFromArray(in_image_data)
    nonzero_mask = (in_image_data > 0).astype(np.uint8)
    sitk_mask = GetImageFromArray(nonzero_mask)
    return texture_extractor.execute(sitk_image, sitk_mask)

# Run over all scans
We use the `.map` function from pandas to calculate the radiomic features for all the scans

In [None]:
%%time
train_df['radiomics'] = train_df['image_path'].map(lambda c_filename: calc_radiomics(read_scan(c_filename)))

In [None]:
def _flatten_row(c_row):
    """Merge a row's 'radiomics' mapping with its remaining columns into one dict."""
    # pop first, so the expanded row no longer carries the nested mapping
    radiomic_values = c_row.pop('radiomics')
    return dict(**radiomic_values, **c_row)

# One flat record per training row: radiomics entries plus the original columns
new_train_df = pd.DataFrame([_flatten_row(c_row) for _, c_row in train_df.iterrows()])
print(new_train_df.shape, 'data prepared')
new_train_df.sample(10)

# Numpy Array

In [None]:
# Pull the column labels and the cell values out of the feature table
# as plain NumPy arrays
numpyheader = new_train_df.columns.values
column = new_train_df.values

In [None]:
import pandas as pd
# Persist the feature table.
# - The labels are attached as real column names and index=False keeps the row
#   counter out of the file; otherwise it comes back as a spurious
#   'Unnamed: 0' column when the CSV is read again.
# - The path is a normal (non-raw) string: combining the r prefix with '\\'
#   put literal double backslashes into the path.
pd.DataFrame(column, columns=numpyheader).to_csv(
    'D:\\Final Semester Project\\Final Dataset\\Tomato Yellow Leaf Curl Virus\\dataset.csv',
    index=False)

# Run Train and Test data

In [None]:
from numpy.random import RandomState
df = pd.read_csv('D:\\Final Semester Project\\Final Dataset\\Tomato Yellow Leaf Curl Virus\\dataset.csv')
# Fixed seed: without it every run produced a different 80/20 split, so results
# could not be reproduced after a kernel restart.
rng = RandomState(42)
train = df.sample(frac=0.8, random_state=rng) #80% training data
test = df.loc[~df.index.isin(train.index)]  #20% test data
# The two parts must partition the table exactly
assert len(train) + len(test) == len(df), 'train/test split lost or duplicated rows'

In [None]:
# Print both halves of the split for a quick look
print(train,test)

In [None]:
# Written as 'train_features.csv' rather than 'train.csv': this notebook reads
# its image list from 'Train.csv' in this same folder, and on a
# case-insensitive filesystem (the Windows default) 'train.csv' is that very
# file. The old name therefore overwrote the input table and broke the next run.
# NOTE(review): update any downstream consumer that expects 'train.csv'.
train.to_csv('D:\\Final Semester Project\\Final Dataset\\Tomato Yellow Leaf Curl Virus\\train_features.csv', index=False)
train.sample(8)

In [None]:
# Save the held-out rows next to the rest of the dataset (row index not written)
test.to_csv('D:\\Final Semester Project\\Final Dataset\\Tomato Yellow Leaf Curl Virus\\test.csv', index=False)
# Preview two randomly chosen test rows
test.sample(2)