In [84]:
# Dataset processing automation

import numpy as np
import pandas as pd
from spectral import *

In [108]:
# METHODS

# READING DATA 

def read_data(filename, base_dir="D:/data"):
    """Load the raw cube and its dark/white reference images for one capture.

    Parameters
    ----------
    filename : int
        Numeric id of the capture. Ids below 100 are prefixed with a single
        "0" to form the folder/file stem (e.g. 42 -> "042"); ids of 100 and
        above are used unchanged.
    base_dir : str, optional
        Root folder holding one sub-folder per capture. Defaults to the
        location that used to be hard-coded in this function.

    Returns
    -------
    tuple
        ``(cube, dark, white)``: the results of calling ``.load()`` on the
        three images opened with ``open_image``.
    """
    # NOTE(review): ids below 10 receive only one leading zero ("07", not
    # "007"); confirm whether such captures exist and how they are named.
    stem = ("0" if filename < 100 else "") + str(filename)
    capture_dir = f"{base_dir}/{stem}/capture"

    # The three headers live side by side and differ only by file-name prefix.
    cube = open_image(f"{capture_dir}/{stem}.hdr")
    dark = open_image(f"{capture_dir}/DARKREF_{stem}.hdr")
    white = open_image(f"{capture_dir}/WHITEREF_{stem}.hdr")
    return cube.load(), dark.load(), white.load()

# NORMALIZATION
def calculate_reflectance(raw, black, white):
    """Normalize a raw capture with its dark and white references.

    Computes ``(raw - black) / (white - black)`` element-wise, following the
    usual NumPy broadcasting rules.

    Parameters
    ----------
    raw, black, white : array-like
        Raw measurement, dark reference and white reference.

    Returns
    -------
    numpy.ndarray
        The normalized values. Floating-point inputs keep their dtype
        behaviour; integer inputs produce a float64 result.
    """
    def _promote(values):
        # Unsigned integer subtraction wraps around (5 - 10 -> 65531 for
        # uint16), which would silently corrupt the result. Promote integer
        # inputs to float before subtracting; leave floats untouched.
        arr = np.asarray(values)
        if np.issubdtype(arr.dtype, np.integer):
            arr = arr.astype(np.float64)
        return arr

    r = _promote(raw)
    b = _promote(black)
    w = _promote(white)
    # Where white == black the denominator is zero and NumPy yields inf/nan
    # (with a RuntimeWarning); that behaviour is intentionally left as-is.
    return (r - b) / (w - b)

# SEGMENTATION
def find_ham(data, low_band=10, high_band=150, threshold=0.3):
    """Zero out every pixel whose band difference does not exceed a threshold.

    A pixel is kept when ``data[row, col, high_band] - data[row, col, low_band]``
    is strictly greater than ``threshold``; all bands of the other pixels are
    multiplied by zero.

    Parameters
    ----------
    data : array-like
        Three-dimensional array indexed as ``[row, col, band]``.
    low_band, high_band : int, optional
        Band indices used to build the mask. Defaults (10 and 150) are the
        values that used to be hard-coded.
    threshold : float, optional
        Minimum difference (exclusive) for a pixel to be kept. Default 0.3.

    Returns
    -------
    numpy.ndarray
        A masked copy of ``data``; the input itself is not modified.
    """
    # Work on a copy so the caller's array stays intact.
    d = np.array(data)
    mask = (d[:, :, high_band] - d[:, :, low_band]) > threshold
    # Broadcast the 2-D mask across the band axis instead of looping per band.
    # Multiplication (rather than np.where) keeps NaN pixels as NaN.
    d *= mask[:, :, np.newaxis]
    return d

# ORGANIZING DATA
def extract_pixels(data):
    """Flatten a ``[row, col, band]`` array into a per-pixel table.

    Each row of the returned DataFrame is one pixel and each column one band,
    labelled ``band1`` .. ``bandN`` (1-based).
    """
    band_count = data.shape[2]
    pixel_rows = data.reshape(-1, band_count)
    labels = [f'band{k}' for k in range(1, band_count + 1)]
    return pd.DataFrame(pixel_rows, columns=labels)

def clean_null_pixels(dataframe):
    """Return only the rows that contain at least one value different from 0.0."""
    has_signal = (dataframe != 0.0).any(axis=1)
    return dataframe[has_signal]
    
def spectral_signature(dataframe):
    """Return the mean of every column of ``dataframe`` as a list."""
    column_means = dataframe.mean(axis=0)
    return [value for value in column_means]


In [110]:
# DATASET CREATION
from sys import stdout

# Capture ids to process. The progress percentage below is derived from this
# range, so changing the bounds no longer requires editing a separate
# hard-coded denominator.
sample_ids = range(10, 114)

# One mean spectrum (list of per-band means) is appended per capture.
spectral_samples = []

for done, i in enumerate(sample_ids, start=1):
    # Pipeline: load -> normalize -> mask -> flatten -> drop zeroed pixels -> average.
    raw, dark, white = read_data(i)
    r = calculate_reflectance(raw, dark, white)
    ham = find_ham(r)
    df = extract_pixels(ham)
    df1 = clean_null_pixels(df)
    ss = spectral_signature(df1)
    spectral_samples.append(ss)

    comp = done / len(sample_ids) * 100
    # "\r" returns to the start of the line so the status overwrites itself.
    stdout.write("\r%d%% completed" % comp)
    stdout.flush()
    


100% completed

In [111]:

# Sanity check: the loop appends one spectral signature per processed capture.
print(len(spectral_samples))

104


In [113]:
# One row per capture, one column per band mean; at this point the columns
# still carry pandas' default integer labels.
samples = pd.DataFrame(data=spectral_samples)
samples.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,194,195,196,197,198,199,200,201,202,203
0,0.284257,0.249366,0.224309,0.204326,0.189497,0.178146,0.168257,0.161248,0.156066,0.152212,...,0.444226,0.447807,0.454861,0.462858,0.463716,0.476962,0.481532,0.486014,0.495068,0.488017
1,0.270824,0.235305,0.205166,0.18543,0.169601,0.156309,0.146676,0.139072,0.132898,0.128366,...,0.426699,0.431991,0.439993,0.44034,0.45206,0.456321,0.473419,0.481597,0.490299,0.474413
2,0.278932,0.237543,0.210911,0.187438,0.1704,0.157867,0.14644,0.138468,0.132184,0.127973,...,0.410002,0.414244,0.414589,0.423431,0.434509,0.439365,0.456173,0.462708,0.472729,0.478105
3,0.349498,0.300779,0.267037,0.240654,0.222573,0.20915,0.197557,0.189338,0.183519,0.18026,...,0.516891,0.521007,0.53023,0.529855,0.54125,0.558598,0.562306,0.582928,0.578221,0.579865
4,0.26825,0.22901,0.201534,0.179562,0.162913,0.151118,0.141468,0.13397,0.128221,0.125406,...,0.409194,0.41308,0.420535,0.429221,0.439037,0.443499,0.458375,0.464223,0.473092,0.461016


In [114]:
# Rename the integer column labels to band1..bandN (1-based), the same scheme
# extract_pixels uses for the per-pixel table.
samples.columns = [f'band{i}' for i in range(1, 1+len(spectral_samples[0]))]

In [115]:
# Preview again to confirm the band names were applied.
samples.head()

Unnamed: 0,band1,band2,band3,band4,band5,band6,band7,band8,band9,band10,...,band195,band196,band197,band198,band199,band200,band201,band202,band203,band204
0,0.284257,0.249366,0.224309,0.204326,0.189497,0.178146,0.168257,0.161248,0.156066,0.152212,...,0.444226,0.447807,0.454861,0.462858,0.463716,0.476962,0.481532,0.486014,0.495068,0.488017
1,0.270824,0.235305,0.205166,0.18543,0.169601,0.156309,0.146676,0.139072,0.132898,0.128366,...,0.426699,0.431991,0.439993,0.44034,0.45206,0.456321,0.473419,0.481597,0.490299,0.474413
2,0.278932,0.237543,0.210911,0.187438,0.1704,0.157867,0.14644,0.138468,0.132184,0.127973,...,0.410002,0.414244,0.414589,0.423431,0.434509,0.439365,0.456173,0.462708,0.472729,0.478105
3,0.349498,0.300779,0.267037,0.240654,0.222573,0.20915,0.197557,0.189338,0.183519,0.18026,...,0.516891,0.521007,0.53023,0.529855,0.54125,0.558598,0.562306,0.582928,0.578221,0.579865
4,0.26825,0.22901,0.201534,0.179562,0.162913,0.151118,0.141468,0.13397,0.128221,0.125406,...,0.409194,0.41308,0.420535,0.429221,0.439037,0.443499,0.458375,0.464223,0.473092,0.461016


In [116]:
from pandas import ExcelWriter
from pandas import ExcelFile
# Load sheet 'Sheet1' of the measurements workbook using the openpyxl engine.
# NOTE(review): the absolute D:/ path ties this cell to one machine.
salt_data = pd.read_excel('D:/data/presunto.xlsx', sheet_name='Sheet1', engine='openpyxl')

In [117]:
# Inspect the spreadsheet layout; the values used as the target below come
# from the '% real presunto' column.
salt_data.head()

Unnamed: 0,amostras,massa (g),Periodo (min),Temp,uS/cm,mS/cm,ppm,m soluto (mg),m soluto (g),% real presunto,...,Params,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21
0,0.0,,0.0,,,,,,,0.638805,...,,,,,,,,,,
1,1.0,10.59,0.0,21.0,1625.0,1.625,812.5,81.25,0.08125,0.767233,...,int. Time,25ms,,,,,,,,
2,2.0,10.78,0.0,19.0,1403.0,1.403,701.5,70.15,0.07015,0.650742,...,reference,Teflon bar,,,,,,,,
3,3.0,9.8,0.0,19.5,1136.0,1.136,568.0,56.8,0.0568,0.579592,...,Light,250W halogen,,,,,,,,
4,4.0,9.8,0.0,20.0,1093.0,1.093,546.5,54.65,0.05465,0.557653,...,Working distance,50cm,,,,,,,,


In [118]:
# Take as many target values as there are rows in `samples`, instead of a
# hard-coded count that has to be kept in sync with the capture range.
# NOTE(review): values are matched to captures purely by position (k-th
# spreadsheet row <-> k-th processed capture); confirm that the spreadsheet
# order really corresponds to the capture ids that were processed.
salt = salt_data['% real presunto'].iloc[:len(samples)].tolist()
print(salt)

[0.6388050629729101, 0.767233238904627, 0.6507421150278293, 0.5795918367346939, 0.5576530612244898, 7.523364485981309, 7.874597207303971, 7.566502463054187, 7.730093071354706, 6.267688679245283, 6.377649325626204, 8.97252090800478, 7.585653104925054, 8.962566844919786, 8.01384451544196, 5.797491039426523, 8.23469387755102, 7.196569920844327, 8.203124999999998, 5.601965601965602, 8.372781065088757, 0.0, 9.323040380047507, 8.18399044205496, 7.3228995057660615, 13.88888888888889, 11.41425389755011, 14.01766004415011, 12.411347517730496, 13.270958083832335, 12.9182156133829, 13.286334056399133, 11.179645335389358, 14.047866805411033, 12.476280834914611, 10.427350427350428, 11.686643835616438, 8.7, 11.741016109045848, 13.1993006993007, 11.071428571428573, 10.446685878962535, 10.382483370288249, 8.703220191470844, 11.755424063116369, 11.641337386018238, 11.049538203190597, 9.21680993314231, 8.452914798206278, 10.211267605633804, 8.014301430143014, 12.02808112324493, 9.988262910798122, 13.323

In [119]:
# Attach the target values as a new 'Salt' column; pandas raises if the list
# length differs from the number of rows in `samples`.
samples['Salt'] = salt
samples.head()

Unnamed: 0,band1,band2,band3,band4,band5,band6,band7,band8,band9,band10,...,band196,band197,band198,band199,band200,band201,band202,band203,band204,Salt
0,0.284257,0.249366,0.224309,0.204326,0.189497,0.178146,0.168257,0.161248,0.156066,0.152212,...,0.447807,0.454861,0.462858,0.463716,0.476962,0.481532,0.486014,0.495068,0.488017,0.638805
1,0.270824,0.235305,0.205166,0.18543,0.169601,0.156309,0.146676,0.139072,0.132898,0.128366,...,0.431991,0.439993,0.44034,0.45206,0.456321,0.473419,0.481597,0.490299,0.474413,0.767233
2,0.278932,0.237543,0.210911,0.187438,0.1704,0.157867,0.14644,0.138468,0.132184,0.127973,...,0.414244,0.414589,0.423431,0.434509,0.439365,0.456173,0.462708,0.472729,0.478105,0.650742
3,0.349498,0.300779,0.267037,0.240654,0.222573,0.20915,0.197557,0.189338,0.183519,0.18026,...,0.521007,0.53023,0.529855,0.54125,0.558598,0.562306,0.582928,0.578221,0.579865,0.579592
4,0.26825,0.22901,0.201534,0.179562,0.162913,0.151118,0.141468,0.13397,0.128221,0.125406,...,0.41308,0.420535,0.429221,0.439037,0.443499,0.458375,0.464223,0.473092,0.461016,0.557653


In [120]:
# Persist the dataset. With the default settings the row index is written as
# well, so the CSV gains a leading unnamed column.
samples.to_csv("D:/data/salt_content_ham2.csv")