# Parameter optimization for size and mod thresholds

## Import packages and set paths

In [3]:
import pandas as pd
import numpy as np
import os
import subprocess
import matplotlib.pyplot as plt
import cv2
import matplotlib.image as mpimg
import matplotlib.pylab as pl
import numpy.ma as ma
import datetime
import math
import scipy.optimize
import random

In [4]:
basepath = '/media/jukes/jukes1/'
sg_path = '/home/jukes/Documents/Sample_glaciers/'

## Read in analysis dates for manual and automated delinations, convert to datetime objs

In [8]:
#read in manual image dates
manual_df= pd.read_csv(basepath+'Manual/Manual_imgdates.csv', sep=',', dtype=str, header=0, usecols=[1,2,3,4])
print(manual_df.shape)
manual_df.head()

(585, 4)


Unnamed: 0,BoxID,datetimes,Line_x,Line_y
0,1,1987-10-14,"[153, 153, 151, 152, 150, 151, 149, 148, 146, ...","[132, 133, 134, 134, 135, 135, 136, 138, 139, ..."
1,1,1995-04-13,"[152, 153, 153, 153, 153, 153, 154, 154, 155, ...","[135, 136, 137, 138, 139, 140, 141, 142, 142, ..."
2,1,1995-05-31,"[153, 153, 153, 153, 153, 154, 154, 157, 158, ...","[137, 138, 139, 140, 142, 143, 145, 145, 145, ..."
3,1,2013-05-25,"[131, 131, 132, 133, 134, 134, 135, 136, 136, ...","[141, 143, 144, 144, 145, 146, 146, 146, 147, ..."
4,1,2013-05-29,"[130, 131, 130, 131, 131, 132, 134, 134, 135, ...","[142, 142, 144, 144, 145, 146, 147, 148, 148, ..."


In [11]:
#Read in datetags csv as datetime_df
automated_df = pd.read_csv(sg_path+'imgdates.csv', sep=',', dtype=str, header=0, names=['Scene', 'datetimes'])
print(automated_df.shape)
automated_df.head()

(864, 2)


Unnamed: 0,Scene,datetimes
510,LC80090132013101LGN01,2013-04-11
1612,LC80090142013101LGN01,2013-04-11
577,LC82330172013102LGN01,2013-04-12
940,LC82330152013102LGN01,2013-04-12
445,LC80080142013110LGN01,2013-04-20


In [12]:
#grab the datetime strings
datetimes = automated_df.loc[:,'datetimes']
#create list to store the datetime objects
datetime_objs = []

for date in datetimes:
    date = str(date)
    datetime_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
    datetime_obj = np.datetime64(datetime_obj)
    datetime_objs.append(datetime_obj)

#add the datetime objects back into the datetime column
automated_df['datetimes'] = datetime_objs

## READ IN MANUAL TPOSITION CALCULATIONS

## Find overlaps and select 90% for training, 10% for testing

In [13]:
overlap_df = manual_df.merge(automated_df, how='inner', on=['datetimes'])
overlap_df = overlap_df.sort_values(by='datetimes', ascending=True)
overlap_df = overlap_df.drop(['Line_x', 'Line_y', 'Scene'], axis=1)
overlap_df = overlap_df.drop_duplicates()
overlap_df.shape

ValueError: You are trying to merge on object and datetime64[ns] columns. If you wish to proceed you should use pd.concat

In [102]:
dates = []

for idx, row in overlap_df.iterrows():
    entry = row['BoxID'] + ','+str(row['datetimes'])[:-9]
#     print(entry)
    dates.append(entry)
# print(dates)

### Select 90% for training

In [222]:
N = len(dates)
print(N)

#pick a random sample of dates for training
train_dates = random.sample(dates, int(N*0.9))
print(len(train_dates))
# print(train_dates)

#grab remaining for testing
test_dates = []
for date in dates:
    if date not in train_dates:
        test_dates.append(date)
print(len(test_dates))
# print(test_dates)

#Check that they don't overlap, should return empty
print(set(train_dates).intersection(test_dates))

125
112
13
set()


In [258]:
boxes = []
imgdates = []

for td in train_dates:
    BoxID, imgdate = td.split(',')
    
    boxes.append(BoxID)
    imgdates.append(imgdate)

train_df = pd.DataFrame(list(zip(boxes, imgdates)), columns=['BoxID', 'datetime'])
train_df.head()

Unnamed: 0,BoxID,datetime
0,1,2016-07-02
1,1,2017-03-16
2,1,2016-04-03
3,277,2016-08-19
4,1,2016-09-13


In [261]:
#Merge with scene ID dataframe
#Read in scene-date dataframe
scene_date_df= pd.read_csv(sg_path+'imgdates.csv', sep=',', dtype=str, header=0, usecols=[1, 2])
print(scene_date_df.shape)
scene_date_df.head()

#Merge
train_scene_df = train_df.merge(scene_date_df, how='inner', on=['datetime'])
train_scene_df = train_scene_df.sort_values(by=['datetime'])

(864, 2)


In [262]:
train_scene_df

Unnamed: 0,BoxID,datetime,Scene
164,277,2013-04-12,LC82330172013102LGN01
165,277,2013-04-12,LC82330152013102LGN01
91,277,2013-04-28,LC82330152013118LGN01
92,277,2013-04-28,LC80160012013118LGN01
197,277,2013-05-21,LC80010152013141LGN01
198,277,2013-05-21,LC80010142013141LGN01
186,001,2013-05-25,LC80130022013145LGN00
46,277,2013-11-13,LC80010142013317LGN00
45,277,2013-11-13,LC80010152013317LGN00
19,277,2013-11-15,LC82320152013319LGN00


In [264]:
#export to csv and text
train_scene_df.to_csv(basepath+'/Manual/train.csv', sep=',', index=False, header=False)
train_scene_df.to_csv(basepath+'/Manual/train.txt', sep=' ', index=False, header=False)

## Define objective function

Example:

In [280]:
# def f(x):
#     return x**2
# minimum = scipy.optimize.fmin(f, 1)
# minimum[0]

#### Our objective function

In [281]:
DOA = '2019_12_16'

def center_dist(size_thresh, mod_thresh):
    #Calculate automated tpos
    #run terminus_pick.tcl using each of the thresholds
    terminus_pick = '/home/akhalil/src/xsmurf-2.7/main/xsmurf -nodisplay /home/jukes/Documents/Scripts/terminus_pick.tcl '+str(size_thresh)+' '+str(mod_thresh)
    print(terminus_pick)
    subprocess.call(terminus_pick, shell=True)
    
    #pull automated terminus position from the output
    #grab each output file
    differences = []
    
    for file in os.listdir(sg_path):
        if DOA in file and file.endswith('csv'):
            if len(file)>28:
                print(file)

                #read the output file in and calculate terminus position for each image
                #somehow gotta run Results script and pull the automated terminus positions
                auto_tpos = 

                #pull in manual tpos 
                man_tpos = 

                diff = abs(auto_tpos - man_tpos)
                differences.append(diff)
    
    #return objective function = distance between the two
    return np.average(differences)

IndentationError: expected an indented block (<ipython-input-281-38d79a1c7f96>, line 13)

In [2]:
os.listdir(sg_path)

NameError: name 'os' is not defined

#### Define the minimization function

In [242]:
def minimize(size_guess, mod_guess):
    minimum = scipy.optimize.fmin(center_dist, [size_guess, mod_guess], args=(size_guess, mod_guess),full_output=True)
    xopt = minimum[0][0]
    funcval = minimum[1]
    return xopt, funcval

#### Run the optimization

In [273]:
base_size_thresh = 0.8
base_mod_thresh = 0.8
thresh_range = 0.15
interval = 1000

size_guesses = np.linspace(base_size_thresh-thresh_range, base_size_thresh+thresh_range, interval)
mod_guesses = np.linspace(base_mod_thresh-thresh_range, base_mod_thresh+thresh_range, interval)

In [247]:
t_list = []
m_list = []

iterations = 5

for i in range(0, iterations-1):
    size_guess = random.choice(size_guesses)
    mod_guess = random.choice(mod_guesses)
    t, m = minimize(size_guess, mod_guess)
    t_list.append(t)
    m_list.append(m)

results_df = pd.DataFrame(list(zip(t_list, m_list)), columns=['min_th', 'min_f(th)'])

TypeError: center_dist() takes 1 positional argument but 3 were given

In [221]:
# size_thresh = 0.8
# mod_thresh = 0.7
# terminus_pick = '/home/akhalil/src/xsmurf-2.7/main/xsmurf -nodisplay /home/jukes/Documents/Scripts/terminus_pick.tcl '+str(size_thresh)+' '+str(mod_thresh)
# print(terminus_pick)
# subprocess.call(terminus_pick, shell=True)

## Cross-validation