In [13]:
import numpy as np
import tempfile
from shutil import copy
import pandas as pd
from multiprocessing import Pool
import subprocess
import scipy.spatial.distance as ssd

In [2]:
import timeit
import matplotlib.pyplot as plt
from function import *

20.2788001


# Approach
1. Generate list of maps to compare
1. Generate list of all required comparisons
1. Pre-load all csv maps into Python
1. Select map pair to compare
1. Create temp folder
1. Provide temp folder with maskland and mask file
1. Generate asc version of csv maps that are to be compared
1. Generate CSL and log file for the comparison and copy to temp folder
1. Copy legends folder to temp folder
1. Run comparison in MCK
1. Extract required stats from outputfile
1. Delete temp folder with all files
1. Go back and repeat from step 4 until all comparisons have been run
1. Export all data to disk
<img src="PythonPipeline.png">

# Help Functions
<b><font size="4">Any function not defined here can be found in function.py</font></b><br>
This split was necessary because the multiprocessing pool would not run when the worker function was defined in the notebook itself.

In [12]:
def single_df(maps, stats, metric, out_dir=None):
    """Build and save the distance matrix for a single-map metric.

    Every map has one value for the metric; the returned matrix holds the
    Euclidean distance between the values of each pair of maps (for a single
    value per map this is the absolute difference).

    Parameters
    ----------
    maps : sequence of str
        Map names, one per entry in ``stats``. Used as both row and column
        labels of the result.
    stats : sequence of float
        Metric value of each map, in the same order as ``maps``.
    metric : str
        Metric name; used as the column name of the intermediate frame and as
        the stem of the output file ``<metric>_df.csv``.
    out_dir : str, optional
        Directory prefix for the CSV file. It is concatenated verbatim with
        the file name, so it must end with a path separator. Defaults to the
        notebook-level ``csv_dir``.

    Returns
    -------
    pandas.DataFrame
        Square distance matrix indexed by ``maps`` on both axes.
    """
    # Fall back to the notebook-level output directory when none is given
    target_dir = csv_dir if out_dir is None else out_dir

    # Create base df to form the basis for the distance matrix
    df = pd.DataFrame(index=maps)
    df[metric] = stats

    # Calculate the pairwise Euclidean distances (condensed form), then expand
    # them to a full square matrix
    matrix = ssd.squareform(ssd.pdist(df))
    # Label the matrix with the maps that were passed in; the previous code
    # referenced an undefined global (`single_maps`) here.
    df_clean = pd.DataFrame(matrix, index=maps, columns=maps)

    # Save df to disk. NOTE: index=False means the row labels are not written;
    # rows appear in the same order as the column header.
    csv_name = target_dir + metric + '_df.csv'
    df_clean.to_csv(csv_name, index=False)
    return df_clean

def multi_df(map1, map2, stats, metric, out_dir=None, fill_value=1):
    """Build and save the symmetric matrix for a pairwise (two-map) metric.

    Each entry of ``stats`` is the metric value for the pair
    ``(map1[i], map2[i])``. The pairs are mirrored so that the result has the
    value at both ``[a, b]`` and ``[b, a]``.

    Parameters
    ----------
    map1, map2 : iterable of str
        Names of the first and second map of every comparison. The names must
        end in ``p<number>`` (the text after the last ``'p'`` is parsed as an
        integer) because the result is ordered by that number.
    stats : sequence of float
        Metric value for every pair, in the same order as ``map1``/``map2``.
    metric : str
        Metric name; used as the value column name and as the stem of the
        output file ``<metric>_df.csv``.
    out_dir : str, optional
        Directory prefix for the CSV file. It is concatenated verbatim with
        the file name, so it must end with a path separator. Defaults to the
        notebook-level ``csv_dir``.
    fill_value : float, optional
        Value written into every cell that has no comparison, which includes
        the diagonal when no self-comparisons were supplied. Defaults to 1,
        the previous hard-coded value.
        NOTE(review): 1 suits similarity metrics; a disagreement metric
        presumably wants 0 on the diagonal - confirm per metric.

    Returns
    -------
    pandas.DataFrame
        Square matrix with the map names on both axes, rounded to 3 decimals.
    """
    # Fall back to the notebook-level output directory when none is given
    target_dir = csv_dir if out_dir is None else out_dir

    # Create two dataframes with swapped map columns
    df = pd.DataFrame()
    df['map1'] = list(map1)
    df['map2'] = list(map2)
    df[metric] = stats
    mirrored = df.rename(columns={'map1': 'map2', 'map2': 'map1'})

    # A self-comparison (a, a) is identical to its mirror image and would make
    # pivot fail on a duplicate entry. Only exact duplicates are dropped, so
    # conflicting values for the same pair still raise in pivot.
    df_concat = pd.concat([df, mirrored]).drop_duplicates()
    df_pivot = df_concat.pivot(index='map2', columns='map1', values=metric)

    # Clean up the presentation
    # Remove unnecessary labeling and make both axes carry the same set of maps
    index = df_pivot.index.union(df_pivot.columns)
    df_clean = df_pivot.reindex(index=index, columns=index)
    # Reindex to correct numerical order (by the number after the last 'p')
    ordered = df_clean.index.to_series().str.rsplit('p').str[-1].astype(int).sort_values()
    df_clean = (
        df_clean
        .reindex(index=ordered.index, columns=ordered.index)
        .fillna(fill_value)
        .round(decimals=3)
    )

    # Save df to disk. NOTE: index=False means the row labels are not written;
    # rows appear in the same order as the column header.
    csv_name = target_dir + metric + '_df.csv'
    df_clean.to_csv(csv_name, index=False)
    return df_clean


# Setup Variables Main

In [4]:
# Directory where the dataframes containing output are stored.
# single_df/multi_df build the file name by plain string concatenation
# (csv_dir + metric + '_df.csv'), so the trailing slash is required.
csv_dir = 'C:/LUMOS/MCK/Output_DFs/'

# Generate metric lists for all comparisons - move them after creation in nb
# (currently disabled)
# kfuzzy_id = ['kfuzzy'] * len(map_pairs)
# alloc_id = ['alloc'] * len(map_pairs)
# quant_id = ['quant'] * len(map_pairs)
# clump_id = ['clump'] * len(map_list)
# nrpatch_id = ['nrpatch'] * len(map_list)
# simpson_id = ['simpson'] * len(map_list)

# Metric_ID

The table below lists the metrics available for analysis, the ID that must be placed in the metric ID list to generate the desired output, and the stats id that is passed to the extract stats function:<br>

| Metric                        |   ID   | stats id |              note             |
|-------------------------------|--------|----------|-------------------------------|
|   Kappa                       | kappa  |     0    |                               |
|   Overall Accuracy            | kappa  |     0    | this is calculated with kappa |
|   Fuzzy Kappa                 | kfuzzy |     1    |                      |
|   Allocation Disagreement     | alloc  |     2    |                      | 
|   Quantity Disagreement       | quant  |     3    |                      | 
|   Fractal Dimension           | fractal|     4    |                      |
|   Clumpiness                  | clump  |     5    |                      |
|   Number of Patches           | nrpatch|     6    |                      |
|   Simpson's Diversity Index   | simpson|     7    |                      |

# Run Kappa and Overall Accuracy

In [None]:
# Create the list of (pair index, metric id) tuples to pass to the comparison
# function; each tuple identifies the map pair and the metric to use.
# NOTE(review): map_pairs is not defined in this notebook - presumably it comes
# from `from function import *`; confirm.
kappa_id = ['kappa'] * len(map_pairs)
pair_id = list(np.arange(0, len(map_pairs), 1))
# zip() returns a one-shot iterator in Python 3. Materialise it so that
# `pairs` can be handed to starmap more than once (both kappa run cells use
# it); otherwise the second run would silently receive no work.
pairs = list(zip(pair_id, kappa_id))
num_processors = 4
p = Pool(processes=num_processors)

## Run with ascs reloading (edit out the numpy2asc loop at the bottom of function.py)

In [None]:
# Store output as lists, one entry per compared map pair
acc = []
kappa = []
runtime = []  # not filled in this cell
kappa_maps1 = []
kappa_maps2 = []

start = timeit.default_timer()

# Each result is unpacked as (first map, second map, accuracy, kappa)
for n1, n2, ac, kap in p.starmap(run_comparisons, pairs):
    kappa_maps1.append(n1)
    kappa_maps2.append(n2)
    acc.append(ac)
    kappa.append(kap)
stop = timeit.default_timer()

# Elapsed time (timeit.default_timer difference) for all comparisons
print(stop - start)
# Create df for kappa and save it to disk
df_kappa = multi_df(kappa_maps1, kappa_maps2, kappa, 'kappa')
# Jupyter only renders the last expression of a cell, so a bare
# `df_kappa.head(5)` here was silently discarded; display it explicitly.
display(df_kappa.head(5))
# Create df for overall accuracy and save it to disk
df_acc = multi_df(kappa_maps1, kappa_maps2, acc, 'accuracy')
df_acc.head(5)

## Run with ascs preloaded (edit out map_pairs in function.py)

In [None]:
# Store output as lists, one entry per compared map pair
acc = []
kappa = []
runtime = []  # not filled in this cell
kappa_maps1 = []
kappa_maps2 = []

start = timeit.default_timer()

# Each result is unpacked as (first map, second map, accuracy, kappa)
for n1, n2, ac, kap in p.starmap(run_comparisons2, pairs):
    kappa_maps1.append(n1)
    kappa_maps2.append(n2)
    acc.append(ac)
    kappa.append(kap)
stop = timeit.default_timer()

# Elapsed time (timeit.default_timer difference) for all comparisons
print(stop - start)
# Create df for kappa and save it to disk
df_kappa = multi_df(kappa_maps1, kappa_maps2, kappa, 'kappa')
# Jupyter only renders the last expression of a cell, so a bare
# `df_kappa.head(5)` here was silently discarded; display it explicitly.
display(df_kappa.head(5))
# Create df for overall accuracy and save it to disk
df_acc = multi_df(kappa_maps1, kappa_maps2, acc, 'accuracy')
df_acc.head(5)

In [None]:
#p.starmap(run_comparisons, pairs)

# Run Fractal Dimension

In [5]:
# Create the list of (map index, metric id) tuples to pass to the comparison
# function; each tuple identifies the map and the metric to use.
# NOTE(review): map_list is not defined in this notebook - presumably it comes
# from `from function import *`; confirm.
fractal_id = ['fractal'] * len(map_list)
pair_id = list(np.arange(0, len(map_list), 1))
# zip() returns a one-shot iterator in Python 3. Materialise it so the run
# cell can be re-executed without first re-running this cell; otherwise a
# second starmap over `pairs` would silently receive no work.
pairs = list(zip(pair_id, fractal_id))
num_processors = 4
p = Pool(processes=num_processors)

In [6]:
# Store output as lists: frac[i] is the fractal dimension of frac_maps[i]
frac = []
frac_maps = []

start = timeit.default_timer()

# Each result is unpacked as (first map, second map, value 1, value 2)
for n1, n2, fr1, fr2 in p.starmap(run_comparisons, pairs):
    frac_maps.append(n1)
    frac.append(fr1)
    # A value of 999 for the second map is skipped. Its name must be skipped
    # as well: appending the name without the value left frac_maps longer than
    # frac, which makes single_df fail on the length mismatch.
    if fr2 != 999:
        frac_maps.append(n2)
        frac.append(fr2)
stop = timeit.default_timer()

# Elapsed time (timeit.default_timer difference) for all comparisons
print(stop - start)

# Create df for fractal dimension and save it to disk
df_frac = single_df(frac_maps, frac, 'fractal')
df_frac

50.76012849999999
