# Calculate SI from Concat Tile Reflectance Data

Extracts the mean values of the six bands and calculates a set of common band ratios and band indices from merged_slats_field_agb_nt_mosaic_clean_start.csv.
The following conditions apply:

 - env = base
 - Run this notebook after Merged Tile AGB Zonal Stats Colation.ipynb


In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
# Silence pandas' chained-assignment (SettingWithCopy) warning for the whole session;
# the cells below add columns to DataFrames that were produced by column selection.
pd.set_option('chained_assignment',None)
from __future__ import division
#pd.set_option('precision', 8)

In [2]:
# Run configuration used to build every input/output path in this notebook.
# Date stamp of the folder holding the per-tile zonal statistics.
zonal_out_date = "202301"
# Date stamp of the output folder the collated csv is read from and written to.
date = "20230109"
# Windows drive letter the data directory lives on.
drive = "F"

In [3]:
# Folder of the per-tile zonal statistics for the chosen zonal_out_date.
dir_ = r"{drv}:\cdu\data\zonal_stats\tile\{stamp}".format(drv=drive, stamp=zonal_out_date)
# Folder of the collated outputs for the chosen date.
# NOTE(review): the read/write cells further down rebuild this path inline
# instead of using output_dir.
output_dir = r"{drv}:\cdu\data\zonal_stats\output\{stamp}".format(drv=drive, stamp=date)

In [4]:
# This notebook takes in the combined training and validation data and produces the 
# 
# ratios and vegetation indices for Landsat-8 (use the one written for Sentinel-2), converts them to int32 data 
# 
# and reduces the precision to 7 decimal places. Comment out the relevant input csv file at the start and finish.
#

In [5]:
# Load the collated zonal-stats table (first row is the header) and report
# its column names and its (rows, columns) shape.
df = pd.read_csv(
    r"{0}:\cdu\data\zonal_stats\output\{1}\dbg_dp0_df_dp1_dry_dp1_annual.csv".format(drive, date),
    header=0,
)
print(list(df))
print(df.shape)

['uid', 'site', 'date', 'lon_gda94', 'lat_gda94', 'bio_l_kg1ha', 'bio_t_kg1ha', 'bio_b_kg1ha', 'bio_w_kg1ha', 'bio_br_kg1ha', 'bio_s_kg1ha', 'bio_r_kg1ha', 'bio_agb_kg1ha', 'c_l_kg1ha', 'c_t_kg1ha', 'c_b_kg1ha', 'c_w_kg1ha', 'c_br_kg1ha', 'c_s_kg1ha', 'c_r_kg1ha', 'c_agb_kg1ha', 'geometry', 'basal_dt', 'b1_dbg_count', 'b1_dbg_min', 'b1_dbg_max', 'b1_dbg_mean', 'b1_dbg_med', 'b1_dbg_std', 'b1_dbg_p25', 'b1_dbg_p50', 'b1_dbg_p75', 'b1_dbg_p95', 'b1_dbg_p99', 'b1_dbg_range', 'b2_dbg_count', 'b2_dbg_min', 'b2_dbg_max', 'b2_dbg_mean', 'b2_dbg_med', 'b2_dbg_std', 'b2_dbg_p25', 'b2_dbg_p50', 'b2_dbg_p75', 'b2_dbg_p95', 'b2_dbg_p99', 'b2_dbg_range', 'b3_dbg_count', 'b3_dbg_min', 'b3_dbg_max', 'b3_dbg_mean', 'b3_dbg_med', 'b3_dbg_std', 'b3_dbg_p25', 'b3_dbg_p50', 'b3_dbg_p75', 'b3_dbg_p95', 'b3_dbg_p99', 'b3_dbg_range', 'b4_dbg_count', 'b4_dbg_min', 'b4_dbg_max', 'b4_dbg_mean', 'b4_dbg_med', 'b4_dbg_std', 'b4_dbg_p25', 'b4_dbg_p50', 'b4_dbg_p75', 'b4_dbg_p95', 'b4_dbg_p99', 'b4_dbg_range', 'b5_

In [6]:
# Work on an independent copy so df stays untouched for the merge further down.
a = df.copy(deep=True)

In [7]:
# Summary of the working copy before any columns are selected or rows dropped.
a.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 167 entries, 0 to 166
Columns: 215 entries, uid to dp1a_im_e_dt
dtypes: float64(201), int64(2), object(12)
memory usage: 280.6+ KB


In [8]:
# Keep only the site label, the above-ground biomass column and the six
# per-band dbg mean columns.
a = a.loc[:, [
    "site",
    'bio_agb_kg1ha',
    'b1_dbg_mean',
    'b2_dbg_mean',
    'b3_dbg_mean',
    'b4_dbg_mean',
    'b5_dbg_mean',
    'b6_dbg_mean',
]]

In [9]:
# Positional rename of the eight selected columns: bio_agb_kg1ha becomes
# total_agb and bN_dbg_mean becomes psBNa (N = 1..6).
a.columns = [
    "site",
    "total_agb",
    "psB1a",
    "psB2a",
    "psB3a",
    "psB4a",
    "psB5a",
    "psB6a",
]

In [10]:
# Discard every row that has a missing value in any of the selected columns.
a = a.dropna(axis=0, how='any')

In [11]:
# Per-column dtypes and non-null counts after the rename and the dropna.
a.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 157 entries, 0 to 166
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   site       157 non-null    object 
 1   total_agb  157 non-null    float64
 2   psB1a      157 non-null    float64
 3   psB2a      157 non-null    float64
 4   psB3a      157 non-null    float64
 5   psB4a      157 non-null    float64
 6   psB5a      157 non-null    float64
 7   psB6a      157 non-null    float64
dtypes: float64(7), object(1)
memory usage: 11.0+ KB


In [12]:
# read in the zonal stats results
#a = pd.read_csv("combined_val_data_l8.csv", header=0)
#print (list(a))

In [13]:
# Convert the six band columns to floating point for the vegetation-index
# calculations: cast to int16, multiply by 0.0001 and add an offset of 0.0.
# The blue band is not used by any index below.
# NOTE(review): the psB*a columns hold fractional means at this point, so the
# int16 cast drops the fractional part before scaling, whereas the band ratios
# are computed from the uncast columns - confirm this difference is intended.
blue, green, red, nir, swir1, swir2 = [
    (a[band_col].astype('int16') * 0.0001) + 0.0
    for band_col in ('psB1a', 'psB2a', 'psB3a', 'psB4a', 'psB5a', 'psB6a')
]

In [14]:
# Band ratios between the mean band columns.
# Each row: (float ratio column, int32 ratio column, numerator band, denominator band).
ratio_table = (
    ('ratio32fa', 'ratio32a', 'psB3a', 'psB2a'),
    ('ratio42fa', 'ratio42a', 'psB4a', 'psB2a'),
    ('ratio43fa', 'ratio43a', 'psB4a', 'psB3a'),
    ('ratio52fa', 'ratio52a', 'psB5a', 'psB2a'),
    ('ratio53fa', 'ratio53a', 'psB5a', 'psB3a'),
    ('ratio54fa', 'ratio54a', 'psB5a', 'psB4a'),
    ('ratio62fa', 'ratio62a', 'psB6a', 'psB2a'),
    ('ratio63fa', 'ratio63a', 'psB6a', 'psB3a'),
    ('ratio64fa', 'ratio64a', 'psB6a', 'psB4a'),
    ('ratio65fa', 'ratio65a', 'psB6a', 'psB5a'),
)

# First pass: floating-point ratios. All float columns are added before any
# integer column, which fixes the column order of the frame.
for float_col, int_col, numerator, denominator in ratio_table:
    a[float_col] = a[numerator] / a[denominator]

# Second pass: scale by 10 ** 7, round and store as int32 (7 decimal places kept).
for float_col, int_col, numerator, denominator in ratio_table:
    a[int_col] = np.int32(np.around(a[float_col] * 10 ** 7))

In [15]:
# Vegetation indices computed from the rescaled green, red, nir, swir1 and
# swir2 series. Sub-expressions that several indices share are named once.
nir_minus_green = nir - green
nir_minus_red = nir - red
nir_over_red = nir / red
two_nir_plus_one = 2 * nir + 1

# Each row: (float column, int32 column, index values). Rows are in the order
# the columns are added to the frame.
index_table = (
    ('GSAVIfa', 'GSAVIa', (nir_minus_green / (nir + green + 0.5)) * 1.5),
    ('GNDVIfa', 'GNDVIa', nir_minus_green / (nir + green)),
    ('CVIfa', 'CVIa', (nir / green) * (red / green)),
    ('NDGIfa', 'NDGIa', (green - red) / (green + red)),
    ('RIfa', 'RIa', (red - green) / (red + green)),
    ('NBRfa', 'NBRa', (nir - swir2) / (nir + swir2)),
    ('NDIIfa', 'NDIIa', (nir - swir1) / (nir + swir1)),
    ('GDVIfa', 'GDVIa', nir_minus_green),
    ('MSAVIfa', 'MSAVIa',
     (two_nir_plus_one - np.sqrt(np.power(two_nir_plus_one, 2) - (8 * nir_minus_red))) / 2),
    ('DVIfa', 'DVIa', nir_minus_red),
    ('SAVIfa', 'SAVIa', (nir_minus_red / (nir + red + 0.5)) * (1 + 0.5)),
    ('NDVIfa', 'NDVIa', nir_minus_red / (nir + red)),
    ('MSRfa', 'MSRa', (nir_over_red - 1) / (np.sqrt(nir_over_red) + 1)),
)

# Store the float index, then its int32 version (scaled by 10 ** 7 and rounded),
# one index at a time so each float column is followed by its integer column.
for float_col, int_col, index_values in index_table:
    a[float_col] = index_values
    a[int_col] = np.int32(np.around(a[float_col] * 10 ** 7))

In [16]:
# Display the working frame with the ratio and index columns added.
a

Unnamed: 0,site,total_agb,psB1a,psB2a,psB3a,psB4a,psB5a,psB6a,ratio32fa,ratio42fa,...,MSAVIfa,MSAVIa,DVIfa,DVIa,SAVIfa,SAVIa,NDVIfa,NDVIa,MSRfa,MSRa
0,nt001,0.000000,199.333333,506.555556,515.777778,2729.555556,2068.666667,1115.333333,1.018206,5.388462,...,0.379740,3797405,0.2214,2214000,0.402838,4028384,0.682491,6824908,1.301962,13019620
1,barkely01,0.000000,582.000000,805.555556,1035.444444,1550.777778,2303.333333,1525.666667,1.285379,1.925103,...,0.084014,840140,0.0515,515000,0.101846,1018457,0.199226,1992263,0.223758,2237584
3,barkely04,0.000000,509.888889,735.444444,986.777778,1604.666667,2433.333333,1572.111111,1.341743,2.181901,...,0.101358,1013578,0.0618,618000,0.122134,1221344,0.238610,2386100,0.275451,2754508
4,barkely03,0.000000,695.111111,983.777778,1378.222222,2087.888889,2796.222222,1783.555556,1.400949,2.122318,...,0.108320,1083204,0.0709,709000,0.125635,1256350,0.204618,2046176,0.230656,2306558
5,barkely06,0.000000,624.666667,839.444444,1156.000000,1823.555556,2227.888889,1348.333333,1.377101,2.172336,...,0.105990,1059899,0.0667,667000,0.125392,1253917,0.223901,2239006,0.255782,2557825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,ntadac0002,20805.217611,415.833333,596.166667,785.833333,1774.000000,1923.166667,1436.166667,1.318144,2.975678,...,0.166449,1664492,0.0989,989000,0.196256,1962561,0.386479,3864791,0.503287,5032873
163,ntaarp0001,30472.446667,389.888889,622.111111,919.666667,2298.555556,2222.666667,1387.555556,1.478300,3.694767,...,0.223038,2230377,0.1379,1379000,0.251734,2517342,0.428660,4286602,0.581311,5813109
164,ntaarp0002,24414.130222,434.000000,667.888889,1059.666667,1828.888889,2400.444444,2005.333333,1.586591,2.738313,...,0.123858,1238583,0.0769,769000,0.146253,1462533,0.266366,2663665,0.313833,3138328
165,ntaarp0003,17598.352042,469.166667,413.083333,604.083333,924.166667,2113.750000,2165.666667,1.462376,2.237240,...,0.056734,567343,0.0320,320000,0.073529,735294,0.209424,2094241,0.236851,2368514


In [17]:
# Per-column dtypes and non-null counts now that the ratio/index columns exist.
a.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 157 entries, 0 to 166
Data columns (total 54 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   site       157 non-null    object 
 1   total_agb  157 non-null    float64
 2   psB1a      157 non-null    float64
 3   psB2a      157 non-null    float64
 4   psB3a      157 non-null    float64
 5   psB4a      157 non-null    float64
 6   psB5a      157 non-null    float64
 7   psB6a      157 non-null    float64
 8   ratio32fa  157 non-null    float64
 9   ratio42fa  157 non-null    float64
 10  ratio43fa  157 non-null    float64
 11  ratio52fa  157 non-null    float64
 12  ratio53fa  157 non-null    float64
 13  ratio54fa  157 non-null    float64
 14  ratio62fa  157 non-null    float64
 15  ratio63fa  157 non-null    float64
 16  ratio64fa  157 non-null    float64
 17  ratio65fa  157 non-null    float64
 18  ratio32a   157 non-null    int32  
 19  ratio42a   157 non-null    int32  
 20  ratio43a  

In [18]:
# # converts integer stage code dbg surface reflectance data for landsat 5 & 7 
# # to floating point for analysis of the vegetation index, blue band not used

# blued =((a['psB1d'].astype('int16')*0.0001)+0.0)

# greend =((a['psB2d'].astype('int16')*0.0001)+0.0)
# redd = ((a['psB3d'].astype('int16')*0.0001)+0.0)
# nird = ((a['psB4d'].astype('int16')*0.0001)+0.0)
# swir1d =((a['psB5d'].astype('int16')*0.0001)+0.0)
# swir2d =((a['psB6d'].astype('int16')*0.0001)+0.0)

In [19]:
# # calculate the band ratios

# a['ratio32fd'] = (a['psB3d'] / a['psB2d'])
# a['ratio42fd'] = (a['psB4d'] / a['psB2d'])
# a['ratio43fd'] = (a['psB4d'] / a['psB3d'])
# a['ratio52fd'] = (a['psB5d'] / a['psB2d'])
# a['ratio53fd'] = (a['psB5d'] / a['psB3d'])
# a['ratio54fd'] = (a['psB5d'] / a['psB4d'])
# a['ratio62fd'] = (a['psB6d'] / a['psB2d'])
# a['ratio63fd'] = (a['psB6d'] / a['psB3d'])
# a['ratio64fd'] = (a['psB6d'] / a['psB4d'])
# a['ratio65fd'] = (a['psB6d'] / a['psB5d'])

# # calculate the band ratios and convert them to int32 bit at 7 decimal places

# a['ratio32d'] = np.int32(np.around(a['ratio32fd'] * 10 ** 7))

# a['ratio42d'] = np.int32(np.around(a['ratio42fd'] * 10 ** 7))

# a['ratio43d'] = np.int32(np.around(a['ratio43fd'] * 10 ** 7))

# a['ratio52d'] = np.int32(np.around(a['ratio52fd'] * 10 ** 7))

# a['ratio53d'] = np.int32(np.around(a['ratio53fd'] * 10 ** 7))

# a['ratio54d'] = np.int32(np.around(a['ratio54fd'] * 10 ** 7))

# a['ratio62d'] = np.int32(np.around(a['ratio62fd'] * 10 ** 7))

# a['ratio63d'] = np.int32(np.around(a['ratio63fd'] * 10 ** 7))

# a['ratio64d'] = np.int32(np.around(a['ratio64fd'] * 10 ** 7))

# a['ratio65d'] = np.int32(np.around(a['ratio65fd'] * 10 ** 7))

In [20]:
# # calculate the vegetation indices
# a['GSAVIfd'] = ((nird-greend)/(nird+greend+0.5))*(1.5)
# a['GSAVId'] = np.int32(np.around(a['GSAVIfd'] * 10 ** 7))

# a['GNDVIfd'] = (nird-greend)/(nird+greend)
# a['GNDVId'] = np.int32(np.around(a['GNDVIfd'] * 10 ** 7))

# a['CVIfd']= (nird/greend)*(redd/greend)
# a['CVId'] = np.int32(np.around(a['CVIfd'] * 10 ** 7))

# a['NDGIfd']= (greend-redd)/(greend+redd)
# a['NDGId'] = np.int32(np.around(a['NDGIfd'] * 10 ** 7))

# a['RIfd']= (redd-greend)/(redd+greend)
# a['RId'] = np.int32(np.around(a['RIfd'] * 10 ** 7))

# a['NBRfd']= (nird-swir2d)/(nird+swir2d)
# a['NBRd'] = np.int32(np.around(a['NBRfd'] * 10 ** 7))

# a['NDIIfd']= (nird-swir1d)/(nird+swir1d)
# a['NDIId'] = np.int32(np.around(a['NDIIfd'] * 10 ** 7))

# a['GDVIfd']= (nird-greend)
# a['GDVId'] = np.int32(np.around(a['GDVIfd'] * 10 ** 7))

# a['MSAVIfd'] = (2 * nird + 1 - np.sqrt((np.power(((2*nird)+1) , 2)) - (8 * (nird-redd))))/2
# a['MSAVId'] = np.int32(np.around(a['MSAVIfd'] * 10 ** 7))

# a['DVIfd'] = (nird-redd)
# a['DVId'] = np.int32(np.around(a['DVIfd'] * 10 ** 7))

# a['SAVIfd']= ((nird-redd)/(nird+redd+0.5))*(1+0.5)
# a['SAVId'] = np.int32(np.around(a['SAVIfd'] * 10 ** 7))

# a['NDVIfd']= (nird-redd)/(nird+redd)
# a['NDVId'] = np.int32(np.around(a['NDVIfd'] * 10 ** 7))

# a['MSRfd']= (((nird/redd)-1)/((np.sqrt(nird/redd))+1))
# a['MSRd'] = np.int32(np.around(a['MSRfd'] * 10 ** 7))

In [21]:
# List every column now present in the working frame.
print (list(a))

['site', 'total_agb', 'psB1a', 'psB2a', 'psB3a', 'psB4a', 'psB5a', 'psB6a', 'ratio32fa', 'ratio42fa', 'ratio43fa', 'ratio52fa', 'ratio53fa', 'ratio54fa', 'ratio62fa', 'ratio63fa', 'ratio64fa', 'ratio65fa', 'ratio32a', 'ratio42a', 'ratio43a', 'ratio52a', 'ratio53a', 'ratio54a', 'ratio62a', 'ratio63a', 'ratio64a', 'ratio65a', 'GSAVIfa', 'GSAVIa', 'GNDVIfa', 'GNDVIa', 'CVIfa', 'CVIa', 'NDGIfa', 'NDGIa', 'RIfa', 'RIa', 'NBRfa', 'NBRa', 'NDIIfa', 'NDIIa', 'GDVIfa', 'GDVIa', 'MSAVIfa', 'MSAVIa', 'DVIfa', 'DVIa', 'SAVIfa', 'SAVIa', 'NDVIfa', 'NDVIa', 'MSRfa', 'MSRa']


In [22]:
# tr = a[['site', 'x', 'y', 'chm', 'std_1', 'Min_1', 'Max_1', 'count_1', 'perc5_1', 'perc10_1', 'perc25_1', 'perc50_1', 
#         'perc75_1', 'perc80_1', 'perc95_1', 'perc99_1', 'cov', 'psB1a', 'psB2a', 'psB3a', 'psB4a', 'psB5a', 'psB6a', 
#         'psB1d', 'psB2d', 'psB3d', 'psB4d', 'psB5d', 'psB6d','ratio32a', 'ratio42a', 'ratio43a', 'ratio52a', 'ratio53a',
#         'ratio54a', 'ratio62a', 'ratio63a', 'ratio64a', 'ratio65a','GSAVIa', 'GNDVIa', 'CVIa', 'NDGIa', 'RIa', 'NBRa', 
#         'NDIIa', 'GDVIa',  'MSAVIa', 'DVIa','SAVIa', 'NDVIa', 'MSRa', 'ratio32d', 'ratio42d', 'ratio43d', 'ratio52d', 
#         'ratio53d', 'ratio54d','ratio62d', 'ratio63d', 'ratio64d','ratio65d', 'GSAVId', 'GNDVId',  'CVId', 'NDGId', 
#         'RId',  'NBRd', 'NDIId',  'GDVId', 'MSAVId','DVId',  'SAVId', 'NDVId',  'MSRd','zone',]]

In [23]:
# Build tr from the site label, the six band means and the int32 ratio/index
# columns; total_agb and the floating-point ('fa') working columns are left out.
tr = a.loc[:, [
    'site',
    'psB1a', 'psB2a', 'psB3a', 'psB4a', 'psB5a', 'psB6a',
    'ratio32a', 'ratio42a', 'ratio43a', 'ratio52a', 'ratio53a',
    'ratio54a', 'ratio62a', 'ratio63a', 'ratio64a', 'ratio65a',
    'GSAVIa', 'GNDVIa', 'CVIa', 'NDGIa', 'RIa',
    'NBRa', 'NDIIa', 'GDVIa', 'MSAVIa', 'DVIa', 'SAVIa', 'NDVIa', 'MSRa',
]]

In [24]:
# List the columns kept in tr.
print(list(tr))

['site', 'psB1a', 'psB2a', 'psB3a', 'psB4a', 'psB5a', 'psB6a', 'ratio32a', 'ratio42a', 'ratio43a', 'ratio52a', 'ratio53a', 'ratio54a', 'ratio62a', 'ratio63a', 'ratio64a', 'ratio65a', 'GSAVIa', 'GNDVIa', 'CVIa', 'NDGIa', 'RIa', 'NBRa', 'NDIIa', 'GDVIa', 'MSAVIa', 'DVIa', 'SAVIa', 'NDVIa', 'MSRa']


In [25]:
# Draw five random rows of tr for inspection in the next cells.
val = tr.sample(n=5)

In [26]:
# (rows, columns) of the random sample.
val.shape

(5, 30)

In [27]:
# Display the sampled rows.
val

Unnamed: 0,site,psB1a,psB2a,psB3a,psB4a,psB5a,psB6a,ratio32a,ratio42a,ratio43a,...,NDGIa,RIa,NBRa,NDIIa,GDVIa,MSAVIa,DVIa,SAVIa,NDVIa,MSRa
113,legu03,835.833333,1223.25,1716.25,2770.666667,4245.583333,3738.833333,14030247,22650044,16143724,...,-1677441,1677441,-1487400,-2102637,1547000,1501595,1054000,1666667,2349532,2705192
132,lit01,68.583333,227.0,225.5,2042.083333,1058.5,405.333333,9933921,89959618,90558019,...,44248,-44248,6689824,3174194,1815000,3401977,1817000,3750516,8014998,20125663
154,ntastu0001,453.0,769.666667,1045.222222,2361.444444,2940.111111,1915.555556,13580193,30681392,22592750,...,-1521499,1521499,1043031,-1092247,1592000,2082333,1316000,2348323,3863770,5031068
37,lim04,350.416667,617.0,759.583333,1951.5,2327.333333,1590.583333,12310913,31628849,25691717,...,-1031977,1031977,1019486,-878915,1334000,2003637,1192000,2319066,4398524,6032740
136,larr15,218.583333,417.0,524.916667,2052.666667,1843.0,963.666667,12587930,49224620,39104620,...,-1137088,1137088,3611940,536585,1635000,2673564,1528000,3025343,5931677,9788963


In [28]:
# Write the band means and int32 ratio/index columns to csv without the index.
# NOTE(review): cell In [42] later writes df_si to this same path, so the file
# produced here is replaced by the merged table - confirm that is intended.
tr.to_csv(
    r'{0}:\cdu\data\zonal_stats\output\{1}\dbg_dp0_df_dp1_dry_dp1_annual_indices.csv'.format(drive, date),
    index=False,
)

In [29]:
# Attach the ratio/index columns to the full zonal-stats table row by row.
# tr was derived from df without resetting the index, so its index labels still
# identify the df rows the values were calculated from. Joining on that index
# (instead of merging on the 'site' string) keeps exactly one output row per df
# row even when a site name occurs more than once; a merge on 'site' would
# cross-match such rows and duplicate them.
# Rows removed by the earlier dropna have no match and get NaN in the added
# columns, exactly as with a left merge. 'site' is dropped from tr because df
# already carries it.
df_si = df.join(tr.drop(['site'], axis=1), how='left')

In [30]:
# Summary of the merged table (row count, column count, dtypes).
df_si.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 167 entries, 0 to 166
Columns: 244 entries, uid to MSRa
dtypes: float64(230), int64(2), object(12)
memory usage: 319.6+ KB


In [31]:
# List every column of the merged table.
print (list(df_si))

['uid', 'site', 'date', 'lon_gda94', 'lat_gda94', 'bio_l_kg1ha', 'bio_t_kg1ha', 'bio_b_kg1ha', 'bio_w_kg1ha', 'bio_br_kg1ha', 'bio_s_kg1ha', 'bio_r_kg1ha', 'bio_agb_kg1ha', 'c_l_kg1ha', 'c_t_kg1ha', 'c_b_kg1ha', 'c_w_kg1ha', 'c_br_kg1ha', 'c_s_kg1ha', 'c_r_kg1ha', 'c_agb_kg1ha', 'geometry', 'basal_dt', 'b1_dbg_count', 'b1_dbg_min', 'b1_dbg_max', 'b1_dbg_mean', 'b1_dbg_med', 'b1_dbg_std', 'b1_dbg_p25', 'b1_dbg_p50', 'b1_dbg_p75', 'b1_dbg_p95', 'b1_dbg_p99', 'b1_dbg_range', 'b2_dbg_count', 'b2_dbg_min', 'b2_dbg_max', 'b2_dbg_mean', 'b2_dbg_med', 'b2_dbg_std', 'b2_dbg_p25', 'b2_dbg_p50', 'b2_dbg_p75', 'b2_dbg_p95', 'b2_dbg_p99', 'b2_dbg_range', 'b3_dbg_count', 'b3_dbg_min', 'b3_dbg_max', 'b3_dbg_mean', 'b3_dbg_med', 'b3_dbg_std', 'b3_dbg_p25', 'b3_dbg_p50', 'b3_dbg_p75', 'b3_dbg_p95', 'b3_dbg_p99', 'b3_dbg_range', 'b4_dbg_count', 'b4_dbg_min', 'b4_dbg_max', 'b4_dbg_mean', 'b4_dbg_med', 'b4_dbg_std', 'b4_dbg_p25', 'b4_dbg_p50', 'b4_dbg_p75', 'b4_dbg_p95', 'b4_dbg_p99', 'b4_dbg_range', 'b5_

In [32]:
# The psB*a columns duplicate the b*_dbg_mean columns already in the merged
# table (they were renamed copies), so remove them again.
drop_list = [
    'psB1a',
    'psB2a',
    'psB3a',
    'psB4a',
    'psB5a',
    'psB6a',
]
df_si = df_si.drop(drop_list, axis=1)

In [33]:
# Display the merged table after the psB*a columns were dropped.
df_si

Unnamed: 0,uid,site,date,lon_gda94,lat_gda94,bio_l_kg1ha,bio_t_kg1ha,bio_b_kg1ha,bio_w_kg1ha,bio_br_kg1ha,...,NDGIa,RIa,NBRa,NDIIa,GDVIa,MSAVIa,DVIa,SAVIa,NDVIa,MSRa
0,119,nt001,20110523,131.209577,-13.945196,0.000000,0.000000,0.000000,0.000000,0.000000,...,-88149.0,88149.0,4198751.0,1377945.0,2223000.0,3797405.0,2214000.0,4028384.0,6824908.0,13019620.0
1,42,barkely01,20111025,135.040695,-18.001762,0.000000,0.000000,0.000000,0.000000,0.000000,...,-1250000.0,1250000.0,81301.0,-1954321.0,745000.0,840140.0,515000.0,1018457.0,1992263.0,2237584.0
2,43,barkely02,20111026,135.259768,-18.056963,0.000000,0.000000,0.000000,0.000000,0.000000,...,,,,,,,,,,
3,45,barkely04,20111026,135.225612,-17.984787,0.000000,0.000000,0.000000,0.000000,0.000000,...,-1458454.0,1458454.0,100756.0,-2053505.0,869000.0,1013578.0,618000.0,1221344.0,2386100.0,2754508.0
4,44,barkely03,20111026,135.288375,-18.024510,0.000000,0.000000,0.000000,0.000000,0.000000,...,-1673020.0,1673020.0,785530.0,-1451976.0,1104000.0,1083204.0,709000.0,1256350.0,2046176.0,2306558.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,5,ntadac0002,20160506,132.340300,-12.739220,662.207022,718.913844,2368.019189,12460.882933,5334.929444,...,-1368573.0,1368573.0,1052960.0,-403029.0,1178000.0,1664492.0,989000.0,1962561.0,3864791.0,5032873.0
163,1,ntaarp0001,20160602,132.270100,-13.557290,1114.699000,1209.657556,3360.251222,18298.821333,7796.607778,...,-1927320.0,1927320.0,2472185.0,168142.0,1676000.0,2230377.0,1379000.0,2517342.0,4286602.0,5813109.0
164,2,ntaarp0002,20160602,132.294400,-13.545610,885.523606,961.609483,3075.566939,13170.989572,7308.612694,...,-2271147.0,2271147.0,-461779.0,-1352886.0,1161000.0,1238583.0,769000.0,1462533.0,2663665.0,3138328.0
165,3,ntaarp0003,20160603,132.457100,-13.516970,628.215708,682.369917,2271.717583,9263.866500,5445.672167,...,-1878073.0,1878073.0,-4017481.0,-3915048.0,511000.0,567343.0,320000.0,735294.0,2094241.0,2368514.0


In [34]:
# Rename map for the int32 ratio/index columns: the trailing 'a' of each
# column name is replaced by 't' (e.g. 'ratio32a' -> 'ratio32t').
dict_ = {
    stem + 'a': stem + 't'
    for stem in (
        'ratio32', 'ratio42', 'ratio43', 'ratio52', 'ratio53',
        'ratio54', 'ratio62', 'ratio63', 'ratio64', 'ratio65',
        'GSAVI', 'GNDVI', 'CVI', 'NDGI', 'RI', 'NBR', 'NDII',
        'GDVI', 'MSAVI', 'DVI', 'SAVI', 'NDVI', 'MSR',
    )
}

In [35]:
# Apply the 'a' -> 't' column rename map to the merged table.
df_si = df_si.rename(columns=dict_)

In [36]:
# List the columns of the merged table after the rename.
print (list(df_si))

['uid', 'site', 'date', 'lon_gda94', 'lat_gda94', 'bio_l_kg1ha', 'bio_t_kg1ha', 'bio_b_kg1ha', 'bio_w_kg1ha', 'bio_br_kg1ha', 'bio_s_kg1ha', 'bio_r_kg1ha', 'bio_agb_kg1ha', 'c_l_kg1ha', 'c_t_kg1ha', 'c_b_kg1ha', 'c_w_kg1ha', 'c_br_kg1ha', 'c_s_kg1ha', 'c_r_kg1ha', 'c_agb_kg1ha', 'geometry', 'basal_dt', 'b1_dbg_count', 'b1_dbg_min', 'b1_dbg_max', 'b1_dbg_mean', 'b1_dbg_med', 'b1_dbg_std', 'b1_dbg_p25', 'b1_dbg_p50', 'b1_dbg_p75', 'b1_dbg_p95', 'b1_dbg_p99', 'b1_dbg_range', 'b2_dbg_count', 'b2_dbg_min', 'b2_dbg_max', 'b2_dbg_mean', 'b2_dbg_med', 'b2_dbg_std', 'b2_dbg_p25', 'b2_dbg_p50', 'b2_dbg_p75', 'b2_dbg_p95', 'b2_dbg_p99', 'b2_dbg_range', 'b3_dbg_count', 'b3_dbg_min', 'b3_dbg_max', 'b3_dbg_mean', 'b3_dbg_med', 'b3_dbg_std', 'b3_dbg_p25', 'b3_dbg_p50', 'b3_dbg_p75', 'b3_dbg_p95', 'b3_dbg_p99', 'b3_dbg_range', 'b4_dbg_count', 'b4_dbg_min', 'b4_dbg_max', 'b4_dbg_mean', 'b4_dbg_med', 'b4_dbg_std', 'b4_dbg_p25', 'b4_dbg_p50', 'b4_dbg_p75', 'b4_dbg_p95', 'b4_dbg_p99', 'b4_dbg_range', 'b5_

In [41]:
# Build the "clean" modelling table from the merged frame: the site label, the
# above-ground biomass column, the min/max/mean/med/std and percentile columns
# of the six dbg bands and of the three dp0, dp1d and dp1a bands, followed by
# the renamed ('t') ratio and index columns.
# None of the *_count / *_range columns, nor uid, date or the coordinate
# columns, are named in this list.
# The selection raises KeyError if any listed name is missing from df_si.
clean_df = df_si[[
     'site', 'bio_agb_kg1ha',     
     'b1_dbg_min', 'b1_dbg_max', 'b1_dbg_mean', 'b1_dbg_med', 'b1_dbg_std', 'b1_dbg_p25', 
    'b1_dbg_p50', 'b1_dbg_p75', 'b1_dbg_p95', 'b1_dbg_p99', 'b2_dbg_min', 'b2_dbg_max', 
    'b2_dbg_mean', 'b2_dbg_med', 'b2_dbg_std', 'b2_dbg_p25', 'b2_dbg_p50', 'b2_dbg_p75', 'b2_dbg_p95', 'b2_dbg_p99', 
    'b3_dbg_min', 'b3_dbg_max', 'b3_dbg_mean', 'b3_dbg_med', 'b3_dbg_std', 'b3_dbg_p25', 
    'b3_dbg_p50', 'b3_dbg_p75', 'b3_dbg_p95', 'b3_dbg_p99', 'b4_dbg_min', 'b4_dbg_max', 
    'b4_dbg_mean', 'b4_dbg_med', 'b4_dbg_std', 'b4_dbg_p25', 'b4_dbg_p50', 'b4_dbg_p75', 'b4_dbg_p95', 'b4_dbg_p99', 
     'b5_dbg_min', 'b5_dbg_max', 'b5_dbg_mean', 'b5_dbg_med', 'b5_dbg_std', 'b5_dbg_p25', 
    'b5_dbg_p50', 'b5_dbg_p75', 'b5_dbg_p95', 'b5_dbg_p99', 'b6_dbg_min', 'b6_dbg_max', 
    'b6_dbg_mean', 'b6_dbg_med', 'b6_dbg_std', 'b6_dbg_p25', 'b6_dbg_p50', 'b6_dbg_p75', 'b6_dbg_p95', 'b6_dbg_p99', 
     'b1_dp0_min', 'b1_dp0_max', 'b1_dp0_mean', 'b1_dp0_std', 'b1_dp0_med', 'b1_dp0_p25', 'b1_dp0_p50', 'b1_dp0_p75', 
    'b1_dp0_p95', 'b1_dp0_p99',
    'b2_dp0_min', 'b2_dp0_max', 'b2_dp0_mean','b2_dp0_std', 'b2_dp0_med', 'b2_dp0_p25', 'b2_dp0_p50', 
    'b2_dp0_p75', 'b2_dp0_p95', 'b2_dp0_p99',  'b3_dp0_min', 'b3_dp0_max', 'b3_dp0_mean', 
    'b3_dp0_med', 'b3_dp0_p25', 'b3_dp0_p50', 'b3_dp0_p75', 'b3_dp0_p95', 'b3_dp0_p99',  'b3_dp0_std', 
     'b1_dp1d_min', 'b1_dp1d_max', 'b1_dp1d_mean', 
     'b1_dp1d_std', 'b1_dp1d_med', 'b1_dp1d_p25', 'b1_dp1d_p50', 'b1_dp1d_p75', 'b1_dp1d_p95', 
    'b1_dp1d_p99',  'b2_dp1d_min', 'b2_dp1d_max', 'b2_dp1d_mean',  'b2_dp1d_std', 
    # NOTE(review): 'b2_dp1d_p99d' ends in an extra 'd', unlike every other p99
    # name in this list - confirm it matches the header of the input csv.
    'b2_dp1d_med', 'b2_dp1d_p25', 'b2_dp1d_p50', 'b2_dp1d_p75', 'b2_dp1d_p95', 'b2_dp1d_p99d',  
    'b3_dp1d_min', 'b3_dp1d_max', 'b3_dp1d_mean',  'b3_dp1d_med', 'b3_dp1d_p25', 'b3_dp1d_p50', 
    'b3_dp1d_p75', 'b3_dp1d_p95', 'b3_dp1d_p99',  'b3_dp1d_std', 
    'b1_dp1a_min', 'b1_dp1a_max', 'b1_dp1a_mean',  'b1_dp1a_std', 'b1_dp1a_med', 
    'b1_dp1a_p25', 'b1_dp1a_p50', 'b1_dp1a_p75', 'b1_dp1a_p95', 'b1_dp1a_p99', 'b2_dp1a_min', 
    'b2_dp1a_max', 'b2_dp1a_mean',  'b2_dp1a_std', 'b2_dp1a_med', 'b2_dp1a_p25', 'b2_dp1a_p50', 
    'b2_dp1a_p75', 'b2_dp1a_p95', 'b2_dp1a_p99', 'b3_dp1a_min', 'b3_dp1a_max', 'b3_dp1a_mean', 
     'b3_dp1a_med', 'b3_dp1a_p25', 'b3_dp1a_p50', 'b3_dp1a_p75', 'b3_dp1a_p95', 'b3_dp1a_p99', 
     'b3_dp1a_std','ratio32t', 'ratio42t', 'ratio43t', 'ratio52t', 
    'ratio53t', 'ratio54t', 'ratio62t', 'ratio63t', 'ratio64t', 'ratio65t', 'GSAVIt', 'GNDVIt', 'CVIt', 'NDGIt', 
    'RIt', 'NBRt', 'NDIIt', 'GDVIt', 'MSAVIt', 'DVIt', 'SAVIt', 'NDVIt', 'MSRt'
]]

In [42]:
# Write the full merged table (all zonal stats plus the renamed ratio/index
# columns) to csv without the index.
# NOTE(review): this is the same path tr was written to in cell In [28], so
# that earlier file is overwritten here - confirm that is intended.
df_si.to_csv(
    r'{0}:\cdu\data\zonal_stats\output\{1}\dbg_dp0_df_dp1_dry_dp1_annual_indices.csv'.format(drive, date),
    index=False,
)

In [43]:
# Write the reduced modelling table to its own '_clean' csv without the index.
clean_df.to_csv(
    r'{0}:\cdu\data\zonal_stats\output\{1}\dbg_dp0_df_dp1_dry_dp1_annual_indices_clean.csv'.format(drive, date),
    index=False,
)