# Biomass Field Data Clean v3

#### env = zonal

This notebook reads in the field data csv (all site data listed by rows) of basal wedge counts within a site.
The notebook, cleans the file by dropping null easting values, filling in the uid column based on the index and fills any missing datum value with WGS84 (data with missing values was collected in 2013).
This function filters the dataframe based on datum and zone information returning four df.
This function Converts each dataframe to geographics GDA94 and returns a list of active df.
This function creates a latitude and longitude column and insert the relevant gda94 coordinates and removes other existing coordinate columns.

This function returns a list of column names that includes the substring (var_) 

This function transfers iterrow data to output df.

This function calculated site proportions and returns df with alive proportions ‘alv_prop’, dead proportions ‘ded_prop’ and total proportions ‘total_prop’.
Exports:
-	Initial_biomass_cleaned.csv


In [1]:
import pandas as pd
import geopandas as gpd
import os

In [2]:
dir_ = r"\\pgb-bas01\DENR_Satellite_Imagery$\Scratch\Rob\tern\tree_biomass_field_data\biomass_carbon"

In [3]:
os.listdir(dir_)

['av_test.csv',
 'av_test2.csv',
 'df2.csv',
 'df2_v2.csv',
 'merged_tree_biomass_obs2013_field_data.csv',
 'merged_tree_biomass_obs2013_field_data_v3.csv',
 'merge_test.csv',
 'site_totals.csv',
 'site_totals_update.csv',
 'site_totals_update2.csv',
 'site_totals_update_drop_dup.csv',
 'site_totals_update_drop_dup_latest.csv',
 'site_totals_update_drop_dup_latest_v2.csv',
 'site_totals_update_drop_dup_latest_v3.csv',
 'test_final_df.csv',
 'tree_biomass_field_data.xlsx',
 'tree_biomass_field_data_copy.csv',
 'tree_biomass_field_data_copy.txt',
 'tree_biomass_field_data_copy.xlsx',
 'tree_biomass_field_data_v2.csv',
 'tree_biomass_field_data_v3.csv']

In [4]:
# original filed data test (13 sites)
#csv_ = os.path.join(dir_, "tree_biomass_field_data_copy.csv")
# merged csv abova and obs sheet 2013
csv_ = os.path.join(dir_, "merged_tree_biomass_obs2013_field_data_v3.csv")

In [5]:
#csv_ = "\\pgb-bas01\DENR_Satellite_Imagery$\Scratch\Rob\tern\tree_biomass_field_data\biomass_carbon\tree_biomass_field_data_copy.csv"

In [6]:
#df = pd.read_excel(csv_, sheet_name="tree_biomass_field_data", encoding='windows-1252')
df = pd.read_csv(csv_, encoding='windows-1252')

In [7]:
df.shape

(70, 309)

In [8]:
print(len(df.site.unique()))

70


In [9]:
def init_clean(df):
    
    """ This function cleans the file by dropping null easting values, 
    filling in the uid column based on the index and fills any missing datum value with 
    WGS84 (data with missing values was collected in 2013)."""
    
    # fill in uid column
    df['uid'] = df.index + 1
    # drop any value where an easting is missing (i.e. no coordinates)
    df.dropna(axis=0, subset=['easting'], inplace= True)
    
    # fill in datum to WGS84 - due to data collection in 2012 2013
    df['datum'] = df['datum'].fillna('WGS84')
    
    return df
     

In [10]:
clean_df = init_clean(df)

In [11]:
clean_df.shape

(70, 309)

In [12]:
def filter_dataframe(df):
    
    """ This function filters the dataframe based on datum and zone information returning four df. """
    
    wgs = df[df['datum']== 'WGS84']
    gda = df[df['datum']== 'GDA94']

    wgs52 = wgs[wgs['zone'] == 52.0]
    wgs53 = wgs[wgs['zone'] == 53.0]

    gda52 = gda[gda['zone'] == 52.0]
    gda53 = gda[gda['zone'] == 53.0]
    
    return [gda52, gda53, wgs52, wgs53]

In [13]:
df_list = filter_dataframe(df)

In [14]:
def convert_gdf(df_list, epsg_list):
    
    """ This function Converts each dataframe to geographics GDA94 and returns a list of active df. """
    
    gdf_list = []
    for df, epsg in zip(df_list, epsg_list):
        gdf = gpd.GeoDataFrame(
        df, geometry=gpd.points_from_xy(df.easting, df.northing))

        gdf = gdf.set_crs(epsg= epsg) #'epsg:{0}'.format(str(epsg)))

        gdf = gdf.to_crs(4283)

        print(gdf.crs)
        gdf_list.append(gdf)
          
    return gdf_list

In [15]:
gdf_list = convert_gdf(df_list, [28352, 28353, 32752, 32753])

epsg:4283
epsg:4283
epsg:4283
epsg:4283


In [16]:
def concat_and_clean(gdf_list):
    """ This function creates a latitude and longitude column and 
    insert the relevant gda94 coordinates and removes other existing coordinate columns. """
    
    final = pd.concat(gdf_list)
    
    final['lon_gda94'] = final.geometry.x
    final['lat_gda94'] = final.geometry.y

    del final['zone']
    del final['easting']
    del final['northing']
    del final['lattitude']
    del final['longitude']
    
    final['datum'] = 'GDA94'
    
    return final
    

In [17]:
clean_gdf = concat_and_clean(gdf_list)
print(clean_gdf.shape)
print(len(clean_gdf.site.unique()))

(69, 307)
69


##### Export to csv

In [18]:
clean_df_ = pd.DataFrame(clean_gdf)
clean_df = clean_df_.reset_index(drop=True)
# fill in uid column
clean_df['uid'] = clean_df.index + 1
clean_df

Unnamed: 0,uid,site,date,datum,factor,count,cent_sp01,cent_l_sp01,cent_d_sp01,cent_sp02,...,cent_sp10.1,cent_l_sp10.1,cent_d_sp10.1,cent_sp11.1,cent_l_sp11.1,cent_d_sp11.1,date_am,geometry,lon_gda94,lat_gda94
0,1,girra02,5/6/2012,GDA94,0.10,7.0,Melaleuca nervoa,23.0,0.0,Lophostemon lactifluus,...,,,,,,,20120605,POINT (131.13200 -12.52341),131.132000,-12.523407
1,2,lit01,26/04/2013,GDA94,0.50,7.0,E. tetrodonta,15.0,0.0,E. chlorostachys,...,,,,,,,20130426,POINT (131.06192 -12.54791),131.061917,-12.547912
2,3,buff01,13/07/2012,GDA94,0.25,7.0,E. tetrodonta,9.0,1.0,Gardenia sp.,...,,,,,,,20120713,POINT (130.89478 -11.79389),130.894779,-11.793886
3,4,ep01,13/07/2012,GDA94,0.50,7.0,Camoty,30.0,1.0,Mid layer vine,...,,,,,,,20120713,POINT (130.82165 -12.41110),130.821649,-12.411103
4,5,girra01,5/6/2012,GDA94,0.50,6.0,E. tetrodonta,1.0,0.0,E. miniata,...,,,,,,,20120605,POINT (131.07802 -12.51412),131.078022,-12.514123
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,65,mlp11a,2013-08-10 08:46:21,GDA94,,7.0,,,,,...,,,,,,,20130810 08:46:21,POINT (135.64991 -17.31166),135.649906,-17.311655
65,66,mlp13,2013-05-10 00:44:10,GDA94,,7.0,,,,,...,,,,,,,20130510 00:44:10,POINT (135.54417 -17.17327),135.544166,-17.173267
66,67,mlp14a,2013-08-11 10:00:10,GDA94,,7.0,,,,,...,,,,,,,20130811 10:00:10,POINT (135.68196 -17.18551),135.681963,-17.185512
67,68,mlp15a,2013-08-12 07:45:26,GDA94,,7.0,,,,,...,,,,,,,20130812 07:45:26,POINT (-133.30497 -89.38381),-133.304969,-89.383813


In [20]:
clean_df = pd.DataFrame(clean_gdf)
clean_df.to_csv(r"Z:\Scratch\Rob\tern\tree_biomass_field_data\scratch\initial_biomass_cleaned_v3.csv", index=False)

In [21]:
def search_for_names(gdf, var_):
    """ This function returns a list of column names that includes the substring (var_) """
    var_cols = [col for col in gdf.columns if var_ in col]

    return var_cols

In [22]:
var_cols =search_for_names(clean_gdf, "sp")

In [23]:
# define a list of column header basal positions
pos_list = ["cent", "north", "south", "ne", "se", "nw", "sw"]

In [24]:
def add_columns(df2, row):
    
    """ This function transfers iterrow data to output df. """
    
    df2['site'] = row['site']
    df2['uid'] =  row['uid']
    df2['date'] = str(row['date_am'])
    df2['factor'] = row['factor']
    df2['loc_count'] = row['count']
    
#     # replace 9999.0 from factor
#     fact_ = row['factor']
    
    
#     if fact_ == 9999.0:
#         print("fact_: ", fact_)
#         factor_ = 0.0
#         print("factor_: ", factor_)  
#     else:
#         factor_ == fact_
      
#     df2['factor'] = factor_
#     df2['factor'] = row['factor']
    df2['geometry'] = row['geometry']
    df2['lon_gda94'] = row['lon_gda94']
    df2['lat_gda94'] = row['lat_gda94']
    
    return df2

In [25]:
clean_gdf.columns
clean_gdf.shape

(69, 307)

In [26]:
import numpy as np
clean_gdf.fillna(9999, inplace=True)

In [27]:
def prop(df):
    
    """ This function calculated site proportions and returns df with alive proportions 
    ‘alv_prop’, dead proportions ‘ded_prop’ and total proportions ‘total_prop’. """
    
    print("--------------------------------------PROPORTIONS-----------------------------------")
    print(df)
    al_prop = []
    ded_prop = []
    total_prop = []
    tot_alive = df.alive.sum()
    print("tot_alive: ", tot_alive)

    tot_dead = df.dead.sum()
    print("tot_dead: ", tot_dead)
    tot_all = tot_alive + tot_dead
    print("tot_all: ", tot_all)
    for index, row in df.iterrows():
        print("-"*50)
        print("alive proportion: ", (row["alive"]/tot_alive)*100)
        al_prop.append((row["alive"]/tot_alive)*100)
        print("dead proportion: ", (row["dead"]/tot_dead)*100)
        ded_prop.append((row["dead"]/tot_dead)*100)
        print("total proportion: ", ((row["alive"] + row["dead"])/(tot_alive + tot_dead)*100))
        
        total_prop.append((row["alive"] + row["dead"])/(tot_alive + tot_dead)*100)
        
    df["alv_prop"] = al_prop
    df["ded_prop"] = ded_prop
    df["total_prop"] = total_prop
    
    return df
        

In [28]:
df_list = []
index_list = []

site_name_list = []
site_count_list = []
site_average_alive_list = []
site_average_dead_list = []
site_average_total_list = []

for index, row in clean_gdf.iterrows():
#     print(index)
#     print(row)
    row.dropna(inplace=True)
    site = row["site"]
    count = row["count"]
    uid = row["uid"]
    print("="*50)
    print(site)
    print("count: ", count)
    site_list = []
    index_list.append(site_list)
    species = []
    alive_count = []
    dead_count = []
    
    site_list = []
    count_list = []
    site_pos_al = []
    site_pos_ded = []
    site_pos_tot = []
    
    for pos in pos_list:
        pos_species = []
        pos_sp_live = []
        pos_sp_dead = []
        position_list = []
        
        pos_cols = [col for col in clean_df.columns if pos in col]
#         print("pos_cols: ", pos_cols)
        for cols in pos_cols:
#             print("="*50)
#             print(cols)
            if "{0}_sp".format(pos) in cols:
#                 print("species")

                if(row[cols] == "9999"):
                    pass
                    #print('true')
                elif(row[cols] == 9999):
                    pass
                    #print('true')
                elif(row[cols] == 9999.0):
                    pass
                    #print('true')
                else:
                    print("Valid species: ", row[cols])
                    species.append(row[cols])
                    pos_species.append(row[cols])
                    
                    
                                        
            elif "{0}_l".format(pos) in cols:
                #print("live")
                live = row[cols]
                
                if(row[cols] == "9999"):
                    pass
                    #print('true')
                elif(row[cols] == 9999):
                    pass
                    #print('true')
                elif(row[cols] == 9999.0):
                    pass
                    #print('true')
                else:
                    print("Valid alive: ", row[cols])
                    alive_count.append(row[cols])
                    pos_sp_live.append(row[cols])
                    
                
                    
            elif "{0}_d".format(pos) in cols:
                #print("dead")
                dead = row[cols]
                
                
                if(row[cols] == "9999"):
                    pass
                    #print('true')
                elif(row[cols] == 9999):
                    pass
                    #print('true')
                elif(row[cols] == 9999.0):
                    pass
                    #print('true')
                else:
                    print("Valid dead: ", row[cols])
                    dead_count.append(row[cols])
                    pos_sp_dead.append(row[cols])
                    
            else:
                print("ERROR"*100)

            
        sum_al = sum(pos_sp_live)
        sum_dead = sum(pos_sp_dead)
        sum_all = sum_al + sum_dead
        #print("sum_all: ", sum_all)
        #print(site)
        #print(count)
        site_list.append(site)
        count_list.append(count)
        site_pos_al.append(sum_al)
        site_pos_ded.append(sum_dead)
        site_pos_tot.append(sum_all)

        print("position totals: ", sum_al, sum_dead, sum_all)
        print("-"*50)
    print("Total basal count locations within site: ", count)
    print("List of ALIVE position totals stems: ", site_pos_al)
    print("Totals at: ", sum(site_pos_al))
    print("Equals: ", sum(site_pos_al)/count)


    print("List of DEAD position totals stems: ", site_pos_ded)
    print("Totals at: ", sum(site_pos_ded))
    print("Equals: ", sum(site_pos_ded)/count)
    

    print("List of TOTAL position totals stems: ", site_pos_tot)
    print("Totals at: ", sum(site_pos_tot))
    print("Equals: ", sum(site_pos_tot)/count)
    
    site_name_list.append(site)
    site_count_list.append(count)
    site_average_alive_list.append(sum(site_pos_al)/count)
    site_average_dead_list.append(sum(site_pos_ded)/count)
    site_average_total_list.append(sum(site_pos_tot)/count)

    
    print('='*20)
    
    df = pd.DataFrame(list(zip(species, alive_count, dead_count)),
           columns =['species', 'alive', 'dead'])
    


    print("Groupby Species: ")
    # groupby species list
    df2 = df.groupby('species').sum()
    df3 = df2.reset_index()
    print("*"*50)
    print("groupby dataframe: ", df3)
    #print("df columns: ", df3.columns)
    
    # calculate strand species proprtions
    df3 = prop(df3)
    print("-"*50, df3)
    # add columns and fill
    df3 = add_columns(df3, row)
    
    
    print("-"*50, df3)
    if df3.empty:
        print("+"*50)
        print("Data frame is Empty")
        
        empty_header_list = ["species", "alive", "dead", "alv_prop", "ded_prop", "total_prop"]
        #,  'site', 'uid', 'date', 'factor', 'loc_count', 'geometry', 'lon_gda94', 'lat_gda94']
        empty_data_list = ["None", 0.0, 0.0, 0.0, 0.0, 0.0] #, site, uid, date, factor, loc_count, geometry, lon_gda94, lat_gda_94]
        
        # convert the list into dataframe row
        data = pd.DataFrame(empty_data_list).T
 
        # add columns
        data.columns = empty_header_list

        df3 = data
        df3 = add_columns(df3, row)
        
        print("+"*50)
        print("newdf3: ", df3)
    else:
        print("*"*50)
        print("Data frame is NOT Empty")
        df3 = add_columns(df3, row)
        df3_columns = df3.columns.tolist()
        print(df3_columns)
        print("*"*50) 
    
    df_list.append(df3)
   

# site_name_list = []
# site_count_list = []
# site_average_alive_list = []
# site_average_dead_list = []
# site_average_total_list = []

print("+"*50)
print(site_name_list)
print(site_average_alive_list)

site_average_alive_list
site_average_dead_list
site_average_total_list
print("-"*50)

# create lists of usefull data
factor_list = clean_df['factor'].tolist()
print("Factor list: ", factor_list)

factor = [0.0 if item == 9999.0 else item for item in factor_list]
print("Factor: ", factor)


## Convert averages by timing the site average strand counts by the basl wedge factor
cor_site_average_alive_list = [i*fac for i, fac in zip(site_average_alive_list, factor)]
cor_site_average_dead_list = [i*fac for i, fac in zip(site_average_dead_list, factor)]
cor_site_average_total_list = [i*fac for i, fac in zip(site_average_total_list, factor)]


df_av = pd.DataFrame(list(zip(site_name_list, factor, site_count_list, site_average_alive_list, site_average_dead_list, site_average_total_list,
                             cor_site_average_alive_list, cor_site_average_dead_list, cor_site_average_total_list)),
       columns =['site', 'factor', 'count', 'avg_alive', 'avg_dead', 'avg_total', 'cor_av_liv', 'cor_av_ded', 'cor_av_tot'])
print("DF_AV", df_av)

girra02
count:  7.0
Valid species:  Melaleuca nervoa
Valid alive:  23.0
Valid dead:  0.0
Valid species:  Lophostemon lactifluus
Valid alive:  1.0
Valid dead:  0.0
Valid species:  P. spiralis
Valid alive:  2.0
Valid dead:  0.0
Valid species:  Eucalyptus alba
Valid alive:  1.0
Valid dead:  0.0
position totals:  27.0 0.0 27.0
--------------------------------------------------
Valid species:  Melaleuca nervoa
Valid alive:  20.0
Valid dead:  5.0
Valid species:  P. spiralis
Valid alive:  1.0
Valid dead:  1.0
Valid species:  Eucalyptus alba
Valid alive:  2.0
Valid dead:  0.0
position totals:  23.0 6.0 29.0
--------------------------------------------------
Valid species:  Melaleuca nervoa
Valid alive:  3.0
Valid dead:  8.0
Valid species:  Lophostemon lactifluus
Valid alive:  4.0
Valid dead:  2.0
Valid species:  P. spiralis
Valid alive:  2.0
Valid dead:  0.0
Valid species:  Eucalyptus alba
Valid alive:  4.0
Valid dead:  0.0
position totals:  13.0 10.0 23.0
-------------------------------------

11             Terminalia sp.    4.0   4.0
--------------------------------------PROPORTIONS-----------------------------------
                      species  alive  dead
0               Antidesma sp.    7.0   0.0
1          Buchanania obovata    4.0   0.0
2    Dolichandrone filiformis    1.0   0.0
3            E. chlorostachys    1.0   1.0
4               E. tetrodonta  107.0   5.0
5        Exocarpos latifolius    7.0   0.0
6                Gardenia sp.   12.0   0.0
7                  L. humilis    2.0   0.0
8                   P. careya   19.0   3.0
9   Polictiardorisa filitarni    0.0   0.0
10           Strychnos lucida    2.0   0.0
11             Terminalia sp.    4.0   4.0
tot_alive:  166.0
tot_dead:  13.0
tot_all:  179.0
--------------------------------------------------
alive proportion:  4.216867469879518
dead proportion:  0.0
total proportion:  3.910614525139665
--------------------------------------------------
alive proportion:  2.4096385542168677
dead proportion:  0.0
total

--------------------------------------------------
Valid species:  Acacia auriculiformis
Valid alive:  9.0
Valid dead:  3.0
Valid species:  Lophostemon lactifluus
Valid alive:  6.0
Valid dead:  0.0
Valid species:  dead
Valid alive:  0.0
Valid dead:  5.0
Valid species:  P. careya
Valid alive:  1.0
Valid dead:  0.0
Valid species:  E. chlorostachys
Valid alive:  1.0
Valid dead:  0.0
Valid species:  Eucalyptus bella
Valid alive:  1.0
Valid dead:  0.0
Valid species:  Eucalyptus tectifica
Valid alive:  1.0
Valid dead:  0.0
position totals:  19.0 8.0 27.0
--------------------------------------------------
Valid species:  Acacia auriculiformis
Valid alive:  9.0
Valid dead:  3.0
Valid species:  Lophostemon lactifluus
Valid alive:  1.0
Valid dead:  0.0
Valid species:  P. careya
Valid alive:  1.0
Valid dead:  0.0
Valid species:  E. chlorostachys
Valid alive:  1.0
Valid dead:  0.0
Valid species:  Mallotus nesophilus
Valid alive:  5.0
Valid dead:  0.0
Valid species:  Breynia cerunea
Valid alive:  1

Valid species:  E. miniata
Valid alive:  2.0
Valid dead:  2.0
Valid species:  E. chlorostachys
Valid alive:  15.0
Valid dead:  0.0
Valid species:  Corymbia polycarpa
Valid alive:  2.0
Valid dead:  0.0
Valid species:  Corymbia polysciada
Valid alive:  5.0
Valid dead:  0.0
Valid species:  P. spiralis
Valid alive:  1.0
Valid dead:  0.0
Valid species:  L. humilis
Valid alive:  7.0
Valid dead:  0.0
Valid species:  dead
Valid alive:  0.0
Valid dead:  5.0
position totals:  34.0 7.0 41.0
--------------------------------------------------
Valid species:  E. tetrodonta
Valid alive:  4.0
Valid dead:  0.0
Valid species:  E. miniata
Valid alive:  4.0
Valid dead:  0.0
Valid species:  E. chlorostachys
Valid alive:  7.0
Valid dead:  0.0
Valid species:  Corymbia polycarpa
Valid alive:  3.0
Valid dead:  0.0
Valid species:  Corymbia polysciada
Valid alive:  4.0
Valid dead:  0.0
Valid species:  L. humilis
Valid alive:  5.0
Valid dead:  0.0
Valid species:  dead
Valid alive:  0.0
Valid dead:  1.0
Valid spec

--------------------------------------------------
alive proportion:  0.7352941176470588
dead proportion:  0.0
total proportion:  0.6711409395973155
--------------------------------------------------                     species  alive  dead   alv_prop   ded_prop  total_prop
0           Acacia latesens    6.0   2.0   4.411765  15.384615    5.369128
1        Buchanania obovata    2.0   0.0   1.470588   0.000000    1.342282
2       Corymbia ferruginea    4.0   1.0   2.941176   7.692308    3.355705
3              Corymbia sp.    1.0   0.0   0.735294   0.000000    0.671141
4           Denhmai obscura    1.0   0.0   0.735294   0.000000    0.671141
5          E. chlorostachys    2.0   2.0   1.470588  15.384615    2.684564
6                E. miniata   75.0   6.0  55.147059  46.153846   54.362416
7             E. tetrodonta   37.0   2.0  27.205882  15.384615   26.174497
8            Eucalyptus sp.    1.0   0.0   0.735294   0.000000    0.671141
9       Exacarpos latafolia    5.0   0.0   3.67647

Groupby Species: 
**************************************************
groupby dataframe:                 species  alive  dead
0   Buchanania obovata    5.0   0.0
1  Corymbia ferruginea    6.0   0.0
2     E. chlorostachys   16.0   5.0
3           E. miniata   36.0   3.0
4        E. tetrodonta  116.0   6.0
5         P. pubescens    6.0   1.0
6       Planchonia sp.    1.0   1.0
--------------------------------------PROPORTIONS-----------------------------------
               species  alive  dead
0   Buchanania obovata    5.0   0.0
1  Corymbia ferruginea    6.0   0.0
2     E. chlorostachys   16.0   5.0
3           E. miniata   36.0   3.0
4        E. tetrodonta  116.0   6.0
5         P. pubescens    6.0   1.0
6       Planchonia sp.    1.0   1.0
tot_alive:  186.0
tot_dead:  16.0
tot_all:  202.0
--------------------------------------------------
alive proportion:  2.6881720430107525
dead proportion:  0.0
total proportion:  2.4752475247524752
--------------------------------------------------


5                    Shrub x    3.0   0.0
tot_alive:  114.0
tot_dead:  25.0
tot_all:  139.0
--------------------------------------------------
alive proportion:  0.0
dead proportion:  4.0
total proportion:  0.7194244604316548
--------------------------------------------------
alive proportion:  78.0701754385965
dead proportion:  96.0
total proportion:  81.29496402877699
--------------------------------------------------
alive proportion:  0.0
dead proportion:  0.0
total proportion:  0.0
--------------------------------------------------
alive proportion:  3.508771929824561
dead proportion:  0.0
total proportion:  2.877697841726619
--------------------------------------------------
alive proportion:  15.789473684210526
dead proportion:  0.0
total proportion:  12.949640287769784
--------------------------------------------------
alive proportion:  2.631578947368421
dead proportion:  0.0
total proportion:  2.158273381294964
--------------------------------------------------               

position totals:  15.0 0.0 15.0
--------------------------------------------------
Valid species:  Euc.coolibah
Valid alive:  2.0
Valid dead:  0.0
Valid species:  Acasia victoria
Valid alive:  0.0
Valid dead:  0.0
Valid species:  Exco.parvifilia
Valid alive:  5.0
Valid dead:  0.0
position totals:  7.0 0.0 7.0
--------------------------------------------------
Valid species:  Euc.coolibah
Valid alive:  5.0
Valid dead:  0.0
Valid species:  Acasia victoria
Valid alive:  0.0
Valid dead:  0.0
Valid species:  Exco.parvifilia
Valid alive:  2.0
Valid dead:  0.0
position totals:  7.0 0.0 7.0
--------------------------------------------------
Valid species:  Euc.coolibah
Valid alive:  12.0
Valid dead:  0.0
Valid species:  Acasia victoria
Valid alive:  1.0
Valid dead:  0.0
position totals:  13.0 0.0 13.0
--------------------------------------------------
Total basal count locations within site:  7.0
List of ALIVE position totals stems:  [7.0, 5.0, 14.0, 15.0, 7.0, 7.0, 13.0]
Totals at:  68.0
Equa

newdf3:    species alive dead alv_prop ded_prop total_prop    site  uid  \
0    None     0    0        0        0          0  eva10a   31   

                date  factor  loc_count  \
0  20130719 11:00:29  9999.0        7.0   

                                       geometry   lon_gda94  lat_gda94  
0  POINT (134.6929538002481 -17.98821865701277)  134.692954 -17.988219  
eva11a
count:  7.0
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
Total basal count locations within site:  7.0
List of ALIVE position totals stems

position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
Total basal count locations within site:  7.0
List of ALIVE position totals stems:  [0, 0, 0, 0, 0, 0, 0]
Totals at:  0
Equals:  0.0
List of DEAD position totals stems:  [0, 0, 0, 0, 0, 0, 0]
Totals at:  0
Equals:  0.0
List of TOTAL position totals stems:  [0, 0, 0, 0, 0, 0, 0]
Totals at:  0
Equals:  0.0
Groupby Species: 
**************************************************
groupby dataframe:  Empty DataFrame
Columns: [species, alive, dead]
Index: []
--------------------------------------PROPORTIONS-----------------------------------
Empty DataFrame
Columns: [species, alive, dead]
Index: []
tot_alive:  0.0
tot_dead:  0.0
tot_all:  0.0
-------------------------------------------------- Empty DataFrame
Columns: [species, alive, dead, alv_prop, ded_prop, total_prop]
Index: []
-------------------------------------------------- Empty DataFrame
C

newdf3:    species alive dead alv_prop ded_prop total_prop    site  uid  \
0    None     0    0        0        0          0  hls12a   43   

                date  factor  loc_count  \
0  20131010 08:45:25  9999.0        7.0   

                                       geometry   lon_gda94  lat_gda94  
0  POINT (134.2906272113261 -18.19650201400229)  134.290627 -18.196502  
hls13a
count:  7.0
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
Total basal count locations within site:  7.0
List of ALIVE position totals stems

Groupby Species: 
**************************************************
groupby dataframe:  Empty DataFrame
Columns: [species, alive, dead]
Index: []
--------------------------------------PROPORTIONS-----------------------------------
Empty DataFrame
Columns: [species, alive, dead]
Index: []
tot_alive:  0.0
tot_dead:  0.0
tot_all:  0.0
-------------------------------------------------- Empty DataFrame
Columns: [species, alive, dead, alv_prop, ded_prop, total_prop]
Index: []
-------------------------------------------------- Empty DataFrame
Columns: [species, alive, dead, alv_prop, ded_prop, total_prop, site, uid, date, factor, loc_count, geometry, lon_gda94, lat_gda94]
Index: []
++++++++++++++++++++++++++++++++++++++++++++++++++
Data frame is Empty
++++++++++++++++++++++++++++++++++++++++++++++++++
newdf3:    species alive dead alv_prop ded_prop total_prop    site  uid  \
0    None     0    0        0        0          0  hls19a   50   

                date  factor  loc_count  \
0  20131

Groupby Species: 
**************************************************
groupby dataframe:  Empty DataFrame
Columns: [species, alive, dead]
Index: []
--------------------------------------PROPORTIONS-----------------------------------
Empty DataFrame
Columns: [species, alive, dead]
Index: []
tot_alive:  0.0
tot_dead:  0.0
tot_all:  0.0
-------------------------------------------------- Empty DataFrame
Columns: [species, alive, dead, alv_prop, ded_prop, total_prop]
Index: []
-------------------------------------------------- Empty DataFrame
Columns: [species, alive, dead, alv_prop, ded_prop, total_prop, site, uid, date, factor, loc_count, geometry, lon_gda94, lat_gda94]
Index: []
++++++++++++++++++++++++++++++++++++++++++++++++++
Data frame is Empty
++++++++++++++++++++++++++++++++++++++++++++++++++
newdf3:    species alive dead alv_prop ded_prop total_prop    site  uid  \
0    None     0    0        0        0          0  mgb08a   55   

                date  factor  loc_count  \
0  20130

position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
position totals:  0 0 0
--------------------------------------------------
Total basal count locations within site:  7.0
List of ALIVE position totals stems:  [0, 0, 0, 0, 0, 0, 0]
Totals at:  0
Equals:  0.0
List of DEAD position totals stems:  [0, 0, 0, 0, 0, 0, 0]
Totals at:  0
Equals:  0.0
List of TOTAL position totals stems:  [0, 0, 0, 0, 0, 0, 0]
Totals at:  0
Equals:  0.0
Groupby Species: 
**************************************************
groupby dataframe:  Empty DataFrame
Columns: [species, alive, dead]
Index: []
--------------------------------------PROPORTIONS-----------------------------------
Empty DataFrame
Columns: [species, alive, dead]
Index: []
tot_alive:  0.0
tot_dead:  0.0
tot_all:  0.0
-------------------------------------------------- Empty DataFrame
Columns: [species, alive, dead, alv_prop, ded_prop, total_prop]
Ind

tot_alive:  0.0
tot_dead:  0.0
tot_all:  0.0
-------------------------------------------------- Empty DataFrame
Columns: [species, alive, dead, alv_prop, ded_prop, total_prop]
Index: []
-------------------------------------------------- Empty DataFrame
Columns: [species, alive, dead, alv_prop, ded_prop, total_prop, site, uid, date, factor, loc_count, geometry, lon_gda94, lat_gda94]
Index: []
++++++++++++++++++++++++++++++++++++++++++++++++++
Data frame is Empty
++++++++++++++++++++++++++++++++++++++++++++++++++
newdf3:    species alive dead alv_prop ded_prop total_prop    site  uid  \
0    None     0    0        0        0          0  mlp11a   66   

                date  factor  loc_count  \
0  20130810 08:46:21  9999.0        7.0   

                                       geometry   lon_gda94  lat_gda94  
0  POINT (135.6499062958772 -17.31165539117357)  135.649906 -17.311655  
mlp13
count:  7.0
position totals:  0 0 0
--------------------------------------------------
position totals

In [29]:
df_av.to_csv(r"Z:\Scratch\Rob\tern\tree_biomass_field_data\scratch\av_test2.csv", index=False)

# Original

In [30]:
# df_list = []
# index_list = []

# site_name_list = []
# site_count_list = []
# site_average_alive_list = []
# site_average_dead_list = []
# site_average_total_list = []

# for index, row in clean_gdf.iterrows():
# #     print(index)
# #     print(row)
#     row.dropna(inplace=True)
#     site = row["site"]
#     count = row["count"]
#     uid = row["uid"]
#     print("="*50)
#     print(site)
#     print("count: ", count)
#     site_list = []
#     index_list.append(site_list)
#     species = []
#     alive_count = []
#     dead_count = []
    
#     site_list = []
#     count_list = []
#     site_pos_al = []
#     site_pos_ded = []
#     site_pos_tot = []
    
#     for pos in pos_list:
#         pos_species = []
#         pos_sp_live = []
#         pos_sp_dead = []
#         position_list = []
        
#         pos_cols = [col for col in clean_df.columns if pos in col]
# #         print("pos_cols: ", pos_cols)
#         for cols in pos_cols:
# #             print("="*50)
# #             print(cols)
#             if "{0}_sp".format(pos) in cols:
# #                 print("species")

#                 if(row[cols] == "9999"):
#                     pass
#                     #print('true')
#                 elif(row[cols] == 9999):
#                     pass
#                     #print('true')
#                 elif(row[cols] == 9999.0):
#                     pass
#                     #print('true')
#                 else:
#                     print("Valid species: ", row[cols])
#                     species.append(row[cols])
#                     pos_species.append(row[cols])
                    
                    
                                        
#             elif "{0}_l".format(pos) in cols:
#                 #print("live")
#                 live = row[cols]
                
#                 if(row[cols] == "9999"):
#                     pass
#                     #print('true')
#                 elif(row[cols] == 9999):
#                     pass
#                     #print('true')
#                 elif(row[cols] == 9999.0):
#                     pass
#                     #print('true')
#                 else:
#                     print("Valid alive: ", row[cols])
#                     alive_count.append(row[cols])
#                     pos_sp_live.append(row[cols])
                    
                
                    
#             elif "{0}_d".format(pos) in cols:
#                 #print("dead")
#                 dead = row[cols]
                
                
#                 if(row[cols] == "9999"):
#                     pass
#                     #print('true')
#                 elif(row[cols] == 9999):
#                     pass
#                     #print('true')
#                 elif(row[cols] == 9999.0):
#                     pass
#                     #print('true')
#                 else:
#                     print("Valid dead: ", row[cols])
#                     dead_count.append(row[cols])
#                     pos_sp_dead.append(row[cols])
                    
#             else:
#                 print("ERROR"*100)

            
#         sum_al = sum(pos_sp_live)
#         sum_dead = sum(pos_sp_dead)
#         sum_all = sum_al + sum_dead
#         #print("sum_all: ", sum_all)
#         #print(site)
#         #print(count)
#         site_list.append(site)
#         count_list.append(count)
#         site_pos_al.append(sum_al)
#         site_pos_ded.append(sum_dead)
#         site_pos_tot.append(sum_all)

#         print("position totals: ", sum_al, sum_dead, sum_all)
#         print("-"*50)
#     print("Total basal count locations within site: ", count)
#     print("List of ALIVE position totals stems: ", site_pos_al)
#     print("Totals at: ", sum(site_pos_al))
#     print("Equals: ", sum(site_pos_al)/count)


#     print("List of DEAD position totals stems: ", site_pos_ded)
#     print("Totals at: ", sum(site_pos_ded))
#     print("Equals: ", sum(site_pos_ded)/count)
    

#     print("List of TOTAL position totals stems: ", site_pos_tot)
#     print("Totals at: ", sum(site_pos_tot))
#     print("Equals: ", sum(site_pos_tot)/count)
    
#     site_name_list.append(site)
#     site_count_list.append(count)
#     site_average_alive_list.append(sum(site_pos_al)/count)
#     site_average_dead_list.append(sum(site_pos_ded)/count)
#     site_average_total_list.append(sum(site_pos_tot)/count)

    
#     print('='*20)
    
#     df = pd.DataFrame(list(zip(species, alive_count, dead_count)),
#            columns =['species', 'alive', 'dead'])

#     print("Groupby Species: ")
#     # groupby species list
#     df2 = df.groupby('species').sum()
#     df3 = df2.reset_index()
#     print("*"*50)
#     print("groupby dataframe: ", df3)
#     #print("df columns: ", df3.columns)
    
#     # calculate strand species proprtions
#     df3 = prop(df3)
#     print("-"*50, df3)
#     # add columns and fill
#     df3 = add_columns(df3, row)
    
    
#     print("-"*50, df3)
#     df_list.append(df3)
   

# # site_name_list = []
# # site_count_list = []
# # site_average_alive_list = []
# # site_average_dead_list = []
# # site_average_total_list = []

# print("+"*50)
# print(site_name_list)
# print(site_average_alive_list)

# site_average_alive_list
# site_average_dead_list
# site_average_total_list
# print("-"*50)

# # create lists of usefull data
# factor_list = clean_df['factor'].tolist()
# print("Factor list: ", factor_list)

# factor = [0.0 if item == 9999.0 else item for item in factor_list]
# print("Factor: ", factor)


# ## Convert averages by timing the site average strand counts by the basl wedge factor
# cor_site_average_alive_list = [i*fac for i, fac in zip(site_average_alive_list, factor)]
# cor_site_average_dead_list = [i*fac for i, fac in zip(site_average_dead_list, factor)]
# cor_site_average_total_list = [i*fac for i, fac in zip(site_average_total_list, factor)]


# df_av = pd.DataFrame(list(zip(site_name_list, factor, site_count_list, site_average_alive_list, site_average_dead_list, site_average_total_list,
#                              cor_site_average_alive_list, cor_site_average_dead_list, cor_site_average_total_list)),
#        columns =['site', 'factor', 'count', 'avg_alive', 'avg_dead', 'avg_total', 'cor_av_liv', 'cor_av_ded', 'cor_av_tot'])
# print("DF_AV", df_av)

In [31]:
print(df_av.shape)
print(len(df_av.site.unique()))

(69, 9)
69


In [32]:
df_av

Unnamed: 0,site,factor,count,avg_alive,avg_dead,avg_total,cor_av_liv,cor_av_ded,cor_av_tot
0,girra02,0.10,7.0,20.714286,3.428571,24.142857,2.071429,0.342857,2.414286
1,lit01,0.50,7.0,34.571429,1.428571,36.000000,17.285714,0.714286,18.000000
2,buff01,0.25,7.0,23.714286,1.857143,25.571429,5.928571,0.464286,6.392857
3,ep01,0.50,7.0,26.428571,2.571429,29.000000,13.214286,1.285714,14.500000
4,girra01,0.50,6.0,25.833333,1.166667,27.000000,12.916667,0.583333,13.500000
...,...,...,...,...,...,...,...,...,...
64,mlp11a,0.00,7.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
65,mlp13,0.00,7.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
66,mlp14a,0.00,7.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
67,mlp15a,0.00,7.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [33]:
df_av.to_csv(r"Z:\Scratch\Rob\tern\tree_biomass_field_data\scratch\df_av_v2.csv", index=False)

In [34]:
for i in df_list:
    print("-"*50)
    print(i)

--------------------------------------------------
                  species  alive  dead   alv_prop   ded_prop  total_prop  \
0         Eucalyptus alba   12.0   0.0   8.333333   0.000000    7.142857   
1  Lophostemon lactifluus   20.0   4.0  13.888889  16.666667   14.285714   
2        Melaleuca nervoa   93.0  19.0  64.583333  79.166667   66.666667   
3             P. spiralis   19.0   1.0  13.194444   4.166667   11.904762   

      site  uid      date  factor  loc_count  \
0  girra02    5  20120605     0.1        7.0   
1  girra02    5  20120605     0.1        7.0   
2  girra02    5  20120605     0.1        7.0   
3  girra02    5  20120605     0.1        7.0   

                                       geometry  lon_gda94  lat_gda94  
0  POINT (131.1320003975128 -12.52340665621054)    131.132 -12.523407  
1  POINT (131.1320003975128 -12.52340665621054)    131.132 -12.523407  
2  POINT (131.1320003975128 -12.52340665621054)    131.132 -12.523407  
3  POINT (131.1320003975128 -12.5234066

--------------------------------------------------
  species alive dead alv_prop ded_prop total_prop    site  uid  \
0    None     0    0        0        0          0  hls18a   49   

                date  factor  loc_count  \
0  20131008 10:34:05  9999.0        7.0   

                                       geometry   lon_gda94  lat_gda94  
0  POINT (134.3914683987728 -18.34578493665652)  134.391468 -18.345785  
--------------------------------------------------
  species alive dead alv_prop ded_prop total_prop    site  uid  \
0    None     0    0        0        0          0  hls19a   50   

                date  factor  loc_count  \
0  20131008 15:49:33  9999.0        7.0   

                                       geometry   lon_gda94  lat_gda94  
0  POINT (134.4827320278218 -18.35285102733756)  134.482732 -18.352851  
--------------------------------------------------
  species alive dead alv_prop ded_prop total_prop    site  uid  \
0    None     0    0        0        0          0

In [35]:
df1 = pd.concat(df_list)
df2 = df1.reset_index(drop=True)

In [36]:
df2

Unnamed: 0,species,alive,dead,alv_prop,ded_prop,total_prop,site,uid,date,factor,loc_count,geometry,lon_gda94,lat_gda94
0,Eucalyptus alba,12,0,8.33333,0,7.14286,girra02,5,20120605,0.1,7.0,POINT (131.1320003975128 -12.52340665621054),131.132000,-12.523407
1,Lophostemon lactifluus,20,4,13.8889,16.6667,14.2857,girra02,5,20120605,0.1,7.0,POINT (131.1320003975128 -12.52340665621054),131.132000,-12.523407
2,Melaleuca nervoa,93,19,64.5833,79.1667,66.6667,girra02,5,20120605,0.1,7.0,POINT (131.1320003975128 -12.52340665621054),131.132000,-12.523407
3,P. spiralis,19,1,13.1944,4.16667,11.9048,girra02,5,20120605,0.1,7.0,POINT (131.1320003975128 -12.52340665621054),131.132000,-12.523407
4,Buchorvet,11,0,4.54545,0,4.36508,lit01,1,20130426,0.5,7.0,POINT (131.06191720062 -12.54791228170243),131.061917,-12.547912
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,,0,0,0,0,0,mlp13,67,20130510 00:44:10,9999.0,7.0,POINT (135.544165764074 -17.17326698471134),135.544166,-17.173267
189,,0,0,0,0,0,mlp14a,68,20130811 10:00:10,9999.0,7.0,POINT (135.6819632199659 -17.18551191793024),135.681963,-17.185512
190,,0,0,0,0,0,mlp15a,69,20130812 07:45:26,9999.0,7.0,POINT (-133.3049687273343 -89.38381337119318),-133.304969,-89.383813
191,Acacia aneura,0,2,0,100,11.1111,ubr08a,70,20130829 09:16:56,0.1,7.0,POINT (133.4832569765393 -25.79711532218585),133.483257,-25.797115


In [37]:

df2['factor'] = df2['factor'].replace(9999.0,0.0)
print(df2)


                    species alive dead alv_prop ded_prop total_prop     site  \
0           Eucalyptus alba    12    0  8.33333        0    7.14286  girra02   
1    Lophostemon lactifluus    20    4  13.8889  16.6667    14.2857  girra02   
2          Melaleuca nervoa    93   19  64.5833  79.1667    66.6667  girra02   
3               P. spiralis    19    1  13.1944  4.16667    11.9048  girra02   
4                 Buchorvet    11    0  4.54545        0    4.36508    lit01   
..                      ...   ...  ...      ...      ...        ...      ...   
188                    None     0    0        0        0          0    mlp13   
189                    None     0    0        0        0          0   mlp14a   
190                    None     0    0        0        0          0   mlp15a   
191           Acacia aneura     0    2        0      100    11.1111   ubr08a   
192             Hakea lorea    16    0      100        0    88.8889   ubr08a   

     uid               date  factor  lo

In [38]:
df_list[0]
print(len(df_list))

69


In [39]:
df2.head(10)

Unnamed: 0,species,alive,dead,alv_prop,ded_prop,total_prop,site,uid,date,factor,loc_count,geometry,lon_gda94,lat_gda94
0,Eucalyptus alba,12,0,8.33333,0.0,7.14286,girra02,5,20120605,0.1,7.0,POINT (131.1320003975128 -12.52340665621054),131.132,-12.523407
1,Lophostemon lactifluus,20,4,13.8889,16.6667,14.2857,girra02,5,20120605,0.1,7.0,POINT (131.1320003975128 -12.52340665621054),131.132,-12.523407
2,Melaleuca nervoa,93,19,64.5833,79.1667,66.6667,girra02,5,20120605,0.1,7.0,POINT (131.1320003975128 -12.52340665621054),131.132,-12.523407
3,P. spiralis,19,1,13.1944,4.16667,11.9048,girra02,5,20120605,0.1,7.0,POINT (131.1320003975128 -12.52340665621054),131.132,-12.523407
4,Buchorvet,11,0,4.54545,0.0,4.36508,lit01,1,20130426,0.5,7.0,POINT (131.06191720062 -12.54791228170243),131.061917,-12.547912
5,C. armstrongii,3,0,1.23967,0.0,1.19048,lit01,1,20130426,0.5,7.0,POINT (131.06191720062 -12.54791228170243),131.061917,-12.547912
6,C. fraseri,2,0,0.826446,0.0,0.793651,lit01,1,20130426,0.5,7.0,POINT (131.06191720062 -12.54791228170243),131.061917,-12.547912
7,Corymbia sp.,1,0,0.413223,0.0,0.396825,lit01,1,20130426,0.5,7.0,POINT (131.06191720062 -12.54791228170243),131.061917,-12.547912
8,E. chlorostachys,38,2,15.7025,20.0,15.873,lit01,1,20130426,0.5,7.0,POINT (131.06191720062 -12.54791228170243),131.061917,-12.547912
9,E. miniata,18,0,7.43802,0.0,7.14286,lit01,1,20130426,0.5,7.0,POINT (131.06191720062 -12.54791228170243),131.061917,-12.547912


In [40]:
print(df2.shape)
print(len(df2.site.unique()))

(193, 14)
69


In [41]:
df_av

Unnamed: 0,site,factor,count,avg_alive,avg_dead,avg_total,cor_av_liv,cor_av_ded,cor_av_tot
0,girra02,0.10,7.0,20.714286,3.428571,24.142857,2.071429,0.342857,2.414286
1,lit01,0.50,7.0,34.571429,1.428571,36.000000,17.285714,0.714286,18.000000
2,buff01,0.25,7.0,23.714286,1.857143,25.571429,5.928571,0.464286,6.392857
3,ep01,0.50,7.0,26.428571,2.571429,29.000000,13.214286,1.285714,14.500000
4,girra01,0.50,6.0,25.833333,1.166667,27.000000,12.916667,0.583333,13.500000
...,...,...,...,...,...,...,...,...,...
64,mlp11a,0.00,7.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
65,mlp13,0.00,7.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
66,mlp14a,0.00,7.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
67,mlp15a,0.00,7.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [42]:
print(df_av.site.unique())

['girra02' 'lit01' 'buff01' 'ep01' 'girra01' 'hshr01' 'hsf01' 'hsf02'
 'wedo01' 'wed03' 'umb07' 'cst03a' 'centa13' 'centa14' 'gulf11' 'atl24a'
 'btl01a' 'btl02a' 'btl03a' 'btl05a' 'btl06a' 'btl07a' 'eva02a' 'eva03a'
 'eva04a' 'eva05a' 'eva06a' 'eva07a' 'eva08a' 'eva09a' 'eva10a' 'eva11a'
 'hls01a' 'hls02a' 'hls03a' 'hls04a' 'hls05a' 'hls06a' 'hls07a' 'hls08a'
 'hls09a' 'hls0a' 'hls12a' 'hls13a' 'hls14a' 'hls15a' 'hls16a' 'hls17'
 'hls18a' 'hls19a' 'mgb02a' 'mgb04' 'mgb05a' 'mgb07a' 'mgb08a' 'mgb09a'
 'mgb10a' 'mlp02' 'mlp03a' 'mlp04a' 'mlp06a' 'mlp07a' 'mlp08' 'mlp10a'
 'mlp11a' 'mlp13' 'mlp14a' 'mlp15a' 'ubr08a']


In [43]:
print(df2.site.unique())

['girra02' 'lit01' 'buff01' 'ep01' 'girra01' 'hshr01' 'hsf01' 'hsf02'
 'wedo01' 'wed03' 'umb07' 'cst03a' 'centa13' 'centa14' 'gulf11' 'atl24a'
 'btl01a' 'btl02a' 'btl03a' 'btl05a' 'btl06a' 'btl07a' 'eva02a' 'eva03a'
 'eva04a' 'eva05a' 'eva06a' 'eva07a' 'eva08a' 'eva09a' 'eva10a' 'eva11a'
 'hls01a' 'hls02a' 'hls03a' 'hls04a' 'hls05a' 'hls06a' 'hls07a' 'hls08a'
 'hls09a' 'hls0a' 'hls12a' 'hls13a' 'hls14a' 'hls15a' 'hls16a' 'hls17'
 'hls18a' 'hls19a' 'mgb02a' 'mgb04' 'mgb05a' 'mgb07a' 'mgb08a' 'mgb09a'
 'mgb10a' 'mlp02' 'mlp03a' 'mlp04a' 'mlp06a' 'mlp07a' 'mlp08' 'mlp10a'
 'mlp11a' 'mlp13' 'mlp14a' 'mlp15a' 'ubr08a']


In [44]:
df2.to_csv(r"Z:\Scratch\Rob\tern\tree_biomass_field_data\scratch\df2_v2.csv")

In [45]:
result = pd.merge(df2, df_av, on=["site", "site"])

### Dataframe has site averages and coorrected averages calculated

column headers:
 - avg: average stem counts per site (i.e. sum(cent, north etc.)/num(basal counts within site)
 - cor_av: corrected stem count average (i.e. avg * factor)
    

In [46]:
result

Unnamed: 0,species,alive,dead,alv_prop,ded_prop,total_prop,site,uid,date,factor_x,...,lon_gda94,lat_gda94,factor_y,count,avg_alive,avg_dead,avg_total,cor_av_liv,cor_av_ded,cor_av_tot
0,Eucalyptus alba,12,0,8.33333,0,7.14286,girra02,5,20120605,0.1,...,131.132000,-12.523407,0.1,7.0,20.714286,3.428571,24.142857,2.071429,0.342857,2.414286
1,Lophostemon lactifluus,20,4,13.8889,16.6667,14.2857,girra02,5,20120605,0.1,...,131.132000,-12.523407,0.1,7.0,20.714286,3.428571,24.142857,2.071429,0.342857,2.414286
2,Melaleuca nervoa,93,19,64.5833,79.1667,66.6667,girra02,5,20120605,0.1,...,131.132000,-12.523407,0.1,7.0,20.714286,3.428571,24.142857,2.071429,0.342857,2.414286
3,P. spiralis,19,1,13.1944,4.16667,11.9048,girra02,5,20120605,0.1,...,131.132000,-12.523407,0.1,7.0,20.714286,3.428571,24.142857,2.071429,0.342857,2.414286
4,Buchorvet,11,0,4.54545,0,4.36508,lit01,1,20130426,0.5,...,131.061917,-12.547912,0.5,7.0,34.571429,1.428571,36.000000,17.285714,0.714286,18.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,,0,0,0,0,0,mlp13,67,20130510 00:44:10,0.0,...,135.544166,-17.173267,0.0,7.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
189,,0,0,0,0,0,mlp14a,68,20130811 10:00:10,0.0,...,135.681963,-17.185512,0.0,7.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
190,,0,0,0,0,0,mlp15a,69,20130812 07:45:26,0.0,...,-133.304969,-89.383813,0.0,7.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
191,Acacia aneura,0,2,0,100,11.1111,ubr08a,70,20130829 09:16:56,0.1,...,133.483257,-25.797115,0.1,7.0,2.285714,0.285714,2.571429,0.228571,0.028571,0.257143


In [47]:
print(len(result.site.unique()))

69


In [48]:
#df2_ = basal_area_m2(result)
df2_ = result

In [49]:
df2_.sample(3)

Unnamed: 0,species,alive,dead,alv_prop,ded_prop,total_prop,site,uid,date,factor_x,...,lon_gda94,lat_gda94,factor_y,count,avg_alive,avg_dead,avg_total,cor_av_liv,cor_av_ded,cor_av_tot
158,,0,0,0.0,0,0.0,hls12a,43,20131010 08:45:25,0.0,...,134.290627,-18.196502,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0
131,Dichromaflioa,18,0,15.7895,0,12.9496,btl05a,19,20130830 07:42:17,0.25,...,134.230964,-16.904579,0.25,7.0,16.285714,3.571429,19.857143,4.071429,0.892857,4.964286
59,Premna acuminata,3,0,1.76471,0,1.5625,hshr01,6,20120606,1.0,...,131.062073,-12.416566,1.0,7.0,24.285714,3.142857,27.428571,24.285714,3.142857,27.428571


In [50]:
# def proportions(df_):
#     """ Calculate the site perportions % """
#     df_list = []
#     for uid in df_.uid.unique():
#         print(uid)
#         df = df_[df_['uid']== uid]
#         al_prop_list = []
#         dead_prop_list = []
#         tot_prop_list = []
#         total_alive = df['alive'].sum()
#         print("total alive count: ", total_alive)
#         total_dead = df["dead"].sum()
#         for species in df["species"].unique():
#             alive = df.loc[df["species"]==species, "alive"].iloc[0]
#             dead = df.loc[df["species"]==species, "dead"].iloc[0]

#             alive_port = alive / total_alive *100
#             dead_port = dead / total_dead *100
#             tot_port = (alive + dead) /(total_alive + total_dead) *100

#             al_prop_list.append(alive_port)
#             dead_prop_list.append(dead_port)
#             tot_prop_list.append(tot_port)

#             print("total_alive: ", total_alive)

#         df["al_prop"] = al_prop_list
#         df["d_prop"] = dead_prop_list
#         df["tot_prop"] = tot_prop_list
        
#         df_list.append(df)
#     df1 = pd.concat(df_list)
    
#     return df1

In [51]:
#df3 = proportions(df2_)

In [52]:
#df3

In [53]:
def define_class(df):
    
    class_ = []
    leaves = []
    leaves_oth = []
    twigs = []
    bark = []
    bark_oth = []
    wood = []
    wood_oth = []
    branches = []
    branches_oth = []
    stems = []
    stems_oth = []
    agb = []
    agb_oth = []
    root = []
    root_oth = []
    tot_bm = []
    
    
    for index, row in df.iterrows():
        
        species = row["species"]
        #print(species)
        
    
        if species == "E. tetrodonta" or species == " Eucalyptus tetrodonta":
            
            #print("Eute")
            class_.append("Eute")
            leaves.append(122)
            leaves_oth.append(0.84)
            twigs.append(127)
            bark.append(341)
            bark_oth.append(0.99)
            wood.append(2161)
            wood_oth.append(0.93)
            branches.append(799)
            branches_oth.append(0.85)
            stems.append(2502)
            stems_oth.append(0.95)
            agb.append(3403)
            agb_oth.append(0.97)
            root.append(542)
            root_oth.append(0.57)
            tot_bm.append(3945)
            
        elif species == "E. miniata" or species == "Eucalyptus miniata":
   
            #print("Eumi")
            class_.append("Eumi")
            leaves.append(50)
            leaves_oth.append(0.96)
            twigs.append(52)
            bark.append(218)
            bark_oth.append(0.92)
            wood.append(1829)
            wood_oth.append(0.95)
            branches.append(375)
            branches_oth.append(0.79)
            stems.append(2047)
            stems_oth.append(0.96)
            agb.append(2472)
            agb_oth.append(0.96)
            root.append(542)
            root_oth.append(0.57)
            tot_bm.append(3014)

        elif species == "E. porrecta" or species == "Eucalyptus porrecta" or species =="C. porrecta" or species == "Corymbia porrecta":
               
            #print("Eupo")    
            class_.append("Eupo")
            leaves.append(73)
            leaves_oth.append(0.85)
            twigs.append(76)
            bark.append(326)
            bark_oth.append(0.98)
            wood.append(1289)
            wood_oth.append(0.98)
            branches.append(619)
            branches_oth.append(0.96)
            stems.append(1616)
            stems_oth.append(0.90)
            agb.append(2308)
            agb_oth.append(0.98)
            root.append(542)
            root_oth.append(0.57)
            tot_bm.append(2850)
            
        elif species == "E. bleeseri" or species == "Eucalyptus bleeseri" or species =="C. bleeseri" or species == "Corymbia bleeseri":
            
            #print("Eubl")
            class_.append("Eubl")
            leaves.append(49)
            leaves_oth.append(0.80)
            twigs.append(51)
            bark.append(347)
            bark_oth.append(0.97)
            wood.append(2225)
            wood_oth.append(0.97)
            branches.append(1163)
            branches_oth.append(0.90)
            stems.append(2573)
            stems_oth.append(0.97)
            agb.append(3785)
            agb_oth.append(0.96)
            root.append(542)
            root_oth.append(0.57)
            tot_bm.append(4327)
            
            
        elif species == "E. chlorostachys" or species == "Erythrophleum chlorostachys":
            
            #print("Erch")
            class_.append("Erch")
            leaves.append(154)
            leaves_oth.append(0.75)
            twigs.append(160)
            bark.append(401)
            bark_oth.append(0.95)
            wood.append(1044)
            wood_oth.append(0.92)
            branches.append(814)
            branches_oth.append(0.62)
            stems.append(1445)
            stems_oth.append(0.93)
            agb.append(2413)
            agb_oth.append(0.82)
            root.append(542)
            root_oth.append(0.57)
            tot_bm.append(2955)
            
        else:
            
            #print("Other")
            class_.append("Tefe")
            leaves.append(93)
            leaves_oth.append(0.89)
            twigs.append(97)
            bark.append(379)
            bark_oth.append(0.92)
            wood.append(1233)
            wood_oth.append(0.92)
            branches.append(935)
            branches_oth.append(0.83)
            stems.append(1612)
            stems_oth.append(0.92)
            agb.append(2640)
            agb_oth.append(0.91)
            root.append(542)
            root_oth.append(0.57)
            tot_bm.append(3182)
            
    df["co_ef_nme"] = class_
    df["leaves"] = leaves
    df["leaves_r2"] = leaves_oth
    df["twigs"] = twigs
    df["bark"] = bark
    df["bark_r2"] = bark_oth
    df["wood"] = wood
    df["wood_r2"] = wood_oth
    df["branches"]= branches
    df["branches_r2"] = branches_oth
    df["stems"] = stems
    df["stems_r2"] = stems_oth
    df["agb"] = agb
    df["agb_r2"] = agb_oth
    df["root"] = root
    df["root_r2"] = root_oth
    df["tot_bm"] = tot_bm
    
    
    return df
            
    

In [54]:
df3 = df2_

In [55]:
df4 = define_class(df3)
df4.to_csv(r"Z:\Scratch\Rob\tern\tree_biomass_field_data\scratch\df4.csv", index=False)
df4

Unnamed: 0,species,alive,dead,alv_prop,ded_prop,total_prop,site,uid,date,factor_x,...,wood_r2,branches,branches_r2,stems,stems_r2,agb,agb_r2,root,root_r2,tot_bm
0,Eucalyptus alba,12,0,8.33333,0,7.14286,girra02,5,20120605,0.1,...,0.92,935,0.83,1612,0.92,2640,0.91,542,0.57,3182
1,Lophostemon lactifluus,20,4,13.8889,16.6667,14.2857,girra02,5,20120605,0.1,...,0.92,935,0.83,1612,0.92,2640,0.91,542,0.57,3182
2,Melaleuca nervoa,93,19,64.5833,79.1667,66.6667,girra02,5,20120605,0.1,...,0.92,935,0.83,1612,0.92,2640,0.91,542,0.57,3182
3,P. spiralis,19,1,13.1944,4.16667,11.9048,girra02,5,20120605,0.1,...,0.92,935,0.83,1612,0.92,2640,0.91,542,0.57,3182
4,Buchorvet,11,0,4.54545,0,4.36508,lit01,1,20130426,0.5,...,0.92,935,0.83,1612,0.92,2640,0.91,542,0.57,3182
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,,0,0,0,0,0,mlp13,67,20130510 00:44:10,0.0,...,0.92,935,0.83,1612,0.92,2640,0.91,542,0.57,3182
189,,0,0,0,0,0,mlp14a,68,20130811 10:00:10,0.0,...,0.92,935,0.83,1612,0.92,2640,0.91,542,0.57,3182
190,,0,0,0,0,0,mlp15a,69,20130812 07:45:26,0.0,...,0.92,935,0.83,1612,0.92,2640,0.91,542,0.57,3182
191,Acacia aneura,0,2,0,100,11.1111,ubr08a,70,20130829 09:16:56,0.1,...,0.92,935,0.83,1612,0.92,2640,0.91,542,0.57,3182


In [56]:
for i in df4.columns:
    print(i)

species
alive
dead
alv_prop
ded_prop
total_prop
site
uid
date
factor_x
loc_count
geometry
lon_gda94
lat_gda94
factor_y
count
avg_alive
avg_dead
avg_total
cor_av_liv
cor_av_ded
cor_av_tot
co_ef_nme
leaves
leaves_r2
twigs
bark
bark_r2
wood
wood_r2
branches
branches_r2
stems
stems_r2
agb
agb_r2
root
root_r2
tot_bm


In [71]:
def total_ba_m2(df):
    
    # Total basal area
    tot_ba = []
    al_ba = []
    ded_ba = []
    
    # Carbom stock
    c_leaves = []
    c_twigs = []
    c_bark = []
    c_wood = []
    c_branches = []
    c_stems = []
    c_agb = []
    c_roots = []
    

        
    for index, row in df.iterrows():
        #print(index)
        
        # Calculate total site basal area
        total_basal = (row["total_prop"] * row["cor_av_tot"])/100
        tot_ba.append(total_basal)
        #tot_ba.append((row["total_prop"] * row["cor_av_tot"])/100)
        
        alive_basal = (row["alv_prop"] * row["cor_av_liv"])/100
        al_ba.append(alive_basal)
        #al_ba.append((row["alv_prop"] * row["cor_av_liv"])/100)
        
        dead_basal = (row["ded_prop"] * row["cor_av_ded"])/100
        ded_ba.append(dead_basal)
        #ded_ba.append((row["ded_prop"] * row["cor_av_ded"])/100)
        # Calculate site carbon
        
        c_leaves.append(total_basal * row["leaves"])
        c_twigs.append(total_basal * row["twigs"])
        c_bark.append(total_basal * row["bark"])
        c_wood.append(total_basal * row["wood"])
        c_branches.append(total_basal * row["branches"])
        c_stems.append(total_basal * row["stems"])
        c_agb.append(total_basal * row["agb"])
        c_roots.append(total_basal * row["root"])
        
#         c_leaves.append(row["total_prop"] * row["leaves"])
#         c_twigs.append(row["total_prop"] * row["twigs"])
#         c_bark.append(row["total_prop"] * row["bark"])
#         c_wood.append(row["total_prop"] * row["wood"])
#         c_branches.append(row["total_prop"] * row["branches"])
#         c_stems.append(row["total_prop"] * row["stems"])
#         c_agb.append(row["total_prop"] * row["agb"])
#         c_roots.append(row["total_prop"] * row["root"])

    # Total basal area    
    df["total_alv_ba"] = al_ba
    df["total_ded_ba"] = ded_ba
    df["total_ba"] = tot_ba
    # Carbon stock
    df["c_leaves"] = c_leaves
    df["c_twigs"] = c_twigs
    df["c_bark"] = c_bark
    df["c_wood"] = c_wood
    df["c_branches"] = c_branches
    df["c_stems"] = c_stems
    df["c_agb"] = c_agb
    df["c_roots"] = c_roots
            
    return df
        

In [72]:
df4.columns

Index(['species', 'alive', 'dead', 'alv_prop', 'ded_prop', 'total_prop',
       'site', 'uid', 'date', 'factor_x', 'loc_count', 'geometry', 'lon_gda94',
       'lat_gda94', 'factor_y', 'count', 'avg_alive', 'avg_dead', 'avg_total',
       'cor_av_liv', 'cor_av_ded', 'cor_av_tot', 'co_ef_nme', 'leaves',
       'leaves_r2', 'twigs', 'bark', 'bark_r2', 'wood', 'wood_r2', 'branches',
       'branches_r2', 'stems', 'stems_r2', 'agb', 'agb_r2', 'root', 'root_r2',
       'tot_bm', 'total_alv_ba', 'total_ded_ba', 'total_ba', 'c_leaves',
       'c_twigs', 'c_bark', 'c_wood', 'c_branches', 'c_stems', 'c_agb',
       'c_roots'],
      dtype='object')

In [73]:
df5 =total_ba_m2(df4)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192


In [74]:

df5

Unnamed: 0,species,alive,dead,alv_prop,ded_prop,total_prop,site,uid,date,factor_x,...,total_ded_ba,total_ba,c_leaves,c_twigs,c_bark,c_wood,c_branches,c_stems,c_agb,c_roots
0,Eucalyptus alba,12,0,8.33333,0,7.14286,girra02,5,20120605,0.1,...,0.000000,0.172449,16.037755,16.727551,65.358163,212.629592,161.239796,277.987755,455.265306,93.467347
1,Lophostemon lactifluus,20,4,13.8889,16.6667,14.2857,girra02,5,20120605,0.1,...,0.057143,0.344898,32.075510,33.455102,130.716327,425.259184,322.479592,555.975510,910.530612,186.934694
2,Melaleuca nervoa,93,19,64.5833,79.1667,66.6667,girra02,5,20120605,0.1,...,0.271429,1.609524,149.685714,156.123810,610.009524,1984.542857,1504.904762,2594.552381,4249.142857,872.361905
3,P. spiralis,19,1,13.1944,4.16667,11.9048,girra02,5,20120605,0.1,...,0.014286,0.287415,26.729592,27.879252,108.930272,354.382653,268.732993,463.312925,758.775510,155.778912
4,Buchorvet,11,0,4.54545,0,4.36508,lit01,1,20130426,0.5,...,0.000000,0.785714,73.071429,76.214286,297.785714,968.785714,734.642857,1266.571429,2074.285714,425.857143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,,0,0,0,0,0,mlp13,67,20130510 00:44:10,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
189,,0,0,0,0,0,mlp14a,68,20130811 10:00:10,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
190,,0,0,0,0,0,mlp15a,69,20130812 07:45:26,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
191,Acacia aneura,0,2,0,100,11.1111,ubr08a,70,20130829 09:16:56,0.1,...,0.028571,0.028571,2.657143,2.771429,10.828571,35.228571,26.714286,46.057143,75.428571,15.485714


In [75]:
df5.columns

Index(['species', 'alive', 'dead', 'alv_prop', 'ded_prop', 'total_prop',
       'site', 'uid', 'date', 'factor_x', 'loc_count', 'geometry', 'lon_gda94',
       'lat_gda94', 'factor_y', 'count', 'avg_alive', 'avg_dead', 'avg_total',
       'cor_av_liv', 'cor_av_ded', 'cor_av_tot', 'co_ef_nme', 'leaves',
       'leaves_r2', 'twigs', 'bark', 'bark_r2', 'wood', 'wood_r2', 'branches',
       'branches_r2', 'stems', 'stems_r2', 'agb', 'agb_r2', 'root', 'root_r2',
       'tot_bm', 'total_alv_ba', 'total_ded_ba', 'total_ba', 'c_leaves',
       'c_twigs', 'c_bark', 'c_wood', 'c_branches', 'c_stems', 'c_agb',
       'c_roots'],
      dtype='object')

In [76]:
df5.to_csv(r"Z:\Scratch\Rob\tern\tree_biomass_field_data\scratch\df5_v6.csv", index=False)

In [78]:
df5.sample(2)

Unnamed: 0,species,alive,dead,alv_prop,ded_prop,total_prop,site,uid,date,factor_x,...,total_ded_ba,total_ba,c_leaves,c_twigs,c_bark,c_wood,c_branches,c_stems,c_agb,c_roots
69,E. miniata,1,0,0.362319,0,0.352113,hsf02,8,20120710,0.5,...,0.0,0.071429,3.571429,3.714286,15.571429,130.642857,26.785714,146.214286,176.571429,38.714286
181,Acacia sp,5,0,7.04225,0,6.57895,mlp08,64,20130510 00:44:10,0.25,...,0.0,0.178571,16.607143,17.321429,67.678571,220.178571,166.964286,287.857143,471.428571,96.785714


In [79]:
def total_kg_per_site(df):
    df_list = []

    for uid in df.uid.unique():
        df1 = df[df["uid"]==uid]
        #print(df1)
        
        df1["total_leaves"] = (df1.loc[:, "c_leaves"].sum())*0.47
        
        df1["total_twigs"] = (df1.loc[:, "c_twigs"].sum())*0.49
        
        df1["total_bark"] = (df1.loc[:, "c_bark"].sum())*0.49
        
        df1["total_wood"] = (df1.loc[:, "c_wood"].sum())*0.49
        
        df1["total_bran"] = (df1.loc[:, "c_branches"].sum())*0.49
        
        df1["total_stems"] = (df1.loc[:, "c_stems"].sum())*0.49
        
        df1["total_roots"] = (df1.loc[:, "c_roots"].sum())*0.49
        
        df1["total_agb"] = (df1.loc[:, "c_agb"].sum())*0.49
        
        print(df1)
        df_list.append(df1)

        
        #dfmi.loc[:, ('one', 'second')]
        
#     for index, row in df.iterrows():
#         tot_ba_ha.append((row["tot_prop"] * row["ba_all_m2"])/100)
#         al_ba_ha.append((row["al_prop"] * row["ba_alive_m2"])/100)
        
#     df["alv_ba_ha"] = al_ba_ha
#     df["tot_ba_ha"] = tot_ba_ha
    final_df = pd.concat(df_list)        
    return final_df

In [80]:
df5 = total_kg_per_site(df5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See th

                  species alive dead alv_prop ded_prop total_prop     site  \
0         Eucalyptus alba    12    0  8.33333        0    7.14286  girra02   
1  Lophostemon lactifluus    20    4  13.8889  16.6667    14.2857  girra02   
2        Melaleuca nervoa    93   19  64.5833  79.1667    66.6667  girra02   
3             P. spiralis    19    1  13.1944  4.16667    11.9048  girra02   

   uid      date  factor_x  ...        c_agb     c_roots  total_leaves  \
0    5  20120605       0.1  ...   455.265306   93.467347    105.528429   
1    5  20120605       0.1  ...   910.530612  186.934694    105.528429   
2    5  20120605       0.1  ...  4249.142857  872.361905    105.528429   
3    5  20120605       0.1  ...   758.775510  155.778912    105.528429   

   total_twigs  total_bark  total_wood  total_bran  total_stems  total_roots  \
0      114.751     448.357    1458.639    1106.105     1906.996      641.186   
1      114.751     448.357    1458.639    1106.105     1906.996      641.186  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documen

    species alive dead alv_prop ded_prop total_prop    site  uid  \
125    None     0    0        0        0          0  btl02a   17   

                  date  factor_x  ...  c_agb c_roots  total_leaves  \
125  20130827 15:00:10       0.0  ...    0.0     0.0           0.0   

     total_twigs  total_bark  total_wood  total_bran  total_stems  \
125          0.0         0.0         0.0         0.0          0.0   

     total_roots  total_agb  
125          0.0        0.0  

[1 rows x 58 columns]
    species alive dead alv_prop ded_prop total_prop    site  uid  \
126    None     0    0        0        0          0  btl03a   18   

                  date  factor_x  ...  c_agb c_roots  total_leaves  \
126  20130829 11:25:25       0.0  ...    0.0     0.0           0.0   

     total_twigs  total_bark  total_wood  total_bran  total_stems  \
126          0.0         0.0         0.0         0.0          0.0   

     total_roots  total_agb  
126          0.0        0.0  

[1 rows x 58 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

    species alive dead alv_prop ded_prop total_prop    site  uid  \
152    None     0    0        0        0          0  hls05a   37   

                  date  factor_x  ...  c_agb c_roots  total_leaves  \
152  20131012 06:50:08       0.0  ...    0.0     0.0           0.0   

     total_twigs  total_bark  total_wood  total_bran  total_stems  \
152          0.0         0.0         0.0         0.0          0.0   

     total_roots  total_agb  
152          0.0        0.0  

[1 rows x 58 columns]
    species alive dead alv_prop ded_prop total_prop    site  uid  \
153    None     0    0        0        0          0  hls06a   38   

                  date  factor_x  ...  c_agb c_roots  total_leaves  \
153  20131012 15:19:55       0.0  ...    0.0     0.0           0.0   

     total_twigs  total_bark  total_wood  total_bran  total_stems  \
153          0.0         0.0         0.0         0.0          0.0   

     total_roots  total_agb  
153          0.0        0.0  

[1 rows x 58 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/u

    species alive dead alv_prop ded_prop total_prop    site  uid  \
174    None     0    0        0        0          0  mgb10a   57   

                  date  factor_x  ...  c_agb c_roots  total_leaves  \
174  20130726 10:05:05       0.0  ...    0.0     0.0           0.0   

     total_twigs  total_bark  total_wood  total_bran  total_stems  \
174          0.0         0.0         0.0         0.0          0.0   

     total_roots  total_agb  
174          0.0        0.0  

[1 rows x 58 columns]
    species alive dead alv_prop ded_prop total_prop   site  uid  \
175    None     0    0        0        0          0  mlp02   58   

                  date  factor_x  ...  c_agb c_roots  total_leaves  \
175  20130510 00:44:10       0.0  ...    0.0     0.0           0.0   

     total_twigs  total_bark  total_wood  total_bran  total_stems  \
175          0.0         0.0         0.0         0.0          0.0   

     total_roots  total_agb  
175          0.0        0.0  

[1 rows x 58 columns]
  

In [81]:
df5.to_csv(r"Z:\Scratch\Rob\tern\tree_biomass_field_data\scratch\five_attempt_v4.csv")

In [82]:
df5.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 193 entries, 0 to 192
Data columns (total 58 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   species       193 non-null    object 
 1   alive         193 non-null    object 
 2   dead          193 non-null    object 
 3   alv_prop      193 non-null    object 
 4   ded_prop      193 non-null    object 
 5   total_prop    193 non-null    object 
 6   site          193 non-null    object 
 7   uid           193 non-null    int64  
 8   date          193 non-null    object 
 9   factor_x      193 non-null    float64
 10  loc_count     193 non-null    float64
 11  geometry      193 non-null    object 
 12  lon_gda94     193 non-null    float64
 13  lat_gda94     193 non-null    float64
 14  factor_y      193 non-null    float64
 15  count         193 non-null    float64
 16  avg_alive     193 non-null    float64
 17  avg_dead      193 non-null    float64
 18  avg_total     193 non-null    

In [89]:
df_totals = df5[["uid", "site", "date", "lon_gda94",  "lat_gda94", "factor_x", "count", "avg_alive", "avg_dead", "avg_total", "total_leaves",
                "total_twigs", "total_bark", "total_wood", "total_bran", "total_stems", "total_agb", "total_roots"]]

In [84]:
# df_final[df_final["uid"] == 1]

In [87]:
df_totals.drop_duplicates(subset=["site"], inplace=True)
df_totals.to_csv(r"Z:\Scratch\Rob\tern\tree_biomass_field_data\biomass_carbon\biomass_field_data_clean\biomass_totals_v1.csv", index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
