# Land Assessment Biomass Field Data

In [2]:
import pandas as pd
import geopandas as gpd
import os

In [7]:
date = "20230125"
drive = "D"

In [11]:
dir_ = r"{0}:\cdu\data\land_assessment".format(drive)
output_dir = r"{0}:\cdu\data\output\{1}".format(drive, date)

In [12]:
if not os.path.isdir(output_dir):
    os.mkdir(output_dir)

In [13]:
os.listdir(dir_)

['orack_survey_and_site_information.csv', 'orack_species.csv']

## Import data

In [116]:

csv_sp = os.path.join(dir_, "species.csv")
csv_site = os.path.join(dir_, "survey_and_site_information.csv")
csv_all = os.path.join(dir_, "alldata.csv")

csv_obs = os.path.join(dir_, "observations.csv")
csv_stratum = os.path.join(dir_, "stratum_summary.csv")
csv_form = os.path.join(dir_, "report_extract_growth_form.csv")


df_sp = pd.read_csv(csv_sp)
df_site = pd.read_csv(csv_site)
df_all = pd.read_csv(csv_all)

df_obs = pd.read_csv(csv_obs)
df_stratum = pd.read_csv(csv_stratum)
df_form = pd.read_csv(csv_form)

In [117]:
def site_to_string_fn(df):
    
    df['site_name'] = df['site_name'].astype(str)
    
    return df

In [118]:
def clean_dfs(df_loc, df_species):
    # select and rename required columns for df_loc
    
    dict1 = {'(Survey) Code': 'location',
             '(Site) Code': 'site_name',
            
    }
    df_loc_ = df_loc[['(Survey) Code', '(Site) Code', '(Obs.) Date', '(Site) Datum Code', '(Site) Latitude', '(Site) Longitude', 
                      '(Obs.) Structural Formation', '(Obs.) Site Description', '(Species) Taxon Name']]
    
    
    df_loc_.columns = ['location', 'site_name', 'date', 'datum',  'lat', 'lon', 
                       'str_form', 'description', 'botanical']
    
    df_loc_ = site_to_string_fn(df_loc_)
    
    # select and rename required columns for df species
    df_sp_ = df_sp[['(Survey) Survey Code', '(Site) Site Id', '(Species) Taxon Name', '(Species) Basal Count', 'Basal Area']]
    df_sp_.columns = ['location', 'site_name', 'botanical', 'basal_c', 'basal_a']
    
    df_sp_ = site_to_string_fn(df_sp_)
    
    df_all_ = pd.merge(df_loc_, df_sp_, how='left', on=['site_name', 'location', 'botanical'])
    
    return df_all_, df_loc_, df_sp_

In [119]:
df_all_, df_loc_, df_sp_ = clean_dfs(df_all, df_sp)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [121]:
df_all_

Unnamed: 0,location,site_name,date,datum,lat,lon,str_form,description,botanical,basal_c,basal_a
0,DRDAM,73,08-JUL-03,GDA94,-12.8555555557,131.0383305556,woodland,"Erythrophleum chlorostachys, Corymbia grandifo...",Sorghum,,
1,DRDAM,73,08-JUL-03,GDA94,-12.8555555557,131.0383305556,woodland,"Erythrophleum chlorostachys, Corymbia grandifo...",Themeda triandra,,
2,DRDAM,73,08-JUL-03,GDA94,-12.8555555557,131.0383305556,woodland,"Erythrophleum chlorostachys, Corymbia grandifo...",Planchonia careya,,
3,DRDAM,73,08-JUL-03,GDA94,-12.8555555557,131.0383305556,woodland,"Erythrophleum chlorostachys, Corymbia grandifo...",Acacia lamprocarpa,,
4,DRDAM,73,08-JUL-03,GDA94,-12.8555555557,131.0383305556,woodland,"Erythrophleum chlorostachys, Corymbia grandifo...",Buchanania obovata,,
...,...,...,...,...,...,...,...,...,...,...,...
1228,DRDAM,5,07-MAY-03,GDA94,-12.907666,130.951929,mid open woodland,"Corymbia polysciada, Erythrophleum chlorostach...",Tephrosia lamproloboides,,
1229,DRDAM,5,07-MAY-03,GDA94,-12.907666,130.951929,mid open woodland,"Corymbia polysciada, Erythrophleum chlorostach...",Terminalia ferdinandiana,,
1230,DRDAM,5,07-MAY-03,GDA94,-12.907666,130.951929,mid open woodland,"Corymbia polysciada, Erythrophleum chlorostach...",Pleurocarpaea denticulata,,
1231,DRDAM,5,07-MAY-03,GDA94,-12.907666,130.951929,mid open woodland,"Corymbia polysciada, Erythrophleum chlorostach...",Erythrophleum chlorostachys,,


In [113]:
def print_site(df):
    for i in df.site_name.unique():
        print(i)
    

In [122]:
print_site(df_all_)

73
18
17
19
15
16
1
10
11
12
13
14
81
90
91
92
67
68
69
70
71
93
77
79
80
72
74
75
2
82
83
84
85
76
78
46
47
3
86
87
88
89
48
49
50
51
4
5
64
65
66
52
53
54
55
56
57
59
58
60
61
62
63
32
33
34
35
36
37
38
39
40
41
42
43
44
45
22
23
24
25
26
27
28
29
30
31
20
21
7
8
9


In [109]:
def add_survey_site_fn(df1, df2):
    
    dict_ = {'Survey Code': "location",
            'Longitude (Dec/Gda94)': 'lat',
             'Latitude (Dec/Gda94)': 'lon',
            'Site Area M2': 'area'}
      
    # call rename () method
    df1.rename(columns=dict_,
          inplace=True)
    
#     df1_ = site_to_string_fn(df1)
    
    df_out = pd.merge(df2, df1, how='left', on=['location', 'lat', 'lon'])
    
    return df_out

In [110]:
out_df = add_survey_site_fn(df_site[['Survey Code', 'Longitude (Dec/Gda94)', 'Latitude (Dec/Gda94)',  'Site Area M2']], df_all_)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [111]:
df_stratum.columns

Index(['Survey Code', 'Site ID', 'Observation Id', 'Stratum', 'Stratum Rank',
       'Dominant Growthform', 'Cover', 'Average Height M', 'Height Range Min',
       'Height Range Max', 'NVIS Code', 'NVIS Vegetation Hierarchy',
       'Division ID', 'Agency ID'],
      dtype='object')

In [112]:
for i in out_df.site_name.unique():
    print(i)

15
16


In [None]:
# def add_stratum_fn(df1, df2):
    
#     dict_ = {'Survey Code': "location",
#             'Average Height M': 'height_av',
#              'Height Range Max': 'height_max',
#             'Dominant Growthform': 'dom_form'}
      
#     # call rename () method
#     df1.rename(columns=dict_,
#           inplace=True)
    
# #     df1_ = site_to_string_fn(df1)
    
#     df_out = pd.merge(df2, df1, how='left', on=['location', 'lat', 'lon'])

In [103]:
out

Unnamed: 0,location,site_name,date,datum,lat,lon,str_form,description,botanical,basal_c,basal_a,area
0,DRDAM,73,08-JUL-03,GDA94,-12.8555555557,131.0383305556,woodland,"Erythrophleum chlorostachys, Corymbia grandifo...",Sorghum,,,
1,DRDAM,73,08-JUL-03,GDA94,-12.8555555557,131.0383305556,woodland,"Erythrophleum chlorostachys, Corymbia grandifo...",Themeda triandra,,,
2,DRDAM,73,08-JUL-03,GDA94,-12.8555555557,131.0383305556,woodland,"Erythrophleum chlorostachys, Corymbia grandifo...",Planchonia careya,,,
3,DRDAM,73,08-JUL-03,GDA94,-12.8555555557,131.0383305556,woodland,"Erythrophleum chlorostachys, Corymbia grandifo...",Acacia lamprocarpa,,,
4,DRDAM,73,08-JUL-03,GDA94,-12.8555555557,131.0383305556,woodland,"Erythrophleum chlorostachys, Corymbia grandifo...",Buchanania obovata,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1257,DRDAM,5,07-MAY-03,GDA94,-12.907666,130.951929,mid open woodland,"Corymbia polysciada, Erythrophleum chlorostach...",Tephrosia lamproloboides,,,
1258,DRDAM,5,07-MAY-03,GDA94,-12.907666,130.951929,mid open woodland,"Corymbia polysciada, Erythrophleum chlorostach...",Terminalia ferdinandiana,,,
1259,DRDAM,5,07-MAY-03,GDA94,-12.907666,130.951929,mid open woodland,"Corymbia polysciada, Erythrophleum chlorostach...",Pleurocarpaea denticulata,,,
1260,DRDAM,5,07-MAY-03,GDA94,-12.907666,130.951929,mid open woodland,"Corymbia polysciada, Erythrophleum chlorostach...",Erythrophleum chlorostachys,,,


In [93]:
test

Unnamed: 0,suv_code,lat,lon,area
0,KAFER,132.250194,-13.936991,2500
1,KAFER,132.22714,-13.941526,2500
2,KAFER,132.221182,-13.97495,2500
3,KAFER,133.386607,-13.431869,2500
4,MRC,132.212708,-13.594384,400
...,...,...,...,...
77876,FINKE08,134.144065,-24.619927,-
77877,FINKE08,133.56878,-24.615223,-
77878,FINKE08,134.135692,-24.619043,-
77879,FINKE08,133.547162,-24.613185,-


In [90]:
df_all_.columns

Index(['location', 'site_name', 'date', 'datum', 'lat', 'lon', 'str_form',
       'description', 'botanical', 'basal_c', 'basal_a'],
      dtype='object')

In [87]:
df_site.columns

Index(['Agency', 'Division Id', 'Survey Code', 'Survey Name',
       'Project Officer', 'Date Start', 'Date End', 'Primary Survey Purpose',
       'Floristics', 'Strata', 'Cover Collected', 'Height Collected',
       'Longitude (Dec/Gda94)', 'Latitude (Dec/Gda94)', 'Site Type',
       'Site Area M2'],
      dtype='object')

In [52]:
print(df_all.shape)
df_all.columns

(66, 33)


Index(['(Division) Id', '(Survey) Code', '(Survey) Name',
       '(Survey) Primary Purpose', '(Site) Code',
       '(Site) Location Description', '(Site) Latitude', '(Site) Longitude',
       '(Site) Datum Code', '(Obs.) Code', '(Obs.) Date', '(Obs.) Recorder Id',
       '(Obs.) Structural Formation', '(Obs.) Comments',
       '(Obs.) Site Description', '(Species) Taxon Name', '(Species) Family',
       '(Species) Genus', '(Species) Species', '(Species) Infra Rank',
       '(Species) Infra Name', '(Species) Unidentified Species',
       '(Species) Original Id', '(Species) U1 Cover', '(Species) U2 Cover',
       '(Species) M1 Cover', '(Species) G1 Cover', '(Species) Total Cover',
       '(Species) Height', '(Species) Fruit', '(Species) Flower',
       '(Species) Basal Area Collected (Diameter)',
       'Nvis Vegetation Hierarchy'],
      dtype='object')

In [20]:
df_sp.columns

Index(['(Division) Division Id', '(Survey) Survey Code', '(Site) Site Id',
       '(Obs.) Observation Id', '(Stratum) Strata Code',
       '(Stratum) Cover Type', '(Species) Taxon Name', '(Species) Family',
       '(Species) Genus', '(Species) Species', '(Species) Infraspec Rank',
       '(Species) Infraspec Name', '(Species) Unidentified Species',
       '(Species) Original Id', '(Species) Basal Count', 'Basal Area',
       '(Species) U1 Cover', '(Species) U2 Cover', '(Species) M1 Cover',
       '(Species) G1 Cover', '(Species) Total Cover',
       '(Species) Basal Area Collected (Diameter)'],
      dtype='object')

In [69]:
df_st.columns

Index(['Agency', 'Division Id', 'Survey Code', 'Survey Name',
       'Project Officer', 'Date Start', 'Date End', 'Primary Survey Purpose',
       'Floristics', 'Strata', 'Cover Collected', 'Height Collected',
       'Longitude (Dec/Gda94)', 'Latitude (Dec/Gda94)', 'Site Type',
       'Site Area M2'],
      dtype='object')

In [22]:
id_name_list = df_sp['(Site) Site Id'].unique().tolist()

In [23]:
len(id_name_list)

66

In [21]:
for i in df_sp['(Site) Site Id'].unique():
    print(i)

OC060
OC061
OC062
OC063
OC064
OC006
OC008
OC009
OC010
OC011
OC065
OC066
OC067
OC068
OC012
OC013
OC014
OC015
OC016
OC017
OC018
OC019
OC020
OC021
OC022
OC023
OC024
OC025
OC026
OC027
OC030
OC031
OC032
OC033
OC034
OC035
OC036
OC037
OC038
OC039
OC040
OC041
OC042
OC043
OC044
OC045
OC046
OC047
OC048
OC049
OC050
OC051
OC052
OC001
OC002
OC003
OC053
OC054
OC055
OC004
OC005
OC007
OC056
OC057
OC058
OC059


In [19]:
# print(df_st.shape)
# df_st.head()

(68, 16)


Unnamed: 0,Agency,Division Id,Survey Code,Survey Name,Project Officer,Date Start,Date End,Primary Survey Purpose,Floristics,Strata,Cover Collected,Height Collected,Longitude (Dec/Gda94),Latitude (Dec/Gda94),Site Type,Site Area M2
0,DLRM,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Michael Carnavas,15/09/2016,30-Mar-17,Mapping,Full,All,Y,Y,133.479527,-24.346824,Bounded - square,400
1,DLRM,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Michael Carnavas,15/09/2016,30-Mar-17,Mapping,Full,All,Y,Y,133.476962,-24.348852,Bounded - square,400
2,DLRM,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Michael Carnavas,15/09/2016,30-Mar-17,Mapping,Full,All,Y,Y,133.438976,-24.348822,Bounded - square,400
3,DLRM,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Michael Carnavas,15/09/2016,30-Mar-17,Mapping,Full,All,Y,Y,133.437768,-24.347378,Bounded - square,400
4,DLRM,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Michael Carnavas,15/09/2016,30-Mar-17,Mapping,Full,All,Y,Y,133.446864,-24.344498,Bounded - square,400


In [33]:
df_all

Unnamed: 0,(Division) Id,(Survey) Code,(Survey) Name,(Survey) Primary Purpose,(Site) Code,(Site) Location Description,(Site) Latitude,(Site) Longitude,(Site) Datum Code,(Obs.) Code,...,(Species) U1 Cover,(Species) U2 Cover,(Species) M1 Cover,(Species) G1 Cover,(Species) Total Cover,(Species) Height,(Species) Fruit,(Species) Flower,(Species) Basal Area Collected (Diameter),Nvis Vegetation Hierarchy
0,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC001,-,-24.341997,133.471362,GDA94,OC001,...,-,-,1,-,-,-,-,-,-,L4/5
1,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC001,-,-24.341997,133.471362,GDA94,OC001,...,-,-,-,0.1,-,-,-,-,-,L4/5
2,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC001,-,-24.341997,133.471362,GDA94,OC001,...,-,-,-,1,-,-,-,-,-,L4/5
3,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC001,-,-24.341997,133.471362,GDA94,OC001,...,2,-,-,-,-,-,-,-,-,L4/5
4,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC001,-,-24.341997,133.471362,GDA94,OC001,...,-,-,-,3,-,-,-,-,-,L4/5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
825,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC046,-,-24.376900,133.615800,WGS84,OC046,...,-,-,-,0.35,-,-,-,-,-,L4/5
826,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC046,-,-24.376900,133.615800,WGS84,OC046,...,-,-,-,4,-,-,-,-,-,L4/5
827,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC046,-,-24.376900,133.615800,WGS84,OC046,...,-,-,-,10,-,-,-,-,-,L4/5
828,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC046,-,-24.376900,133.615800,WGS84,OC046,...,-,-,-,0.35,-,-,-,-,-,L4/5


In [28]:
df_all.columns

Index(['(Division) Id', '(Survey) Code', '(Survey) Name',
       '(Survey) Primary Purpose', '(Site) Code',
       '(Site) Location Description', '(Site) Latitude', '(Site) Longitude',
       '(Site) Datum Code', '(Obs.) Code', '(Obs.) Date', '(Obs.) Recorder Id',
       '(Obs.) Structural Formation', '(Obs.) Comments',
       '(Obs.) Site Description', '(Species) Taxon Name', '(Species) Family',
       '(Species) Genus', '(Species) Species', '(Species) Infra Rank',
       '(Species) Infra Name', '(Species) Unidentified Species',
       '(Species) Original Id', '(Species) U1 Cover', '(Species) U2 Cover',
       '(Species) M1 Cover', '(Species) G1 Cover', '(Species) Total Cover',
       '(Species) Height', '(Species) Fruit', '(Species) Flower',
       '(Species) Basal Area Collected (Diameter)',
       'Nvis Vegetation Hierarchy'],
      dtype='object')

In [47]:
df_all.drop_duplicates(subset=['(Site) Code'], inplace=True)

In [48]:
df_all

Unnamed: 0,(Division) Id,(Survey) Code,(Survey) Name,(Survey) Primary Purpose,(Site) Code,(Site) Location Description,(Site) Latitude,(Site) Longitude,(Site) Datum Code,(Obs.) Code,...,(Species) U1 Cover,(Species) U2 Cover,(Species) M1 Cover,(Species) G1 Cover,(Species) Total Cover,(Species) Height,(Species) Fruit,(Species) Flower,(Species) Basal Area Collected (Diameter),Nvis Vegetation Hierarchy
0,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC001,-,-24.341997,133.471362,GDA94,OC001,...,-,-,1,-,-,-,-,-,-,L4/5
16,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC002,-,-24.341151,133.472594,GDA94,OC002,...,-,-,2,5,-,-,-,-,-,L4/5
27,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC003,-,-24.355508,133.470736,GDA94,OC003,...,-,-,-,0.125,-,-,-,-,-,L4/5
40,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC004,-,-24.355961,133.482539,GDA94,OC004,...,-,-,-,3,-,-,-,-,-,L4/5
54,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC005,-,-24.343769,133.425748,GDA94,OC005,...,-,-,2,-,-,-,-,-,-,L4/5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
787,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC042,-,-24.353417,133.435762,WGS84,OC042,...,-,-,-,0.5,-,-,-,-,-,L4/5
792,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC043,-,-24.356963,133.440536,WGS84,OC043,...,-,-,-,0.1,-,-,-,-,-,L4/5
797,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC044,-,-24.356063,133.434064,WGS84,OC044,...,-,-,5,-,-,-,-,-,-,L4/5
803,LVA,ORACK,Soil and Land Suitability Assessment for Irrig...,Mapping,OC045,-,-24.373400,133.624500,WGS84,OC045,...,-,-,-,0.1,-,-,-,-,-,L4/5


In [49]:
df = pd.merge(df_all, df_sp, right_on='(Site) Code', left_on = '(Site) Site Id')

KeyError: '(Site) Code'

In [31]:
df

Unnamed: 0,(Division) Id,(Survey) Code,(Survey) Name,(Survey) Primary Purpose,(Site) Code,(Site) Location Description,(Site) Latitude,(Site) Longitude,(Site) Datum Code,(Obs.) Code,...,(Species) Unidentified Species_y,(Species) Original Id_y,(Species) Basal Count,Basal Area,(Species) U1 Cover_y,(Species) U2 Cover_y,(Species) M1 Cover_y,(Species) G1 Cover_y,(Species) Total Cover_y,(Species) Basal Area Collected (Diameter)_y
