In [1]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Project directories.
# The root defaults to the original absolute Windows path; set the TICINO_ROOT
# environment variable to run the notebook on another machine without editing
# this cell.
cwd = os.environ.get('TICINO_ROOT', 'c:\\Users\\m1865\\Desktop\\Ticino')
# Sub-directories are kept as plain strings with Windows separators because the
# other cells build file paths by appending "\\<file name>" to them.
cwd_Field = cwd + '\\FieldData'
cwd_Field_Data2 = cwd_Field + '\\Field Dataset 2'

In [7]:
# Load the workbook that lists the geographical coordinates of the field plots.
# Later cells read its 'n° plot', 'X' and 'Y' columns.
df_Field_Coord = pd.read_excel(
    f"{cwd_Field_Data2}\\Dati coordinate plot forestali Parco Ticino.xlsx"
)
df_Field_Coord

Unnamed: 0,n° plot,Y,X
0,1,45.712265,8.757804
1,2,45.702801,8.750167
2,3,45.639628,8.66678
3,4,45.630213,8.66777
4,5,45.552661,8.703548
5,6,45.542031,8.705301
6,7,45.53641,8.704414
7,8,45.507882,8.706822
8,9,45.70544,8.67557
9,10,45.705992,8.660657


In [10]:
# Build a point layer straight from the coordinate table: 'X' supplies the
# x-coordinate (stored below as 'Long') and 'Y' the y-coordinate (stored below
# as 'Lat'), declared as EPSG:4326.
gdf_Field_All_4326 = gpd.GeoDataFrame(
    df_Field_Coord['n° plot'],
    geometry=gpd.points_from_xy(x=df_Field_Coord['X'], y=df_Field_Coord['Y']),
    crs='EPSG:4326',
)

# Reprojected copy in EPSG:32632. It is derived before the Lat/Long columns are
# added, so it carries only the plot number and the geometry.
gdf_Field_All_32632 = gdf_Field_All_4326.to_crs(epsg=32632)

# Expose the projected coordinates as ordinary attribute columns.
gdf_Field_All_32632['UTM_X'] = gdf_Field_All_32632.geometry.x
gdf_Field_All_32632['UTM_Y'] = gdf_Field_All_32632.geometry.y

# Keep the original geographic coordinates as attribute columns as well.
gdf_Field_All_4326['Lat'] = df_Field_Coord['Y']
gdf_Field_All_4326['Long'] = df_Field_Coord['X']

gdf_Field_All_4326

Unnamed: 0,n° plot,geometry,Lat,Long
0,1,POINT (8.75780 45.71227),45.712265,8.757804
1,2,POINT (8.75017 45.70280),45.702801,8.750167
2,3,POINT (8.66678 45.63963),45.639628,8.66678
3,4,POINT (8.66777 45.63021),45.630213,8.66777
4,5,POINT (8.70355 45.55266),45.552661,8.703548
5,6,POINT (8.70530 45.54203),45.542031,8.705301
6,7,POINT (8.70441 45.53641),45.53641,8.704414
7,8,POINT (8.70682 45.50788),45.507882,8.706822
8,9,POINT (8.67557 45.70544),45.70544,8.67557
9,10,POINT (8.66066 45.70599),45.705992,8.660657


In [11]:
# Write both all-points layers (EPSG:4326 and EPSG:32632) to shapefiles in the
# field-data directory.
gdf_Field_All_4326.to_file(f"{cwd_Field}\\Field Data 2 All Points (4326).shp")
gdf_Field_All_32632.to_file(f"{cwd_Field}\\Field Data 2 All Points (32632).shp")

In [12]:
# Some plots lie outside the region of interest (RoI) and must be removed later.
# Load the RoI boundary shapefile that the plots will be checked against.
gdf_RoI = gpd.read_file(
    f"{cwd_Field}\\ShapefileCorretti\\confini_foreste_corretti.shp"
)

In [18]:
# Find the field plots (points) that spatially join to the RoI polygons and
# collect their plot numbers.
# A spatial join compares raw coordinates, so both layers must share one CRS.
# The points are in EPSG:32632; if the RoI layer declares a different CRS, the
# points are reprojected to it first (the RoI geometry itself is left as-is).
field_points = gdf_Field_All_32632
if gdf_RoI.crs is not None and field_points.crs != gdf_RoI.crs:
    field_points = field_points.to_crs(gdf_RoI.crs)

gdf_InnerJoin = gdf_RoI.sjoin(field_points, how="inner").sort_values('n° plot')
# Despite the name, pd.unique returns a NumPy array of the distinct plot numbers.
list_ValidPlots = pd.unique(gdf_InnerJoin['n° plot'])
list_ValidPlots

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 20,
       21, 22, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 36, 37, 38, 39],
      dtype=int64)

In [19]:
# Keep only the plots found inside the RoI and build a fresh, re-indexed table.
# Rows are selected with a boolean mask rather than where(...).dropna(), which
# would first blank every unmatched row to NaN and coerce integer columns to
# float as a side effect.
is_valid_plot = df_Field_Coord['n° plot'].isin(list_ValidPlots)
df_Field_Valid = (
    df_Field_Coord.loc[is_valid_plot]
    # Rows with any missing value are still discarded, as before.
    .dropna()
    .reset_index(drop=True)
    # Same nullable-integer dtype for the plot number as the previous version.
    .astype({'n° plot': 'Int64'})
)
df_Field_Valid

Unnamed: 0,n° plot,Y,X
0,1,45.712265,8.757804
1,2,45.702801,8.750167
2,3,45.639628,8.66678
3,4,45.630213,8.66777
4,5,45.552661,8.703548
5,6,45.542031,8.705301
6,7,45.53641,8.704414
7,8,45.507882,8.706822
8,9,45.70544,8.67557
9,10,45.705992,8.660657


In [20]:
# Rebuild the point layer from the RoI-filtered table, again declared as
# EPSG:4326 with 'X' as the x-coordinate and 'Y' as the y-coordinate.
gdf_Field_Valid_4326 = gpd.GeoDataFrame(
    df_Field_Valid['n° plot'],
    geometry=gpd.points_from_xy(x=df_Field_Valid['X'], y=df_Field_Valid['Y']),
    crs='EPSG:4326',
)
# Reproject from 4326 (WGS84) to 32632 (WGS84 UTM zone 32N). The copy is made
# before Lat/Long are added, so it does not carry those columns.
gdf_Field_Valid_32632 = gdf_Field_Valid_4326.to_crs(epsg=32632)

# Geographic layer: attach the coordinates as attribute columns, then save.
gdf_Field_Valid_4326['Lat'] = df_Field_Valid['Y']
gdf_Field_Valid_4326['Long'] = df_Field_Valid['X']
gdf_Field_Valid_4326.to_file(f"{cwd_Field}\\Field Data 2 Valid Points (4326).shp")

# Projected layer: attach the projected coordinates, then save.
gdf_Field_Valid_32632['UTM_X'] = gdf_Field_Valid_32632.geometry.x
gdf_Field_Valid_32632['UTM_Y'] = gdf_Field_Valid_32632.geometry.y
gdf_Field_Valid_32632.to_file(f"{cwd_Field}\\Field Data 2 Valid Points (32632).shp")

# On hold: calculation of the index

In [3]:
# The plot workbook contains several sheets, so open it once as an ExcelFile
# and list the sheet names before reading any data.
xlsx = pd.ExcelFile(
    f"{cwd_Field_Data2}\\Dati plot forestali Parco Ticino completo.xlsx"
)
xlsx.sheet_names

['Querce caducifoglie',
 'Acero-fra, Orno-ost',
 'Castagneti',
 'Pinete con lat',
 'Lat alloctone',
 'Lat meso con conifere',
 'TEMPLATE']

In [4]:
# Stack every data sheet of the workbook into one frame; the 'TEMPLATE' sheet
# is skipped.
# Each sheet is parsed from the already-open `xlsx` workbook (no re-opening of
# the file per sheet) and all frames are concatenated in one call instead of
# growing a DataFrame inside the loop.
sheet_frames = [
    xlsx.parse(sheet_name=sheet)
    for sheet in xlsx.sheet_names
    if sheet != 'TEMPLATE'
]
# pd.concat raises on an empty list, so fall back to an empty frame, which is
# what the loop-based version produced when no sheet qualified.
xlsx_field = (
    pd.concat(sheet_frames, ignore_index=True) if sheet_frames else pd.DataFrame()
)
xlsx_field

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5
0,Tipo forestale:,,Querce caducifoglie (colore rosso),,,
1,,,,,,
2,ha factor,31.830989,,,,
3,Radius [m],10,,,,
4,,,,,,
...,...,...,...,...,...,...
972,40,9,casa,25.5,,
973,40,10,prse,10.5,,
974,40,11,quro,41.5,,
975,40,12,quro,48,,


In [5]:
# Only two columns of the concatenated frame are needed: the ones at positions
# 0 and 2 (the following cell names them "Plot Number" and "Tree Species").
# Selecting them directly avoids hard-coding which other positions to drop.
# Note: this cell overwrites xlsx_field, so re-running it without first
# re-running the cell that builds xlsx_field fails (position 2 no longer exists).
xlsx_field = xlsx_field.iloc[:, [0, 2]]
xlsx_field

Unnamed: 0.1,Unnamed: 0,Unnamed: 2
0,Tipo forestale:,Querce caducifoglie (colore rosso)
1,,
2,ha factor,
3,Radius [m],
4,,
...,...,...
972,40,casa
973,40,prse
974,40,quro
975,40,quro


In [6]:
# Drop the non-data rows: a row is kept only when the string form of its
# first-column value is purely numeric. Values are converted with str() first
# because isnumeric() is a string method; NaN becomes 'nan' and is rejected.
# NOTE(review): a plot number stored as a float (e.g. 1.0 -> '1.0') is also
# rejected by isnumeric() -- confirm the column holds integers.
# The mask is computed in one pass instead of dropping rows one at a time,
# which copied the whole frame on every dropped row.
is_data_row = (
    xlsx_field.iloc[:, 0]
    .map(lambda value: str(value).isnumeric())
    .astype(bool)
)
# Filter positionally (NumPy mask) and renumber the remaining rows from 0.
df_field = xlsx_field.loc[is_data_row.to_numpy()].reset_index(drop=True)
# Assign column names
df_field.columns = ["Plot Number", "Tree Species"]
# Finally we get our clean dataframe!
df_field

Unnamed: 0,Plot Number,Tree Species
0,1,quru
1,1,pisy
2,1,quru
3,1,quru
4,1,quru
...,...,...
834,40,casa
835,40,prse
836,40,quro
837,40,quro
