# Cliping A tiff File

In [1]:
import rasterio
import rasterio.mask
import fiona
import matplotlib.pyplot as plt
import numpy as np
import os 
import pandas as pd

In [2]:
with fiona.open("../DATASETS/AusPlots_Location/AusPlots_Geometries_20240508.shp", "r") as shapefile:
    shapes = [feature["geometry"] for feature in shapefile]
    shapes_name = [properties["properties"]['Name'] for properties in shapefile]

In [3]:
data_file_path = 'D:/Krish_New/SLGA/'
#data_file_path = 'C:/Users/krish/Desktop/Test/'
slga_datafiles = os.listdir(data_file_path) # list all datafiles 
slga_datafiles.sort()

In [4]:
slga_datafiles

['CLY_000_005_EV_N_P_AU_TRN_N_20210902.tif',
 'CLY_005_015_EV_N_P_AU_TRN_N_20210902.tif',
 'CLY_015_030_EV_N_P_AU_TRN_N_20210902.tif',
 'CLY_030_060_EV_N_P_AU_TRN_N_20210902.tif',
 'CLY_060_100_EV_N_P_AU_TRN_N_20210902.tif',
 'CLY_100_200_EV_N_P_AU_TRN_N_20210902.tif',
 'DER_000_999_EV_N_P_AU_NAT_C_20150601.tif',
 'NTO_000_005_EV_N_P_AU_NAT_C_20140801.tif',
 'NTO_005_015_EV_N_P_AU_NAT_C_20140801.tif',
 'NTO_015_030_EV_N_P_AU_NAT_C_20140801.tif',
 'NTO_030_060_EV_N_P_AU_NAT_C_20140801.tif',
 'NTO_060_100_EV_N_P_AU_NAT_C_20140801.tif',
 'PHW_005_015_EV_N_P_AU_TRN_N_20220520.tif',
 'PHW_015_030_EV_N_P_AU_TRN_N_20220520.tif',
 'PHW_030_060_EV_N_P_AU_TRN_N_20220520.tif',
 'PHW_060_100_EV_N_P_AU_TRN_N_20220520.tif',
 'PHW_100_200_EV_N_P_AU_TRN_N_20220520.tif',
 'PTO_000_005_EV_N_P_AU_NAT_C_20140801.tif',
 'PTO_005_015_EV_N_P_AU_NAT_C_20140801.tif',
 'PTO_015_030_EV_N_P_AU_NAT_C_20140801.tif',
 'PTO_030_060_EV_N_P_AU_NAT_C_20140801.tif',
 'PTO_060_100_EV_N_P_AU_NAT_C_20140801.tif',
 'SLT_000_

In [5]:
column_names = [] # create user_friendly column names 
for i in slga_datafiles:
    split_words = i.split('_')[:3]
    column_names.append('_'.join(split_words))
column_names

['CLY_000_005',
 'CLY_005_015',
 'CLY_015_030',
 'CLY_030_060',
 'CLY_060_100',
 'CLY_100_200',
 'DER_000_999',
 'NTO_000_005',
 'NTO_005_015',
 'NTO_015_030',
 'NTO_030_060',
 'NTO_060_100',
 'PHW_005_015',
 'PHW_015_030',
 'PHW_030_060',
 'PHW_060_100',
 'PHW_100_200',
 'PTO_000_005',
 'PTO_005_015',
 'PTO_015_030',
 'PTO_030_060',
 'PTO_060_100',
 'SLT_000_005',
 'SLT_005_015',
 'SLT_030_060',
 'SLT_060_100',
 'SLT_100_200',
 'pHc_000_005',
 'pHc_005_015',
 'pHc_015_030',
 'pHc_030_060',
 'pHc_060_100']

In [6]:
site_slga_data = pd.DataFrame(columns = column_names, index = shapes_name)
site_slga_data.head()

Unnamed: 0,CLY_000_005,CLY_005_015,CLY_015_030,CLY_030_060,CLY_060_100,CLY_100_200,DER_000_999,NTO_000_005,NTO_005_015,NTO_015_030,...,SLT_000_005,SLT_005_015,SLT_030_060,SLT_060_100,SLT_100_200,pHc_000_005,pHc_005_015,pHc_015_030,pHc_030_060,pHc_060_100
NSABHC0015,,,,,,,,,,,...,,,,,,,,,,
NSABHC0018,,,,,,,,,,,...,,,,,,,,,,
NSABHC0019,,,,,,,,,,,...,,,,,,,,,,
NSABHC0020,,,,,,,,,,,...,,,,,,,,,,
NSABHC0021,,,,,,,,,,,...,,,,,,,,,,


```python


for col_idx, df in enumerate(slga_datafiles):
    
    variable_mean = []
    file_path = data_file_path+df

    with rasterio.open(file_path) as src:

        for i, shape in enumerate(shapes):
            shape = [shape] # shape has to be in a list 
            shape_name = shapes_name[i] # reference the name of  the site 

            out_image, out_transform = rasterio.mask.mask(src, shape, crop=True) # perform masking 
            
            # out_meta = src.meta # update meta data of masked content 

            # out_meta.update({"driver": "GTiff",
            #              "height": out_image.shape[1],
            #              "width": out_image.shape[2],
            #              "transform": out_transform})

            image_mean = np.nanmean(out_image)
            variable_mean.append(image_mean)
        
    site_slga_data[column_names[col_idx]] = variable_mean

        #with rasterio.open(f"../DATASETS/Soils_and_Landscape_Grid_of_Australia/Output/{file_name}/{shape_name}_{file_name}.tif" , "w", **out_meta) as dest:
        #    dest.write(out_image)
```

In [7]:
for col_idx, df in enumerate(slga_datafiles):
    
    variable_mean = []
    file_path = data_file_path+df
    
    split_words = df.split('_')[:3]
    column_name = '_'.join(split_words)
    
    with rasterio.open(file_path) as src:
        print(src)

        for i, shape in enumerate(shapes):

            shape = [shape] # shape has to be in a list 
            shape_name = shapes_name[i] # reference the name of  the site 
            #print(shape_name)
        
            out_image, out_transform = rasterio.mask.mask(src, shape, all_touched = True, crop=True, nodata = -999999) # perform masking 
            out_image_filter = out_image[out_image != -999999]
            image_mean = np.nanmean(out_image_filter)
            variable_mean.append(image_mean)
        
    site_slga_data[column_name] = variable_mean

<open DatasetReader name='D:/Krish_New/SLGA/CLY_000_005_EV_N_P_AU_TRN_N_20210902.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/CLY_005_015_EV_N_P_AU_TRN_N_20210902.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/CLY_015_030_EV_N_P_AU_TRN_N_20210902.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/CLY_030_060_EV_N_P_AU_TRN_N_20210902.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/CLY_060_100_EV_N_P_AU_TRN_N_20210902.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/CLY_100_200_EV_N_P_AU_TRN_N_20210902.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/DER_000_999_EV_N_P_AU_NAT_C_20150601.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/NTO_000_005_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/NTO_005_015_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/NTO_015_030_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/NTO_030_060_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/NTO_060_100_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/PHW_005_015_EV_N_P_AU_TRN_N_20220520.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/PHW_015_030_EV_N_P_AU_TRN_N_20220520.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/PHW_030_060_EV_N_P_AU_TRN_N_20220520.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/PHW_060_100_EV_N_P_AU_TRN_N_20220520.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/PHW_100_200_EV_N_P_AU_TRN_N_20220520.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/PTO_000_005_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/PTO_005_015_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/PTO_015_030_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/PTO_030_060_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/PTO_060_100_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/SLT_000_005_EV_N_P_AU_TRN_N_20210902.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/SLT_005_015_EV_N_P_AU_TRN_N_20210902.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/SLT_030_060_EV_N_P_AU_TRN_N_20210902.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/SLT_060_100_EV_N_P_AU_TRN_N_20210902.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/SLT_100_200_EV_N_P_AU_TRN_N_20210902.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/pHc_000_005_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/pHc_005_015_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/pHc_015_030_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/pHc_030_060_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


<open DatasetReader name='D:/Krish_New/SLGA/pHc_060_100_EV_N_P_AU_NAT_C_20140801.tif' mode='r'>


  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)
  image_mean = np.nanmean(out_image_filter)


In [8]:
site_slga_data

Unnamed: 0,CLY_000_005,CLY_005_015,CLY_015_030,CLY_030_060,CLY_060_100,CLY_100_200,DER_000_999,NTO_000_005,NTO_005_015,NTO_015_030,...,SLT_000_005,SLT_005_015,SLT_030_060,SLT_060_100,SLT_100_200,pHc_000_005,pHc_005_015,pHc_015_030,pHc_030_060,pHc_060_100
NSABHC0015,16.750000,18.500000,22.500000,24.750000,26.250000,37.500000,49.353374,0.045113,0.041196,0.033463,...,12.500000,13.000000,14.75,15.750000,11.000000,6.778077,6.810408,6.984777,7.166409,7.353638
NSABHC0018,14.666667,16.833334,19.166666,23.000000,27.000000,28.333334,36.075130,0.058668,0.053181,0.039478,...,12.000000,12.333333,14.00,15.666667,10.833333,6.760462,6.850828,7.020003,7.166299,7.340396
NSABHC0019,16.250000,18.250000,20.000000,23.750000,28.000000,28.250000,48.759270,0.050166,0.045422,0.034784,...,12.500000,12.750000,14.00,15.750000,11.000000,6.513579,6.596149,6.790872,7.059542,7.267633
NSABHC0020,15.000000,17.000000,22.500000,24.750000,27.000000,30.750000,30.916204,0.052847,0.047825,0.033557,...,10.000000,11.000000,13.00,14.000000,10.000000,6.771367,6.856721,7.045397,7.198980,7.352275
NSABHC0021,17.750000,21.000000,26.500000,29.000000,29.000000,38.000000,37.567871,0.046372,0.042036,0.032747,...,12.750000,13.250000,15.00,15.500000,11.000000,7.007833,7.095415,7.262317,7.415216,7.571328
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WAFWAR0008,7.600000,8.400000,12.000000,17.400000,25.600000,23.200001,4.731328,0.204518,0.164085,0.101007,...,7.800000,7.600000,8.60,9.400000,8.200000,4.642344,4.643826,4.642224,4.677683,4.772305
WAFWAR0009,8.000000,8.400000,10.600000,12.600000,22.000000,12.800000,5.439118,0.197557,0.157514,0.088943,...,8.000000,8.000000,7.00,8.400000,6.200000,4.820743,4.796637,4.820189,4.864625,5.018898
WAGCOO0001,11.200000,12.400000,15.800000,19.799999,18.200001,22.400000,9.971666,0.047585,0.041912,0.030763,...,6.800000,7.200000,7.80,7.000000,8.000000,5.374317,5.390033,5.489561,5.461819,5.604017
WAGCOO0002,8.500000,8.750000,10.500000,11.750000,12.750000,18.500000,12.241350,0.037882,0.033406,0.024979,...,4.250000,4.250000,4.75,4.250000,7.000000,5.238029,5.247667,5.333941,5.336809,5.485204


In [9]:
site_slga_data.to_csv('../DATASETS/Soils_and_Landscape_Grid_of_Australia/Output/site_slga_data.csv')

In [10]:
site_slga_data = pd.read_csv('../DATASETS/Soils_and_Landscape_Grid_of_Australia/Output/site_slga_data.csv', index_col = 0)
site_slga_data.head()

Unnamed: 0,CLY_000_005,CLY_005_015,CLY_015_030,CLY_030_060,CLY_060_100,CLY_100_200,DER_000_999,NTO_000_005,NTO_005_015,NTO_015_030,...,SLT_000_005,SLT_005_015,SLT_030_060,SLT_060_100,SLT_100_200,pHc_000_005,pHc_005_015,pHc_015_030,pHc_030_060,pHc_060_100
NSABHC0015,16.75,18.5,22.5,24.75,26.25,37.5,49.353374,0.045113,0.041196,0.033463,...,12.5,13.0,14.75,15.75,11.0,6.778077,6.810408,6.984777,7.16641,7.353638
NSABHC0018,14.666667,16.833334,19.166666,23.0,27.0,28.333334,36.07513,0.058668,0.053181,0.039478,...,12.0,12.333333,14.0,15.666667,10.833333,6.760462,6.850828,7.020003,7.166299,7.340396
NSABHC0019,16.25,18.25,20.0,23.75,28.0,28.25,48.75927,0.050166,0.045422,0.034784,...,12.5,12.75,14.0,15.75,11.0,6.513579,6.596149,6.790872,7.059542,7.267633
NSABHC0020,15.0,17.0,22.5,24.75,27.0,30.75,30.916204,0.052847,0.047825,0.033557,...,10.0,11.0,13.0,14.0,10.0,6.771367,6.856721,7.045397,7.19898,7.352275
NSABHC0021,17.75,21.0,26.5,29.0,29.0,38.0,37.56787,0.046372,0.042036,0.032747,...,12.75,13.25,15.0,15.5,11.0,7.007833,7.095415,7.262317,7.415216,7.571328


In [11]:
site_slga_data['PHW_005_015']

NSABHC0015    8.400000
NSABHC0018    7.900000
NSABHC0019    7.900000
NSABHC0020    8.025000
NSABHC0021    8.300000
                ...   
WAFWAR0008    6.120000
WAFWAR0009    6.440000
WAGCOO0001    6.639999
WAGCOO0002    6.425000
WAGCOO0004    7.150000
Name: PHW_005_015, Length: 907, dtype: float64

In [12]:
groups = []
for i in site_slga_data.index:
    groups.append(i[:6])
    
site_slga_data_1 = site_slga_data.copy()
site_slga_data_1['group'] = groups