In [3]:
import numpy as np                                                                                     
import matplotlib.pyplot as plt                                 
import pandas as pd                                             
import geopandas as gpd                                         
import rasterio
from rasterstats import zonal_stats
from sklearn.preprocessing import Normalizer, StandardScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from mlxtend.feature_selection import SequentialFeatureSelector, ExhaustiveFeatureSelector
import seaborn as sns
import os

# turns on 'inline plotting', where plot graphics will appear in the notebook below the cell that produced them.
%matplotlib inline              

# Display configuration: let pandas print up to 80 characters per cell before truncating,
# so long text values (e.g. geometry strings) stay readable in rendered frames.
pd.set_option('display.max_colwidth', 80)

## Data Cleaning and Munging

In [5]:
# Project root against which every relative data path in this notebook is resolved.
# Set the CORN_PROJECT_DIR environment variable to run the notebook on another machine;
# when it is not set, the original location is used unchanged.
path = os.environ.get('CORN_PROJECT_DIR', 'F:/Project/2023')

# the cells below open their inputs with paths relative to this directory
os.chdir(path)

In [7]:
# loads the corn plot polygons
plots = gpd.read_file('2023_plots/plots.shp')


# opens the stacked SAR/optical raster of each growing period (datasets stay open for later use)
early_july, late_july, august, september = [
    rasterio.open(tif_path)
    for tif_path in (
        'SAR_optical/July/Early_July/16_5_stacked.tif',
        'SAR_optical/July/Late_July/16_5_stacked.tif',
        'SAR_optical/August/16_5_stacked.tif',
        'SAR_optical/September/16_5_stacked.tif',
    )
]

# reads the band arrays of every raster into a list, in growing-period order, for looping
all_4 = [src.read() for src in (early_july, late_july, august, september)]

# affine transform of the late-July raster: maps pixel coordinates to x,y coordinates
affine = late_july.transform

In [23]:
# Base column names of the 21 features (16 'S1_' followed by 5 'S2_').
# The order matters: these labels are later assigned positionally to the per-band statistics.
names = [
    'S1_ELLIP', 'S1_ORIEN', 'S1_LPR', 'S1_I2', 'S1_I1', 'S1_g3', 'S1_g2', 'S1_g1', 'S1_g0', 'S1_DoLP',
    'S1_span', 'S1_entropy_shannon', 'S1_entropy', 'S1_alpha', 'S1_C22', 'S1_C11',
    'S2_NDVI', 'S2_NDRE', 'S2_NDWI', 'S2_RECI', 'S2_GCI',
]

# creates special column names for each month's data
# (_el = early July, _ll = late July, _au = August, _se = September)
el = [name + '_el' for name in names]
ll = [name + '_ll' for name in names]
au = [name + '_au' for name in names]
se = [name + '_se' for name in names]

# unpacks the names in the above lists into a new list (4 stages x 21 features, stage-major order)
all_names = [*el, *ll, *au, *se]


# Column names for the final training data: the plot attribute columns followed by the
# un-suffixed feature names. Defined here (rather than left as a comment) so the cell that
# renames the per-stage tables also works when the notebook is run from a fresh kernel.
# NOTE(review): the saved output of `train_pts` shows 'R_NDVI', 'R_NDRE', ... labels, so it was
# produced with a different list - confirm which prefix any downstream cells expect.
new_names = ['Id', 'Name', 'geometry', *names]

In [26]:
# finds the median of each plot for all the 21 features, for every growing stage.
# Each band stack is paired with the transform of the raster it was read from, so a raster
# on a different grid is never sampled through another raster's georeferencing.
stage_rasters = (early_july, late_july, august, september)   # same order as all_4
median = []
for src, stack in zip(stage_rasters, all_4):
    for band in stack:
        df_median = pd.DataFrame(zonal_stats(plots, band, affine=src.transform, stats= 'median'))
        median.append(df_median)

# the labels below are assigned by position, so the band count has to match them exactly
if len(median) != len(all_names):
    raise ValueError(
        f'expected {len(all_names)} bands across the four rasters, got {len(median)}'
    )

# the loop yields one single-column dataframe per feature per stage; this places them side by side
median_df= pd.concat(median, axis=1)

# renames the columns and splits them based on the growing stage
# (anchored on the suffix so that only the trailing stage tag can match)
median_df.columns= all_names
median_el= median_df.filter(regex= '_el$')
median_ll= median_df.filter(regex= '_ll$')
median_au= median_df.filter(regex= '_au$')
median_se= median_df.filter(regex= '_se$')

# Common column labels for every stage table: the plot attributes followed by the
# un-suffixed feature names, so the four tables can be stacked row-wise.
# NOTE(review): the saved output of `train_pts` shows 'R_NDVI', ... labels from an earlier
# run - confirm which prefix any downstream cells expect.
stage_columns = ['Id', 'Name', 'geometry', *names]


def _attach_to_plots(stage_medians):
    """Return the plots GeoDataFrame with one stage's medians appended column-wise.

    The result is relabelled with ``stage_columns`` so that tables from different
    growing stages share identical column names.
    """
    table = pd.concat([plots, stage_medians], axis=1)
    table.columns = stage_columns
    return table


# adds median statistics of each growing stage to the plots GeoDataFrame for visualization
median_el_plots = _attach_to_plots(median_el)
median_ll_plots = _attach_to_plots(median_ll)
median_au_plots = _attach_to_plots(median_au)
median_se_plots = _attach_to_plots(median_se)

# row-wise concatenation of data from each growing stage
train_pts = (
    pd.concat([median_el_plots, median_ll_plots, median_au_plots, median_se_plots])
    .reset_index(drop=True)
    .drop('Id', axis= 1)
)


# specifies the treatment for each plot
def Treatment_extractor(x):
    """Return the first two characters of a plot name, e.g. 'T2R5' -> 'T2'."""
    return x[0:2]


train_pts['Treatment']= train_pts['Name'].apply(Treatment_extractor)



In [28]:
# displays the assembled training table (the notebook truncates the middle rows and columns)
train_pts

Unnamed: 0,Name,geometry,S1_ELLIP,S1_ORIEN,S1_LPR,S1_I2,S1_I1,S1_g3,S1_g2,S1_g1,...,S1_entropy,S1_alpha,S1_C22,S1_C11,R_NDVI,R_NDRE,R_NDWI,R_RECI,R_GCI,Treatment
0,T2R5,"POLYGON ((634092.234 4697157.568, 634158.210 4697157.125, 634157.648 4697073...",0.966687,-6.138963,0.179745,0.007882,0.053419,0.001407,-0.009425,0.041992,...,0.568433,17.912964,0.008156,0.049347,0.345594,0.262896,-0.353637,0.713320,1.094238,T2
1,T3R5,"POLYGON ((634192.196 4697156.718, 634258.179 4697156.940, 634258.460 4697073...",-3.856736,-8.543944,0.292341,0.021344,0.087852,-0.007624,-0.016156,0.055583,...,0.733198,24.454109,0.024538,0.079806,0.438710,0.341483,-0.418864,1.037187,1.441536,T3
2,T1R5,"POLYGON ((634002.183 4697036.651, 634067.312 4697036.871, 634067.597 4696952...",-1.562623,3.704603,0.425677,0.016520,0.052554,-0.001586,0.004815,0.027467,...,0.770572,30.938370,0.018187,0.047084,0.386156,0.307217,-0.389557,0.886905,1.276312,T1
3,T4R5,"POLYGON ((634192.479 4697047.694, 634258.177 4697047.694, 634258.177 4696962...",-1.333437,-0.787224,0.128169,0.006096,0.054566,-0.002280,-0.001800,0.048562,...,0.493920,13.075060,0.006557,0.051782,0.202121,0.155074,-0.257947,0.367070,0.695235,T4
4,T1R1,"POLYGON ((633913.265 4696916.866, 633978.400 4696916.646, 633978.113 4696831...",-5.963002,-0.250282,0.238797,0.015221,0.075841,-0.012849,-0.000663,0.057722,...,0.667392,21.796854,0.012430,0.060300,0.367259,0.284634,-0.379122,0.795801,1.221255,T1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163,T7R2,"POLYGON ((634021.243 4696238.964, 634087.576 4696238.350, 634090.033 4696151...",2.746288,2.487178,0.268144,0.014927,0.061110,0.003633,0.003924,0.041953,...,0.732750,22.173309,0.016195,0.058195,0.447960,0.277298,-0.418378,0.767397,1.438667,T7
164,T5R4,"POLYGON ((634220.241 4696248.791, 634287.802 4696249.405, 634289.644 4696160...",-2.419377,6.095829,0.238199,0.012765,0.074588,-0.004147,0.011305,0.052082,...,0.670709,21.525810,0.014275,0.069163,0.474460,0.310828,-0.424705,0.902036,1.476476,T5
165,T5R6,"POLYGON ((633521.293 4696577.997, 633588.239 4696578.611, 633588.239 4696492...",0.111362,-1.917303,0.346872,0.023761,0.074445,0.000156,-0.002825,0.048318,...,0.804796,26.467976,0.024799,0.072341,0.456358,0.281648,-0.403223,0.784154,1.351336,T5
166,T6R6,"POLYGON ((633623.248 4696578.611, 633688.352 4696577.997, 633690.195 4696490...",-4.537666,15.987346,0.251162,0.013243,0.077864,-0.009780,0.026251,0.052241,...,0.590227,25.134678,0.016778,0.069693,0.499437,0.347022,-0.478720,1.062892,1.836709,T6
