In [142]:
import ee
import geemap
import pandas as pd
import numpy as np
import plotly.express as px

# Start the Earth Engine client session; every ee.* object built in the cells
# below is created after this call.
ee.Initialize()
# geemap map widget. It is not referenced again in the visible cells.
Map = geemap.Map()

Reference tutorial (extracting raster values at point samples in Google Earth Engine): https://kaflekrishna.com.np/blog-detail/extraction-raster-values-point-samples-google-earth-engine-gee/

In [143]:
def getNDVI(image):
    """Return ``image`` with its own 'NDVI' band appended through ``addBands``.

    Args:
        image: An image object exposing ``select`` and ``addBands``; it must
            already carry a band named 'NDVI'.

    Returns:
        The result of ``image.addBands(image.select('NDVI'))``.

    NOTE(review): the band is taken from the same image it is added to, so
    this appends a second copy rather than computing anything new - confirm
    whether the step is needed.
    """
    return image.addBands(image.select('NDVI'))

def getEVI(image):
    """Return ``image`` with its own 'EVI' band appended through ``addBands``.

    Args:
        image: An image object exposing ``select`` and ``addBands``; it must
            already carry a band named 'EVI'.

    Returns:
        The result of ``image.addBands(image.select('EVI'))``.

    NOTE(review): the band is taken from the same image it is added to, so
    this appends a second copy rather than computing anything new - confirm
    whether the step is needed.
    """
    return image.addBands(image.select('EVI'))

def addDate(image):
    """Append an integer band named 'date' encoding the image date.

    The image date is formatted with the pattern 'YYYYMMdd', parsed into a
    number, turned into a constant image, renamed to 'date' and cast to int
    before being added to ``image``.

    Args:
        image: An Earth Engine image exposing ``date()`` and ``addBands``.

    Returns:
        ``image`` with the extra 'date' band.
    """
    stamp = ee.Number.parse(ee.Date(image.date()).format('YYYYMMdd'))
    date_band = ee.Image(stamp).rename('date').toInt()
    return image.addBands(date_band)

In [144]:
# Retrieve satellite composites between the specified dates.
# NOTE(review): the 'C01' in both asset IDs points at Landsat Collection 1 -
# check the Earth Engine data catalog for the current status of these assets.
l8_ndvi_data = (
    ee.ImageCollection('LANDSAT/LC08/C01/T1_32DAY_NDVI')
    .filterDate("2013-12-01", "2014-06-07")
    .map(getNDVI)
    .map(addDate)
)

l8_evi_data = (
    ee.ImageCollection('LANDSAT/LC08/C01/T1_32DAY_EVI')
    .filterDate("2013-12-01", "2014-06-07")
    .map(getEVI)
    .map(addDate)
)

In [145]:
# Load the sampling locations. Later cells read the 'X' and 'Y' columns of this
# frame to build point geometries and reuse its column list for the exports.
# NOTE(review): absolute, user-specific Windows path - this cell only runs on a
# machine with exactly this folder layout; consider a configurable data folder.
plot_df = pd.read_csv(r"C:\Users\AdamMorgan\OneDrive\UCL\Research Project\Data\location_coordinates.csv")

In [146]:
# Convert the dataframe into an Earth Engine FeatureCollection: one point
# feature per row, positioned at (X, Y) and carrying every column of that row
# as a feature property.
features = [
    ee.Feature(ee.Geometry.Point([record['X'], record['Y']]), dict(record))
    for _, record in plot_df.iterrows()
]

# Final FeatureCollection assembly
ee_fc = ee.FeatureCollection(features)

In [147]:
def rasterExtraction(image):
    """Sample ``image`` at every feature of the module-level ``ee_fc``.

    Args:
        image: An Earth Engine image exposing ``sampleRegions``.

    Returns:
        Whatever ``image.sampleRegions`` returns for ``collection=ee_fc`` and
        ``scale=10``.

    NOTE(review): the scale is hard-coded to 10; confirm it is the intended
    sampling resolution for the composites this function is mapped over.
    """
    return image.sampleRegions(collection=ee_fc, scale=10)

In [148]:
# For each index: keep images intersecting the points, keep only the index
# band, re-attach the 'date' band (select() drops it), sample every image at
# the points and flatten the per-image results into one collection.
ndvi_results = (
    l8_ndvi_data
    .filterBounds(ee_fc)
    .select('NDVI')
    .map(addDate)
    .map(rasterExtraction)
    .flatten()
)
evi_results = (
    l8_evi_data
    .filterBounds(ee_fc)
    .select('EVI')
    .map(addDate)
    .map(rasterExtraction)
    .flatten()
)

In [149]:
# Fetch the first sampled feature of each collection to the client with
# getInfo(); the next cell reads its 'properties' mapping to list the fields.
ndvi_sample_result = ndvi_results.first().getInfo()
evi_sample_result = evi_results.first().getInfo()

In [150]:
# Property names as returned by Earth Engine for one sampled feature. Their
# order may differ from the column order of the sample data.
ndvi_columns = [*ndvi_sample_result['properties'].keys()]
evi_columns = [*evi_sample_result['properties'].keys()]

# Export order: the original sample-data columns first, followed by the
# sampled index value and the date band.
ndvi_column_df = list(plot_df.columns) + ['NDVI', 'date']
evi_column_df = list(plot_df.columns) + ['EVI', 'date']

In [151]:
# NDVI dataframe: gather the selected properties of every sampled feature into
# one nested list on the server, then download it with getInfo().
ndvi_nested_list = (
    ndvi_results
    .reduceColumns(ee.Reducer.toList(len(ndvi_column_df)), ndvi_column_df)
    .values()
    .get(0)
)
ndvi_data = ndvi_nested_list.getInfo()
ndvi_df = pd.DataFrame(ndvi_data, columns=ndvi_column_df)

# EVI dataframe: same procedure on the EVI samples.
evi_nested_list = (
    evi_results
    .reduceColumns(ee.Reducer.toList(len(evi_column_df)), evi_column_df)
    .values()
    .get(0)
)
evi_data = evi_nested_list.getInfo()
evi_df = pd.DataFrame(evi_data, columns=evi_column_df)

In [152]:
# Preview the first five NDVI samples.
ndvi_df.head(5)

Unnamed: 0,Location,X,Y,NDVI,date
0,AGUGLIANO,13.363,43.545,0.043876,20131223
1,ARGELATO,11.34445,44.650532,0.493778,20131223
2,CAMMARATA,13.73084,37.634871,0.013995,20131223
3,FIORENZUOLA,9.894081,44.927663,0.076173,20131223
4,FOGGIA,15.502678,41.462218,0.063762,20131223


In [153]:
# Combine NDVI and EVI samples; the outer join keeps rows present in only one
# of the two frames.
merge_keys = ['Location', 'X', 'Y', 'date']
merged_df = ndvi_df.merge(evi_df, on=merge_keys, how='outer')

# Keys first, then the two index columns.
column_order = merge_keys + ['NDVI', 'EVI']
df = merged_df[column_order]
df.head(3)

Unnamed: 0,Location,X,Y,date,NDVI,EVI
0,AGUGLIANO,13.363,43.545,20131223,0.043876,0.273331
1,ARGELATO,11.34445,44.650532,20131223,0.493778,0.546566
2,CAMMARATA,13.73084,37.634871,20131223,0.013995,0.140016


In [154]:
# `df` was produced by selecting columns from `merged_df`, so assigning columns
# into it in place raises pandas' SettingWithCopyWarning. `assign` builds a new
# frame at each step instead of writing into that selection.
df = df.assign(date=pd.to_datetime(df['date'], format='%Y%m%d'))

# Extract year and month into new columns (derived from the parsed dates).
df = df.assign(year=df['date'].dt.year, month=df['date'].dt.month)

# Reorder columns
column_order = ['Location', 'X', 'Y', 'date', 'year', 'month', 'NDVI','EVI']
df = df[column_order]

# Rename columns
df = df.rename(columns={
    'X': 'coordinate x',
    'Y': 'coordinate y'
})

df.head(5)

Unnamed: 0,Location,coordinate x,coordinate y,date,year,month,NDVI,EVI
0,AGUGLIANO,13.363,43.545,2013-12-23,2013,12,0.043876,0.273331
1,ARGELATO,11.34445,44.650532,2013-12-23,2013,12,0.493778,0.546566
2,CAMMARATA,13.73084,37.634871,2013-12-23,2013,12,0.013995,0.140016
3,FIORENZUOLA,9.894081,44.927663,2013-12-23,2013,12,0.076173,0.222927
4,FOGGIA,15.502678,41.462218,2013-12-23,2013,12,0.063762,0.332077


In [155]:
# Number of rows (sampled dates) available per location.
location_counts = df['Location'].value_counts()
print(location_counts)

Location
AGUGLIANO               6
CAMMARATA               6
FOGGIA                  6
LIBERTINIA              6
MONTELIBRETTI           6
S.ANGELO LODIGIANO      6
S.STEFANO QUISQUINIA    6
TOLENTINO               6
ARGELATO                5
FIORENZUOLA             5
Name: count, dtype: int64


In [156]:
# Line plot for a single location only for now; the detailed analysis follows
# in a later plot.
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of `df` (avoids pandas' SettingWithCopyWarning).
df_filtered = df.loc[df['Location'] == 'AGUGLIANO'].copy()
# Ensures 'date' is datetime; values that are already datetimes stay as they are.
df_filtered['date'] = pd.to_datetime(df_filtered['date'], format='%Y%m%d')
df_filtered.head(3)

Unnamed: 0,Location,coordinate x,coordinate y,date,year,month,NDVI,EVI
0,AGUGLIANO,13.363,43.545,2013-12-23,2013,12,0.043876,0.273331
10,AGUGLIANO,13.363,43.545,2014-01-01,2014,1,0.043876,0.273331
20,AGUGLIANO,13.363,43.545,2014-02-02,2014,2,0.257505,0.288275


In [157]:
# Number of rows per date for the selected location.
date_counts = df_filtered['date'].value_counts()
print(date_counts)

date
2013-12-23    1
2014-01-01    1
2014-02-02    1
2014-03-06    1
2014-04-07    1
2014-05-09    1
Name: count, dtype: int64


In [158]:
# NDVI over time for the single location held in `df_filtered`.
# (The unused `px.data.stocks()` sample-data load was removed: it was never
# referenced by the plot.)
fig = px.line(df_filtered, x='date', y="NDVI", title='NDVI time series (single location)')
fig.show()

### PART 2

In [159]:
def format_year_month_with_prefix(year, month, prefix):
    """Build a column label of the form ``<prefix><MON>_<year>``.

    Example: ``format_year_month_with_prefix(2013, 12, 'NDVI_')`` returns
    ``'NDVI_DEC_2013'``.

    Args:
        year: Calendar year. An integral float such as ``2013.0`` is rendered
            as ``2013`` so the label does not end in ``.0``.
        month: Month number from 1 to 12.
        prefix: Text placed in front of the month abbreviation.

    Returns:
        The formatted label as a string.

    Raises:
        KeyError: If ``month`` is not a month number from 1 to 12.
    """
    month_str_mapping = {
        1: 'JAN', 2: 'FEB', 3: 'MAR', 4: 'APR', 5: 'MAY', 6: 'JUN',
        7: 'JUL', 8: 'AUG', 9: 'SEP', 10: 'OCT', 11: 'NOV', 12: 'DEC'
    }
    month_str = month_str_mapping[month]
    # pandas readily upcasts integer values to float; without this step a year
    # of 2013.0 would produce a label ending in '_2013.0'.
    if isinstance(year, float) and year.is_integer():
        year = int(year)
    return prefix + month_str + "_" + str(year)

# Long -> wide: build one record per location holding its coordinates plus one
# NDVI_<MON>_<year> and one EVI_<MON>_<year> entry for every sampled row.
data_dicts = []

locations = df['Location'].unique()

for location in locations:
    location_rows = df[df['Location'] == location]
    first_row = location_rows.iloc[0]  # coordinates are read from the first row
    data_dict = {
        'Location': location,
        'coordinate x': first_row['coordinate x'],
        'coordinate y': first_row['coordinate y'],
    }

    for _, row in location_rows.iterrows():
        year, month = row['year'], row['month']
        data_dict[format_year_month_with_prefix(year, month, 'NDVI_')] = row['NDVI']
        data_dict[format_year_month_with_prefix(year, month, 'EVI_')] = row['EVI']

    data_dicts.append(data_dict)

# Assemble the records into a frame; keys missing from a record become NaN.
result_df = pd.DataFrame(data_dicts)

# Store the location as a categorical column
result_df['Location'] = result_df['Location'].astype('category')

# Results
result_df.head(3)

Unnamed: 0,Location,coordinate x,coordinate y,NDVI_DEC_2013,EVI_DEC_2013,NDVI_JAN_2014,EVI_JAN_2014,NDVI_FEB_2014,EVI_FEB_2014,NDVI_MAR_2014,EVI_MAR_2014,NDVI_APR_2014,EVI_APR_2014,NDVI_MAY_2014,EVI_MAY_2014
0,AGUGLIANO,13.363,43.545,0.043876,0.273331,0.043876,0.273331,0.257505,0.288275,0.04719,0.28364,0.707269,0.716885,0.066296,0.303205
1,ARGELATO,11.34445,44.650532,0.493778,0.546566,0.574462,0.623811,,,0.655395,0.760731,0.766244,0.825879,0.43229,0.443176
2,CAMMARATA,13.73084,37.634871,0.013995,0.140016,0.013995,0.140016,0.490471,0.57641,0.183312,0.512542,0.408208,0.377535,0.347182,0.376438


In [160]:
# Load the per-location temperature/rainfall table (file: temp_rainfall.csv);
# it is merged on 'Location' in the next cell.
# NOTE(review): absolute, user-specific path - not portable to other machines.
rt_df = pd.read_csv(r"C:\Users\AdamMorgan\OneDrive\UCL\Research Project\Code and results\Temp and rainfall\temp_rainfall.csv")
rt_df.head(3)

Unnamed: 0,Location,TMP_DEC_2013,AMT_DEC_2013,MaxT_DEC_2013,MinT_DEC_2013,TMP_JAN_2014,AMT_JAN_2014,MaxT_JAN_2014,MinT_JAN_2014,TMP_FEB_2014,...,MaxT_MAR_2014,MinT_MAR_2014,TMP_APR_2014,AMT_APR_2014,MaxT_APR_2014,MinT_APR_2014,TMP_MAY_2014,AMT_MAY_2014,MaxT_MAY_2014,MinT_MAY_2014
0,S.STEFANO QUISQUINIA,56.7,11.306452,14.8,7.4,80.6,10.880645,15.8,7.6,72.3,...,15.2,6.1,50.9,14.38,19.1,9.6,25.3,17.441935,22.0,13.4
1,CAMMARATA,53.1,9.877419,12.0,6.5,80.9,9.251613,13.3,4.9,69.0,...,16.5,4.9,55.8,13.096667,18.0,7.6,29.2,16.46129,21.2,12.5
2,LIBERTINIA,63.1,10.735484,13.2,8.4,54.5,10.629032,14.4,7.3,69.2,...,16.2,6.8,40.0,14.74,19.1,9.1,14.9,17.925806,22.8,14.3


In [161]:
# Attach the temperature/rainfall columns to the wide vegetation-index table.
# The outer join keeps locations that appear in only one of the two frames.
merged_df = result_df.merge(rt_df, on='Location', how='outer')

# Results
merged_df.head(3)

Unnamed: 0,Location,coordinate x,coordinate y,NDVI_DEC_2013,EVI_DEC_2013,NDVI_JAN_2014,EVI_JAN_2014,NDVI_FEB_2014,EVI_FEB_2014,NDVI_MAR_2014,...,MaxT_MAR_2014,MinT_MAR_2014,TMP_APR_2014,AMT_APR_2014,MaxT_APR_2014,MinT_APR_2014,TMP_MAY_2014,AMT_MAY_2014,MaxT_MAY_2014,MinT_MAY_2014
0,AGUGLIANO,13.363,43.545,0.043876,0.273331,0.043876,0.273331,0.257505,0.288275,0.04719,...,16.4,6.9,55.0,13.533333,16.5,10.1,57.3,16.519355,21.5,11.7
1,ARGELATO,11.34445,44.650532,0.493778,0.546566,0.574462,0.623811,,,0.655395,...,15.8,8.1,67.8,15.84,19.5,12.5,57.6,17.967742,22.2,13.7
2,CAMMARATA,13.73084,37.634871,0.013995,0.140016,0.013995,0.140016,0.490471,0.57641,0.183312,...,16.5,4.9,55.8,13.096667,18.0,7.6,29.2,16.46129,21.2,12.5


In [162]:
# Normalise headers that a merge or a raw CSV may have altered (suffixed
# coordinate columns, trailing space after 'Location'). Labels that are not
# present are left alone by rename().
header_fixes = {
    'coordinate x_x': 'coordinate x',
    'coordinate y_x': 'coordinate y',
    'Location ': 'Location',
}
merged_df = merged_df.rename(columns=header_fixes)

# From here on `df` refers to the wide, merged table (same object as merged_df).
df = merged_df

df.head(3)

Unnamed: 0,Location,coordinate x,coordinate y,NDVI_DEC_2013,EVI_DEC_2013,NDVI_JAN_2014,EVI_JAN_2014,NDVI_FEB_2014,EVI_FEB_2014,NDVI_MAR_2014,...,MaxT_MAR_2014,MinT_MAR_2014,TMP_APR_2014,AMT_APR_2014,MaxT_APR_2014,MinT_APR_2014,TMP_MAY_2014,AMT_MAY_2014,MaxT_MAY_2014,MinT_MAY_2014
0,AGUGLIANO,13.363,43.545,0.043876,0.273331,0.043876,0.273331,0.257505,0.288275,0.04719,...,16.4,6.9,55.0,13.533333,16.5,10.1,57.3,16.519355,21.5,11.7
1,ARGELATO,11.34445,44.650532,0.493778,0.546566,0.574462,0.623811,,,0.655395,...,15.8,8.1,67.8,15.84,19.5,12.5,57.6,17.967742,22.2,13.7
2,CAMMARATA,13.73084,37.634871,0.013995,0.140016,0.013995,0.140016,0.490471,0.57641,0.183312,...,16.5,4.9,55.8,13.096667,18.0,7.6,29.2,16.46129,21.2,12.5


In [163]:
# Load the yield trial table; it is merged on 'Location' further down.
# NOTE(review): absolute, user-specific path - not portable to other machines.
# NOTE(review): in the preview below 'Coordinate x' holds 43.545 for AGUGLIANO
# while the location file's X held 13.363 - the axes look swapped relative to
# the location file; both 'Coordinate' columns are dropped in a later cell.
yield_df = pd.read_csv(r"C:\Users\AdamMorgan\OneDrive\UCL\Research Project\Data\Yield data\yield data.csv")
yield_df.head(3)

Unnamed: 0,Specie,Year,Areal,Region,District,Location,Coordinate x,Coordinate y,MARS ID,Variety,...,N uptake,Prot_PCss,Bianconati_PC,Stremenziti_PC,Spigatda1Marzo_gg,Altezzapianta_cm,Allettamentoa mat_09,Rugginebruna_09,Oidio_09,Septoria_09
0,Frumento_Duro,2013,2-Cost. Adriatica: Nord-Centro,MARCHE,AN,AGUGLIANO,43.545,13.363,69120,Achille,...,142.793478,12.5,59.9,2.1,62.0,92.67,,,,1.67
1,Frumento_Duro,2013,2-Cost. Adriatica: Nord-Centro,MARCHE,AN,AGUGLIANO,43.545,13.363,69120,Anco Marzio,...,136.650522,13.5,43.0,1.57,58.0,86.67,,,,1.0
2,Frumento_Duro,2013,2-Cost. Adriatica: Nord-Centro,MARCHE,AN,AGUGLIANO,43.545,13.363,69120,Athoris,...,124.508348,13.0,43.63,1.9,63.0,83.0,,,,1.0


In [164]:
# Strip the trailing space from a 'Location ' header if the CSV has one;
# rename() leaves the frame unchanged when that label is absent.
yield_df = yield_df.rename(columns={'Location ': 'Location'})

In [165]:
# Join the yield rows onto the per-location table. Each location row is
# repeated once per matching yield row (see the repeated AGUGLIANO rows below).
merged_df = df.merge(yield_df, on='Location', how='outer')
merged_df.head(3)

Unnamed: 0,Location,coordinate x,coordinate y,NDVI_DEC_2013,EVI_DEC_2013,NDVI_JAN_2014,EVI_JAN_2014,NDVI_FEB_2014,EVI_FEB_2014,NDVI_MAR_2014,...,N uptake,Prot_PCss,Bianconati_PC,Stremenziti_PC,Spigatda1Marzo_gg,Altezzapianta_cm,Allettamentoa mat_09,Rugginebruna_09,Oidio_09,Septoria_09
0,AGUGLIANO,13.363,43.545,0.043876,0.273331,0.043876,0.273331,0.257505,0.288275,0.04719,...,142.793478,12.5,59.9,2.1,62.0,92.67,,,,1.67
1,AGUGLIANO,13.363,43.545,0.043876,0.273331,0.043876,0.273331,0.257505,0.288275,0.04719,...,136.650522,13.5,43.0,1.57,58.0,86.67,,,,1.0
2,AGUGLIANO,13.363,43.545,0.043876,0.273331,0.043876,0.273331,0.257505,0.288275,0.04719,...,124.508348,13.0,43.63,1.9,63.0,83.0,,,,1.0


In [166]:
# Keep only the yield rows recorded for 2014.
df = merged_df[merged_df['Year'] == 2014]

In [167]:
# Load the per-location soil table (file: soil_data.csv); merged on 'Location'
# in the next cell.
# NOTE(review): absolute, user-specific path - not portable to other machines.
soil_df = pd.read_csv(r"C:\Users\AdamMorgan\OneDrive\UCL\Research Project\Code and results\Data extraction and preprocessing\soil_data.csv")
soil_df.head(3)

Unnamed: 0,Location,nitrogen_0-5cm_mean,nitrogen_100-200cm_mean,nitrogen_15-30cm_mean,nitrogen_30-60cm_mean,nitrogen_5-15cm_mean,nitrogen_60-100cm_mean,soc_0-5cm_mean,soc_100-200cm_mean,soc_15-30cm_mean,...,sand_15-30cm_mean,sand_30-60cm_mean,sand_5-15cm_mean,sand_60-100cm_mean,silt_0-5cm_mean,silt_100-200cm_mean,silt_15-30cm_mean,silt_30-60cm_mean,silt_5-15cm_mean,silt_60-100cm_mean
0,AGUGLIANO,2654,849,1640,1642,1230,973,459,71,199,...,103,115,89,121,470,502,477,491,478,487
1,ARGELATO,4837,1095,1565,1368,1600,1133,396,56,148,...,69,85,61,92,499,517,527,533,484,524
2,CAMMARATA,2502,912,1449,1172,1728,871,447,51,164,...,294,312,234,314,450,396,398,403,427,404


In [168]:
# Attach the soil columns to every remaining row.
# NOTE(review): this is an outer join performed after the Year == 2014 filter,
# so a location present only in soil_df would come back as a row with missing
# yield values - confirm that is intended (a left join would not add such rows).
df = df.merge(soil_df, on='Location', how='outer')
df.head(3)

Unnamed: 0,Location,coordinate x,coordinate y,NDVI_DEC_2013,EVI_DEC_2013,NDVI_JAN_2014,EVI_JAN_2014,NDVI_FEB_2014,EVI_FEB_2014,NDVI_MAR_2014,...,sand_15-30cm_mean,sand_30-60cm_mean,sand_5-15cm_mean,sand_60-100cm_mean,silt_0-5cm_mean,silt_100-200cm_mean,silt_15-30cm_mean,silt_30-60cm_mean,silt_5-15cm_mean,silt_60-100cm_mean
0,AGUGLIANO,13.363,43.545,0.043876,0.273331,0.043876,0.273331,0.257505,0.288275,0.04719,...,103,115,89,121,470,502,477,491,478,487
1,AGUGLIANO,13.363,43.545,0.043876,0.273331,0.043876,0.273331,0.257505,0.288275,0.04719,...,103,115,89,121,470,502,477,491,478,487
2,AGUGLIANO,13.363,43.545,0.043876,0.273331,0.043876,0.273331,0.257505,0.288275,0.04719,...,103,115,89,121,470,502,477,491,478,487


In [169]:
# Remove the yield file's own coordinate columns and 'Specie', together with
# 'Indice resa' (original note: "Dropping because Yanming"; the rationale is
# not recorded in this notebook).
columns_to_drop = ['Coordinate y', 'Coordinate x', 'Specie', 'Indice resa']
filtered_df = df.drop(columns=columns_to_drop)

In [170]:
# Give three yield-file columns the names used by the model column lists below.
column_renames = {
    'Year': 'year',
    'Peso1000semi': 'weight of 1000 seeds',
    'Prot_PCss': 'Prot_PC_bio',
}
filtered_df = filtered_df.rename(columns=column_renames)

filtered_df.head(3)

Unnamed: 0,Location,coordinate x,coordinate y,NDVI_DEC_2013,EVI_DEC_2013,NDVI_JAN_2014,EVI_JAN_2014,NDVI_FEB_2014,EVI_FEB_2014,NDVI_MAR_2014,...,sand_15-30cm_mean,sand_30-60cm_mean,sand_5-15cm_mean,sand_60-100cm_mean,silt_0-5cm_mean,silt_100-200cm_mean,silt_15-30cm_mean,silt_30-60cm_mean,silt_5-15cm_mean,silt_60-100cm_mean
0,AGUGLIANO,13.363,43.545,0.043876,0.273331,0.043876,0.273331,0.257505,0.288275,0.04719,...,103,115,89,121,470,502,477,491,478,487
1,AGUGLIANO,13.363,43.545,0.043876,0.273331,0.043876,0.273331,0.257505,0.288275,0.04719,...,103,115,89,121,470,502,477,491,478,487
2,AGUGLIANO,13.363,43.545,0.043876,0.273331,0.043876,0.273331,0.257505,0.288275,0.04719,...,103,115,89,121,470,502,477,491,478,487


In [171]:
# Map every column name of filtered_df to its positional index.
column_indices = dict(zip(filtered_df.columns, range(len(filtered_df.columns))))


In [172]:
cols = filtered_df.columns.tolist()
# Columns to move, picked by their current positions in filtered_df.
# NOTE(review): positional picks select different columns if the upstream
# merges change the column layout - verify against column_indices.
subset = [cols[idx] for idx in (31, 27, 32, 28, 29, 30)]

# Take the chosen columns out of their original slots ...
for col in subset:
    cols.remove(col)

# ... and splice them back in, in `subset` order, at the front of the list.
position_to_insert = 0
cols[position_to_insert:position_to_insert] = subset

df = filtered_df[cols]

## Creating datasets for the model

In [173]:
# Snapshot of the final column names of `df` as a plain list.
column_names = df.columns.tolist()
# (uncomment the next line to display the list)
# column_names

### TODO: Prod_13pcUM (grain yield) should be included; it was not included higher up, so this needs changing

In [174]:
# Target columns plus the variety/year descriptors kept with them.
predict_cols = ['Prod_13pcUM', 'N uptake', 'Prot_PC_bio', 'Variety', 'year']

# Site descriptors.
location_cols = ['Areal', 'Region', 'District', 'Location', 'coordinate x', 'coordinate y']

# Month suffixes shared by every monthly column family, in chronological order.
_season_months = ['DEC_2013', 'JAN_2014', 'FEB_2014', 'MAR_2014', 'APR_2014', 'MAY_2014']

# Vegetation indices: <INDEX>_<MON>_<year>
NDVI_2014_cols = ['NDVI_' + month for month in _season_months]
EVI_2014_cols = ['EVI_' + month for month in _season_months]
VI_2014 = NDVI_2014_cols + EVI_2014_cols

# Weather columns: <MEASURE>_<MON>_<year>
MaxT_2014_cols = ['MaxT_' + month for month in _season_months]
MinT_2014_cols = ['MinT_' + month for month in _season_months]
TMP_2014_cols = ['TMP_' + month for month in _season_months]
AMT_2014_cols = ['AMT_' + month for month in _season_months]
TEMP_2014 = TMP_2014_cols + AMT_2014_cols + MaxT_2014_cols + MinT_2014_cols

# Soil columns: <property>_<depth>_mean, grouped by property, shallow to deep.
_soil_properties = ['nitrogen', 'soc', 'bdod', 'cec', 'cfvo', 'clay', 'ocd', 'phh2o', 'sand', 'silt']
_soil_depths = ['0-5cm', '5-15cm', '15-30cm', '30-60cm', '60-100cm', '100-200cm']
soil_cols = [
    soil_property + '_' + depth + '_mean'
    for soil_property in _soil_properties
    for depth in _soil_depths
]

In [175]:
# Each model dataset is a column subset of `df`, written to its own CSV in the
# current working directory (row index omitted).

# Model 1: targets + site descriptors + weather + soil
columns_to_keep = [*predict_cols, *location_cols, *TEMP_2014, *soil_cols]
model_1_df = df[columns_to_keep]
model_1_df.to_csv('model_1.csv', index=False)

# Model 2: explicit, hand-picked column list
columns_to_keep = [
    'Variety', 'Location',
    'Prod_13pcUM', 'N uptake', 'Prot_PC_bio',
    'più_freq', 'Peso hl', 'weight of 1000 seeds',
    'Bianconati_PC', 'Stremenziti_PC',
    'Spigatda1Marzo_gg', 'Altezzapianta_cm',
    'Allettamentoa mat_09', 'Rugginebruna_09', 'Oidio_09', 'Septoria_09',
]
model_2_df = df[columns_to_keep]
model_2_df.to_csv('model_2.csv', index=False)

# Model 3: targets + vegetation indices
columns_to_keep = [*predict_cols, *VI_2014]
model_3_df = df[columns_to_keep]
model_3_df.to_csv('model_3.csv', index=False)

# Model 4: targets + site descriptors + vegetation indices + weather + soil
columns_to_keep = [*predict_cols, *location_cols, *VI_2014, *TEMP_2014, *soil_cols]
model_4_df = df[columns_to_keep]
model_4_df.to_csv('model_4.csv', index=False)