# Omdena  - Milan Chapter Agrifoods
## AI for Sustainable agri-food systems: use of Satellite Imagery
### Exploratory analysis of fresh vegetable production in Italy 2006-2021

#### Author: Maria Fisher 


The main objective of this study is to gather information about crop production in Italy for the period 2006-2021.

The crop dataset used in this study was downloaded from the Italian National Institute of Statistics (Istat).



In [2]:
import warnings 
warnings.filterwarnings("ignore")

import os
import pandas as pd
pd.options.display.float_format = "{:.2f}".format
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import seaborn as sns 
import scipy 
import sklearn
import geopandas as gpd
import pgeocode
import folium
import sys
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot

In [7]:
# Load the raw fresh-vegetable crop table; skipinitialspace drops blanks that follow each delimiter.
fresh_veg = pd.read_csv(
    './Italy_crop_data/fresh_veg.csv',
    skipinitialspace=True,
)
fresh_veg.head()

Unnamed: 0,ITTER107,Territory,TIPO_DATO5,Data type,AGRI_MADRE,Type of crop,TIME,Select time,Value,Flag Codes,Flags
0,ITC16,Cuneo,TP_QUIN_EXT,total production - quintals,RADIINGRE,chicory and radicchio in greenhouses,2006,2006,300,e,estimate data
1,ITC16,Cuneo,TP_QUIN_EXT,total production - quintals,RADIINGRE,chicory and radicchio in greenhouses,2007,2007,450,,
2,ITC16,Cuneo,TP_QUIN_EXT,total production - quintals,RADIINGRE,chicory and radicchio in greenhouses,2008,2008,450,,
3,ITC16,Cuneo,TP_QUIN_EXT,total production - quintals,RADIINGRE,chicory and radicchio in greenhouses,2009,2009,375,e,estimate data
4,ITC16,Cuneo,TP_QUIN_EXT,total production - quintals,RADIINGRE,chicory and radicchio in greenhouses,2010,2010,425,e,estimate data


## Pre-processing dataset 

In [8]:
# Discard the code columns (ITTER107, TIPO_DATO5, AGRI_MADRE), the second
# year column (TIME) and the two flag columns; the readable label columns stay.
fresh_veg = fresh_veg.drop(
    ['ITTER107', 'TIPO_DATO5', 'AGRI_MADRE', 'TIME', 'Flag Codes', 'Flags'],
    axis='columns',
)
fresh_veg

Unnamed: 0,Territory,Data type,Type of crop,Select time,Value
0,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2006,300
1,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2007,450
2,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2008,450
3,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2009,375
4,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2010,425
...,...,...,...,...,...
158967,Rimini,harvested production - quintals,other fresh legumes,2022,1360
158968,Perugia,total area - hectares,other fresh legumes,2022,5
158969,Perugia,total production - quintals,other fresh legumes,2022,400
158970,Perugia,harvested production - quintals,other fresh legumes,2022,400


In [9]:
# Give the remaining columns short, identifier-friendly names
fresh_veg = fresh_veg.rename(
    columns={
        'Territory': 'City',
        'Data type': 'Data_type',
        'Type of crop': 'Type_crop',
        'Select time': 'Year',
    }
)


In [10]:
# Keep observations up to and including 2021 (rows for 2022 are excluded)
fresh_veg = fresh_veg.loc[fresh_veg["Year"].lt(2022)]

In [11]:
def show_info(fresh_veg):
    """Print a quick structural summary of a DataFrame.

    Reports, in order: the shape, the per-column dtypes and non-null counts
    (via ``DataFrame.info``), the number of distinct values per column and
    the number of nulls per column.

    Parameters
    ----------
    fresh_veg : pandas.DataFrame
        Frame to summarise.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    separator = '-' * 50
    print('DATASET SHAPE: ', fresh_veg.shape, '\n')
    print(separator)
    print('FEATURE DATA TYPES:')
    # info() writes its own report and returns None, so it is called directly;
    # wrapping it in print() would emit a stray "None" line after the report.
    fresh_veg.info()
    print('\n', separator)
    print('NUMBER OF UNIQUE VALUES PER FEATURE:', '\n')
    print(fresh_veg.nunique())
    print('\n', separator)
    print('NULL VALUES PER FEATURE')
    print(fresh_veg.isnull().sum())
# Summarise the crop frame after the column clean-up and the year filter
show_info(fresh_veg)

DATASET SHAPE:  (153632, 5) 

--------------------------------------------------
FEATURE DATA TYPES:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 153632 entries, 0 to 158955
Data columns (total 5 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   City       153632 non-null  object
 1   Data_type  153632 non-null  object
 2   Type_crop  153632 non-null  object
 3   Year       153632 non-null  int64 
 4   Value      153632 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 7.0+ MB
None

 --------------------------------------------------
NUMBER OF UNIQUE VALUES PER FEATURE: 

City           110
Data_type        5
Type_crop       68
Year            16
Value        15689
dtype: int64

 --------------------------------------------------
NULL VALUES PER FEATURE
City         0
Data_type    0
Type_crop    0
Year         0
Value        0
dtype: int64


## Cities in Italy producing fresh vegetables

In [12]:
# List the distinct territory names present in the crop data
fresh_veg["City"].unique()


array(['Cuneo', 'Asti', 'Imperia', 'Genova', 'La Spezia', 'Varese',
       'Lecco', 'Milano', 'Bergamo', 'Brescia', 'Cremona', 'Mantova',
       'Trentino Alto Adige / Südtirol', 'Vicenza', 'Treviso', 'Venezia',
       'Padova', 'Rovigo', 'Trieste', 'Piacenza', 'Parma', 'Bologna',
       'Ferrara', 'Forlì-Cesena', 'Massa-Carrara', 'Firenze', 'Prato',
       'Livorno', 'Arezzo', 'Perugia', 'Pesaro e Urbino', 'Ancona',
       'Ascoli Piceno', 'Roma', 'Latina', 'Frosinone', 'Caserta',
       'Napoli', 'Salerno', 'Brindisi', 'Catanzaro', 'Reggio di Calabria',
       'Agrigento', 'Catania', 'Sassari', 'Nuoro', 'Cagliari', 'Oristano',
       'Olbia-Tempio', 'Ogliastra', 'Medio Campidano',
       'Carbonia-Iglesias', 'Torino', 'Novara', 'Alessandria', 'Verona',
       'Pordenone', 'Modena', 'Ravenna', 'Rimini', 'Lucca', 'Pistoia',
       'Pisa', 'Siena', 'Grosseto', 'Macerata', "L'Aquila", 'Chieti',
       'Foggia', 'Bari', 'Taranto', 'Lecce', 'Potenza', 'Matera',
       'Cosenza', 'Trapani',

In [13]:
# Subset: rows whose Data_type is the total-production label.
# NOTE(review): the label literal ends with a space; presumably that mirrors the raw labels in the file - confirm before trimming it.
fresh_veg1 = fresh_veg[fresh_veg['Data_type'].eq('total production - quintals ')]

In [14]:
# Expose the production figures under an explicit column name.
# fresh_veg1 already contains only total-production rows, so Value is copied
# as-is (no second mask is needed). assign() returns a new frame, which avoids
# writing a column into a slice of fresh_veg (SettingWithCopyWarning) and
# avoids chained indexing of the form df['Value'][mask].
fresh_veg1 = fresh_veg1.assign(total_production=fresh_veg1['Value'])
fresh_veg1.head(10)

Unnamed: 0,City,Data_type,Type_crop,Year,Value,total_production
0,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2006,300,300
1,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2007,450,450
2,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2008,450,450
3,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2009,375,375
4,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2010,425,425
5,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2011,425,425
6,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2012,408,408
7,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2013,415,415
8,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2014,425,425
9,Cuneo,total production - quintals,chicory and radicchio in greenhouses,2015,825,825


In [15]:
# Subset: rows whose Data_type is the total-area (hectares) label
fresh_veg2 = fresh_veg[fresh_veg['Data_type'].eq('total area - hectares')]
fresh_veg2

Unnamed: 0,City,Data_type,Type_crop,Year,Value
1764,Torino,total area - hectares,celery in open field,2006,10
1765,Torino,total area - hectares,celery in open field,2008,10
1766,Torino,total area - hectares,celery in open field,2009,10
1767,Torino,total area - hectares,celery in open field,2010,10
1768,Torino,total area - hectares,celery in open field,2011,10
...,...,...,...,...,...
158706,Treviso,total area - hectares,shallots in open field,2021,1
158708,Piacenza,total area - hectares,shallots in open field,2021,59
158710,Alessandria,total area - hectares,white cabbage,2021,15
158711,Bologna,total area - hectares,white cabbage,2021,10


In [16]:
# Expose the area figures under an explicit column name.
# fresh_veg2 already contains only total-area rows, so Value is copied as-is
# (no second mask is needed). assign() returns a new frame, which avoids
# writing a column into a slice of fresh_veg (SettingWithCopyWarning) and
# avoids chained indexing of the form df['Value'][mask].
fresh_veg2 = fresh_veg2.assign(total_area_ha=fresh_veg2['Value'])
fresh_veg2.tail(10)

Unnamed: 0,City,Data_type,Type_crop,Year,Value,total_area_ha
158697,Pordenone,total area - hectares,tomato for fresh consumption,2021,4,4
158699,Bologna,total area - hectares,tomato for fresh consumption,2021,20,20
158701,Alessandria,total area - hectares,shallots in open field,2021,32,32
158702,Verona,total area - hectares,shallots in open field,2021,17,17
158704,Vicenza,total area - hectares,shallots in open field,2021,6,6
158706,Treviso,total area - hectares,shallots in open field,2021,1,1
158708,Piacenza,total area - hectares,shallots in open field,2021,59,59
158710,Alessandria,total area - hectares,white cabbage,2021,15,15
158711,Bologna,total area - hectares,white cabbage,2021,10,10
158712,Alessandria,total area - hectares,red cabbage,2021,6,6


In [17]:
# Attach the cultivated area to each production record, matching on year, city and crop.
# The left join keeps every production row; rows with no area record get NaN.
fresh_veg_total = pd.merge_ordered(
    left=fresh_veg1,
    right=fresh_veg2,
    how='left',
    on=['Year', 'City', 'Type_crop'],
)
fresh_veg_total

Unnamed: 0,City,Data_type_x,Type_crop,Year,Value_x,total_production,Data_type_y,Value_y,total_area_ha
0,Agrigento,total production - quintals,artichokes,2006,340200,340200,total area - hectares,3240.00,3240.00
1,Agrigento,total production - quintals,bean in greenhouses,2006,3680,3680,,,
2,Agrigento,total production - quintals,cabbage in open field,2006,9460,9460,total area - hectares,55.00,55.00
3,Agrigento,total production - quintals,cauliflower and broccoli in open field,2006,67080,67080,total area - hectares,312.00,312.00
4,Agrigento,total production - quintals,celery in open field,2006,1250,1250,total area - hectares,14.00,14.00
...,...,...,...,...,...,...,...,...,...
51109,Viterbo,total production - quintals,tomato in grennhouses,2021,1000,1000,,,
51110,Viterbo,total production - quintals,tomatoes for processing,2021,1185000,1185000,total area - hectares,1600.00,1600.00
51111,Viterbo,total production - quintals,turnip broccoli,2021,3530,3530,total area - hectares,15.00,15.00
51112,Viterbo,total production - quintals,watermelon in greenhouses,2021,450,450,,,


In [18]:
# Remove the suffixed label/value columns left over from the merge;
# total_production and total_area_ha carry the same figures.
fresh_veg_total = fresh_veg_total.drop(
    ['Data_type_x', 'Data_type_y', 'Value_x', 'Value_y'],
    axis='columns',
)
fresh_veg_total

Unnamed: 0,City,Type_crop,Year,total_production,total_area_ha
0,Agrigento,artichokes,2006,340200,3240.00
1,Agrigento,bean in greenhouses,2006,3680,
2,Agrigento,cabbage in open field,2006,9460,55.00
3,Agrigento,cauliflower and broccoli in open field,2006,67080,312.00
4,Agrigento,celery in open field,2006,1250,14.00
...,...,...,...,...,...
51109,Viterbo,tomato in grennhouses,2021,1000,
51110,Viterbo,tomatoes for processing,2021,1185000,1600.00
51111,Viterbo,turnip broccoli,2021,3530,15.00
51112,Viterbo,watermelon in greenhouses,2021,450,


In [19]:
# Convert quintals to tonnes (1 quintal = 100 kg = 0.1 t)
fresh_veg_total['total_production'] = fresh_veg_total['total_production'].div(10)
fresh_veg_total

Unnamed: 0,City,Type_crop,Year,total_production,total_area_ha
0,Agrigento,artichokes,2006,34020.00,3240.00
1,Agrigento,bean in greenhouses,2006,368.00,
2,Agrigento,cabbage in open field,2006,946.00,55.00
3,Agrigento,cauliflower and broccoli in open field,2006,6708.00,312.00
4,Agrigento,celery in open field,2006,125.00,14.00
...,...,...,...,...,...
51109,Viterbo,tomato in grennhouses,2021,100.00,
51110,Viterbo,tomatoes for processing,2021,118500.00,1600.00
51111,Viterbo,turnip broccoli,2021,353.00,15.00
51112,Viterbo,watermelon in greenhouses,2021,45.00,


In [20]:
# The production column now holds tonnes, so name it accordingly
fresh_veg_total = fresh_veg_total.rename(
    {'total_production': 'production_tonnes'},
    axis='columns',
)
fresh_veg_total

Unnamed: 0,City,Type_crop,Year,production_tonnes,total_area_ha
0,Agrigento,artichokes,2006,34020.00,3240.00
1,Agrigento,bean in greenhouses,2006,368.00,
2,Agrigento,cabbage in open field,2006,946.00,55.00
3,Agrigento,cauliflower and broccoli in open field,2006,6708.00,312.00
4,Agrigento,celery in open field,2006,125.00,14.00
...,...,...,...,...,...
51109,Viterbo,tomato in grennhouses,2021,100.00,
51110,Viterbo,tomatoes for processing,2021,118500.00,1600.00
51111,Viterbo,turnip broccoli,2021,353.00,15.00
51112,Viterbo,watermelon in greenhouses,2021,45.00,


In [21]:
# Load the climate table (file name indicates 2006-2021 means)
climate_mean = pd.read_csv(filepath_or_buffer='climate2006-2021_mean.csv')
climate_mean.head()

Unnamed: 0.2,Unnamed: 0.1,City,Year,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
0,0,Abano Terme,2006,6,100.76,13.3,7.32,71.53,1.56,0.66,25.65,3.84,0.65,0.67,56.54,21.8,1.68
1,1,Abano Terme,2007,19,100.66,14.06,7.23,68.75,1.59,0.61,26.09,4.63,0.61,0.63,54.11,21.47,1.94
2,2,Abano Terme,2008,32,100.63,13.64,7.72,73.95,1.67,0.69,24.69,4.3,0.67,0.7,59.45,20.38,2.77
3,3,Abano Terme,2009,45,100.47,13.71,7.72,72.75,1.69,0.7,24.92,4.11,0.7,0.72,57.77,20.82,2.45
4,4,Abano Terme,2010,58,100.33,12.55,7.54,75.6,1.7,0.75,23.83,2.73,0.73,0.76,61.55,21.1,3.15


In [22]:
# List the distinct location names present in the climate data
climate_mean["City"].unique()

array(['Abano Terme', 'Abbadia Lariana', 'Abbadia San Salvatore', ...,
       'Zogno', 'Zola Predosa', 'Zoppola'], dtype=object)

In [23]:
# Select cities that correlate with crop data
# (hard-coded list of territory names to keep in the climate table)
CROP_CITIES = [
    'Agrigento', 'Alessandria', 'Ancona', 'Arezzo', 'Ascoli Piceno',
    'Asti', 'Avellino', 'Bari', 'Barletta-Andria-Trani', 'Belluno',
    'Benevento', 'Bergamo', 'Biella', 'Bologna', 'Bolzano / Bozen',
    'Brescia', 'Brindisi', 'Cagliari', 'Caltanissetta', 'Campobasso',
    'Carbonia-Iglesias', 'Caserta', 'Catania', 'Catanzaro', 'Chieti',
    'Como', 'Cosenza', 'Cremona', 'Crotone', 'Cuneo', 'Enna', 'Fermo',
    'Ferrara', 'Firenze', 'Foggia', 'Forlì-Cesena', 'Frosinone',
    'Genova', 'Gorizia', 'Grosseto', 'Imperia', 'Isernia', "L'Aquila",
    'La Spezia', 'Latina', 'Lecce', 'Lecco', 'Livorno', 'Lodi',
    'Lucca', 'Macerata', 'Mantova', 'Massa-Carrara', 'Matera',
    'Medio Campidano', 'Messina', 'Milano', 'Modena',
    'Monza e della Brianza', 'Napoli', 'Novara', 'Nuoro', 'Ogliastra',
    'Olbia-Tempio', 'Oristano', 'Padova', 'Palermo', 'Parma', 'Pavia',
    'Perugia', 'Pesaro e Urbino', 'Pescara', 'Piacenza', 'Pisa',
    'Pistoia', 'Pordenone', 'Potenza', 'Prato', 'Ragusa', 'Ravenna',
    'Reggio di Calabria', "Reggio nell'Emilia", 'Rieti', 'Rimini',
    'Roma', 'Rovigo', 'Salerno', 'Sassari', 'Savona', 'Siena',
    'Siracusa', 'Sondrio', 'Sud Sardegna', 'Taranto', 'Teramo',
    'Terni', 'Torino', 'Trapani', 'Trentino Alto Adige / Südtirol',
    'Trento', 'Treviso', 'Trieste', 'Udine',
    "Valle d'Aosta / Vallée d'Aoste", 'Varese', 'Venezia',
    'Verbano-Cusio-Ossola', 'Vercelli', 'Verona', 'Vibo Valentia',
    'Vicenza', 'Viterbo',
]

# Filter rows directly with one boolean mask. A column-wise apply(lambda ...)
# would rebuild the same isin() mask once for every column of the frame.
climate_mean = climate_mean[climate_mean['City'].isin(CROP_CITIES)]

climate_mean.head()

Unnamed: 0.2,Unnamed: 0.1,City,Year,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
603,603,Agrigento,2006,6,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
604,604,Agrigento,2007,19,96.8,17.18,8.35,71.72,2.75,0.55,28.65,7.18,0.59,0.6,41.58,21.47,1.78
605,605,Agrigento,2008,32,96.83,16.91,8.27,71.72,2.66,0.58,28.02,6.84,0.61,0.61,39.13,21.19,1.61
606,606,Agrigento,2009,45,96.63,16.22,8.67,75.45,2.73,0.7,26.94,7.3,0.72,0.73,42.54,19.64,2.82
607,607,Agrigento,2010,58,96.57,16.2,8.6,75.71,2.87,0.69,26.97,6.97,0.71,0.72,46.03,19.99,2.79


In [24]:
# Confirm which locations survived the city filter
climate_mean["City"].unique()

array(['Agrigento', 'Alessandria', 'Ancona',
       "Valle d'Aosta / Vallée d'Aoste", 'Arezzo', 'Ascoli Piceno',
       'Asti', 'Avellino', 'Bari', 'Barletta-Andria-Trani', 'Belluno',
       'Benevento', 'Bergamo', 'Biella', 'Bologna', 'Bolzano / Bozen',
       'Brescia', 'Brindisi', 'Sud Sardegna', 'Caltanissetta',
       'Campobasso', 'Carbonia-Iglesias', 'Caserta', 'Catania',
       'Catanzaro', 'Chieti', 'Latina', 'Como', 'Cosenza', 'Cremona',
       'Crotone', 'Cuneo', 'Enna', 'Fermo', 'Ferrara', 'Firenze',
       'Foggia', 'Frosinone', 'Genova', 'Gorizia', 'Grosseto', 'Imperia',
       'Isernia', 'La Spezia', 'Ogliastra', 'Lecce', 'Lecco', 'Livorno',
       'Lodi', 'Lucca', 'Macerata', 'Mantova', 'Matera', 'Messina',
       'Milano', 'Modena', 'Treviso', 'Monza e della Brianza', 'Napoli',
       'Novara', 'Nuoro', 'Olbia-Tempio', 'Oristano', 'Padova', 'Palermo',
       'Parma', 'Pavia', 'Perugia', 'Pesaro e Urbino', 'Pescara',
       'Piacenza', 'Pisa', 'Pistoia', 'Pordenone', 'P

In [25]:
# Join both crop and climate datasets.
# No `on=` is given, so merge_ordered joins on the columns the two frames have
# in common; left_by="City" processes the left frame one City group at a time.
# NOTE(review): `how` is left at its default (outer, per the pandas docs) -
# confirm that climate-only years are meant to produce rows here.
# NOTE(review): fillna(0) turns every remaining NaN into 0, including missing
# total_area_ha values and any unmatched climate values - confirm that 0 is an
# acceptable stand-in for "missing" in downstream analysis.
climate_fresh_veg = pd.merge_ordered(fresh_veg_total, climate_mean,  left_by="City").fillna(0)

In [26]:
# Preview the first rows of the merged crop + climate table
climate_fresh_veg.head()

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
0,Agrigento,artichokes,2006,34020.0,3240.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
1,Agrigento,bean in greenhouses,2006,368.0,0.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
2,Agrigento,cabbage in open field,2006,946.0,55.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
3,Agrigento,cauliflower and broccoli in open field,2006,6708.0,312.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
4,Agrigento,celery in open field,2006,125.0,14.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58


In [27]:
# Persist the merged crop + climate table.
# to_csv returns None when a path is given, so its result must not be assigned
# back to climate_fresh_veg (that would replace the DataFrame with None).
climate_fresh_veg.to_csv('fresh_veg_climate_2006-2021.csv', index=False)

In [28]:
# Reload the saved crop + climate table from disk
climate_fresh_veg = pd.read_csv(
    'fresh_veg_climate_2006-2021.csv',
    skipinitialspace=True,
)
climate_fresh_veg.head()

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
0,Agrigento,artichokes,2006,34020.0,3240.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
1,Agrigento,bean in greenhouses,2006,368.0,0.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
2,Agrigento,cabbage in open field,2006,946.0,55.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
3,Agrigento,cauliflower and broccoli in open field,2006,6708.0,312.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
4,Agrigento,celery in open field,2006,125.0,14.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58


Import fertilizers data 

In [35]:
# Load the fertilizer table (one row per city, fertilizer type and year)
fertilizer = pd.read_csv(
    'Italy_crop_data/fertilizer2006-2021.csv',
    skipinitialspace=True,
)
fertilizer.head()

Unnamed: 0,City,Type_fertilizer,Year,Fertilizers_tonnes
0,Torino,calcium cyanamide,2006,122.7
1,Torino,calcium cyanamide,2007,181.2
2,Torino,calcium cyanamide,2008,522.4
3,Torino,calcium cyanamide,2009,205.1
4,Torino,calcium cyanamide,2010,5.4


Join crop and fertilizers datasets 

In [37]:
# Attach fertilizer figures by year and city. The fertilizer table is broken
# down by Type_fertilizer, so a crop row is repeated once per matching
# fertilizer record; values left missing by the join are replaced with 0.
crop_fertilizer_climate = (
    climate_fresh_veg
    .merge(fertilizer, how='left', on=['Year', 'City'])
    .fillna(0)
)
crop_fertilizer_climate

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,...,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes
0,Agrigento,artichokes,2006,34020.00,3240.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.00
1,Agrigento,artichokes,2006,34020.00,3240.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.70
2,Agrigento,artichokes,2006,34020.00,3240.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.50
3,Agrigento,artichokes,2006,34020.00,3240.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.50
4,Agrigento,artichokes,2006,34020.00,3240.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525112,Sud Sardegna,watermelon in open field,2021,10386.80,161.00,7170.00,136.00,100.92,19.17,10.55,...,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,nitrogen-potassium,113.00
525113,Sud Sardegna,watermelon in open field,2021,10386.80,161.00,7170.00,136.00,100.92,19.17,10.55,...,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,organic-nitrogen,221.00
525114,Sud Sardegna,watermelon in open field,2021,10386.80,161.00,7170.00,136.00,100.92,19.17,10.55,...,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,organic,0.00
525115,Sud Sardegna,watermelon in open field,2021,10386.80,161.00,7170.00,136.00,100.92,19.17,10.55,...,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,peaty-amend,0.00


In [38]:
# Remove the two 'Unnamed' columns carried over from the climate file
crop_fertilizer_climate = crop_fertilizer_climate.drop(
    ['Unnamed: 0.1', 'Unnamed: 0'],
    axis='columns',
)
crop_fertilizer_climate

Unnamed: 0,City,Type_crop,Year,production_tonnes,total_area_ha,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes
0,Agrigento,artichokes,2006,34020.00,3240.00,96.87,16.70,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.00
1,Agrigento,artichokes,2006,34020.00,3240.00,96.87,16.70,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.70
2,Agrigento,artichokes,2006,34020.00,3240.00,96.87,16.70,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.50
3,Agrigento,artichokes,2006,34020.00,3240.00,96.87,16.70,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.50
4,Agrigento,artichokes,2006,34020.00,3240.00,96.87,16.70,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525112,Sud Sardegna,watermelon in open field,2021,10386.80,161.00,100.92,19.17,10.55,76.47,4.35,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,nitrogen-potassium,113.00
525113,Sud Sardegna,watermelon in open field,2021,10386.80,161.00,100.92,19.17,10.55,76.47,4.35,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,organic-nitrogen,221.00
525114,Sud Sardegna,watermelon in open field,2021,10386.80,161.00,100.92,19.17,10.55,76.47,4.35,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,organic,0.00
525115,Sud Sardegna,watermelon in open field,2021,10386.80,161.00,100.92,19.17,10.55,76.47,4.35,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,peaty-amend,0.00


In [39]:
# Persist the crop + climate + fertilizer table.
# to_csv returns None when a path is given, so its result must not be assigned
# back to crop_fertilizer_climate (that would replace the DataFrame with None).
crop_fertilizer_climate.to_csv('fresh_veg_fertilizer_climate_2006-2021.csv', index=False)

In [3]:
# Reload the saved crop + climate + fertilizer table from disk
crop_fertilizer_climate = pd.read_csv(
    'fresh_veg_fertilizer_climate_2006-2021.csv',
    skipinitialspace=True,
)
crop_fertilizer_climate.head()

Unnamed: 0,City,Type_crop,Year,production_tonnes,total_area_ha,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes
0,Agrigento,artichokes,2006,34020.0,3240.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.0
1,Agrigento,artichokes,2006,34020.0,3240.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.7
2,Agrigento,artichokes,2006,34020.0,3240.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.5
3,Agrigento,artichokes,2006,34020.0,3240.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.5
4,Agrigento,artichokes,2006,34020.0,3240.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.7


Import locations lat and lon 

In [4]:
# Load the location table (City, lat, lon)
geo = pd.read_csv(
    'it_locations2.csv',
    skipinitialspace=True,
)
geo.head()

Unnamed: 0,City,lat,lon
0,Roma,41.89,12.48
1,Milano,45.47,9.19
2,Napoli,40.83,14.25
3,Turin,45.07,7.7
4,Palermo,38.12,13.36


In [5]:
# List the distinct location names present in the coordinates table
geo["City"].unique()

array(['Roma', 'Milano', 'Napoli', ..., 'Tollegno', 'Revere',
       'Rive d’Arcano'], dtype=object)

In [6]:
# Select cities that correlate with crop data
# (hard-coded list of territory names to keep in the coordinates table)
CROP_CITIES = [
    'Agrigento', 'Alessandria', 'Ancona', 'Arezzo', 'Ascoli Piceno',
    'Asti', 'Avellino', 'Bari', 'Barletta-Andria-Trani', 'Belluno',
    'Benevento', 'Bergamo', 'Biella', 'Bologna', 'Bolzano / Bozen',
    'Brescia', 'Brindisi', 'Cagliari', 'Caltanissetta', 'Campobasso',
    'Carbonia-Iglesias', 'Caserta', 'Catania', 'Catanzaro', 'Chieti',
    'Como', 'Cosenza', 'Cremona', 'Crotone', 'Cuneo', 'Enna', 'Fermo',
    'Ferrara', 'Firenze', 'Foggia', 'Forlì-Cesena', 'Frosinone',
    'Genova', 'Gorizia', 'Grosseto', 'Imperia', 'Isernia', "L'Aquila",
    'La Spezia', 'Latina', 'Lecce', 'Lecco', 'Livorno', 'Lodi',
    'Lucca', 'Macerata', 'Mantova', 'Massa-Carrara', 'Matera',
    'Medio Campidano', 'Messina', 'Milano', 'Modena',
    'Monza e della Brianza', 'Napoli', 'Novara', 'Nuoro', 'Ogliastra',
    'Olbia-Tempio', 'Oristano', 'Padova', 'Palermo', 'Parma', 'Pavia',
    'Perugia', 'Pesaro e Urbino', 'Pescara', 'Piacenza', 'Pisa',
    'Pistoia', 'Pordenone', 'Potenza', 'Prato', 'Ragusa', 'Ravenna',
    'Reggio di Calabria', "Reggio nell'Emilia", 'Rieti', 'Rimini',
    'Roma', 'Rovigo', 'Salerno', 'Sassari', 'Savona', 'Siena',
    'Siracusa', 'Sondrio', 'Sud Sardegna', 'Taranto', 'Teramo',
    'Terni', 'Torino', 'Trapani', 'Trentino Alto Adige / Südtirol',
    'Trento', 'Treviso', 'Trieste', 'Udine',
    "Valle d'Aosta / Vallée d'Aoste", 'Varese', 'Venezia',
    'Verbano-Cusio-Ossola', 'Vercelli', 'Verona', 'Vibo Valentia',
    'Vicenza', 'Viterbo',
]

# Filter rows directly with one boolean mask. A column-wise apply(lambda ...)
# would rebuild the same isin() mask once for every column of the frame.
# NOTE(review): the raw table spells the city 'Turin' while this list uses
# 'Torino', so that row is filtered out here - confirm whether the spelling
# should be harmonised before the join with the crop data.
geo = geo[geo['City'].isin(CROP_CITIES)]
geo.head()

Unnamed: 0,City,lat,lon
0,Roma,41.89,12.48
1,Milano,45.47,9.19
2,Napoli,40.83,14.25
4,Palermo,38.12,13.36
5,Genova,44.41,8.93


In [7]:
# Confirm which locations survived the city filter
geo["City"].unique()

array(['Roma', 'Milano', 'Napoli', 'Palermo', 'Genova', 'Bologna',
       'Firenze', 'Bari', 'Catania', 'Venezia', 'Verona', 'Messina',
       'Padova', 'Trieste', 'Taranto', 'Brescia', 'Parma', 'Prato',
       'Modena', 'Reggio di Calabria', "Reggio nell'Emilia", 'Perugia',
       'Ravenna', 'Livorno', 'Cagliari', 'Foggia', 'Rimini', 'Salerno',
       'Ferrara', 'Latina', 'Monza e della Brianza', 'Siracusa',
       'Bergamo', 'Pescara', 'Trento', 'Sassari', 'Vicenza', 'Terni',
       'Bolzano / Bozen', 'Novara', 'Piacenza', 'Ancona', 'Udine',
       'Arezzo', 'Forlì-Cesena', 'Lecce', 'Pesaro e Urbino',
       'Barletta-Andria-Trani', 'Alessandria', 'La Spezia', 'Pistoia',
       'Pisa', 'Catanzaro', 'Lucca', 'Brindisi', 'Treviso', 'Como',
       'Grosseto', 'Varese', 'Asti', 'Caserta', 'Ragusa', 'Pavia',
       'Cremona', 'Trapani', 'Viterbo', 'Cosenza', 'Potenza', 'Crotone',
       'Massa-Carrara', 'Caltanissetta', 'Benevento', 'Savona', 'Matera',
       'Olbia-Tempio', 'Agrigento', 

In [8]:
# Join both crop and locations datasets.
# No `on=` is given, so merge_ordered joins on the columns the two frames have
# in common; left_by="City" processes the left frame one City group at a time.
# NOTE(review): fillna(0) writes lat = 0 and lon = 0 for every city that has no
# match in geo (the displayed result shows 0.00 / 0.00 for 'Sud Sardegna').
# (0, 0) is a real coordinate, so consider leaving these as NaN - confirm what
# downstream mapping code expects.
# NOTE(review): `how` is left at its default (outer, per the pandas docs) -
# confirm that is intended.
crop_fertilizer_climate = pd.merge_ordered(crop_fertilizer_climate, geo,  left_by="City").fillna(0)

In [9]:
# Display the table after coordinates were attached (truncated preview)
crop_fertilizer_climate

Unnamed: 0,City,Type_crop,Year,production_tonnes,total_area_ha,PS,TS,QV2M,RH2M,WS2M,...,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes,lat,lon
0,Agrigento,artichokes,2006,34020.00,3240.00,96.87,16.70,8.33,71.95,2.61,...,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.00,37.31,13.58
1,Agrigento,artichokes,2006,34020.00,3240.00,96.87,16.70,8.33,71.95,2.61,...,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.70,37.31,13.58
2,Agrigento,artichokes,2006,34020.00,3240.00,96.87,16.70,8.33,71.95,2.61,...,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.50,37.31,13.58
3,Agrigento,artichokes,2006,34020.00,3240.00,96.87,16.70,8.33,71.95,2.61,...,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.50,37.31,13.58
4,Agrigento,artichokes,2006,34020.00,3240.00,96.87,16.70,8.33,71.95,2.61,...,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.70,37.31,13.58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525112,Sud Sardegna,watermelon in open field,2021,10386.80,161.00,100.92,19.17,10.55,76.47,4.35,...,13.52,0.56,0.58,49.54,10.26,2.78,nitrogen-potassium,113.00,0.00,0.00
525113,Sud Sardegna,watermelon in open field,2021,10386.80,161.00,100.92,19.17,10.55,76.47,4.35,...,13.52,0.56,0.58,49.54,10.26,2.78,organic-nitrogen,221.00,0.00,0.00
525114,Sud Sardegna,watermelon in open field,2021,10386.80,161.00,100.92,19.17,10.55,76.47,4.35,...,13.52,0.56,0.58,49.54,10.26,2.78,organic,0.00,0.00,0.00
525115,Sud Sardegna,watermelon in open field,2021,10386.80,161.00,100.92,19.17,10.55,76.47,4.35,...,13.52,0.56,0.58,49.54,10.26,2.78,peaty-amend,0.00,0.00,0.00


In [10]:
# Save final dataset.
# to_csv returns None when a path is given, so its result must not be assigned
# back to crop_fertilizer_climate (that would replace the DataFrame with None).
crop_fertilizer_climate.to_csv('fresh-veg_final_2006-2021.csv', index=False)