# Omdena  - Milan Chapter Agrifoods
## AI for Sustainable agri-food systems: use of Satellite Imagery
### Exploratory analysis of grapes and olives production in Italy 2006-2022
#### Author: Maria Fisher 


The main objective of this study is to have gather information about crop production in Italy for the period of 2006-2022. 

Crop dataset used in this study was downloaded from the Italian National Institute of Statistics (Istat).



In [1]:
import warnings 
warnings.filterwarnings("ignore")

import os
import pandas as pd
pd.options.display.float_format = "{:.2f}".format
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import seaborn as sns 
import scipy 
import sklearn
import geopandas as gpd
import pgeocode
import folium
import sys
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot

In [2]:
grapes_olives = pd.read_csv('./Italy_crop_data/grapes_olives.csv',skipinitialspace=True)
grapes_olives.head()

Unnamed: 0,ITTER107,Territory,TIPO_DATO5,Data type,AGRI_MADRE,Type of crop,TIME,Select time,Value,Flag Codes,Flags
0,ITE17,Pisa,HP_Q_EXT,harvested production - quintals,OLIVO,oil olives,2006,2006,125000.0,,
1,ITE17,Pisa,HP_Q_EXT,harvested production - quintals,OLIVO,oil olives,2007,2007,30702.0,,
2,ITE17,Pisa,HP_Q_EXT,harvested production - quintals,OLIVO,oil olives,2008,2008,145292.0,,
3,ITE17,Pisa,HP_Q_EXT,harvested production - quintals,OLIVO,oil olives,2009,2009,100000.0,,
4,ITE17,Pisa,HP_Q_EXT,harvested production - quintals,OLIVO,oil olives,2010,2010,95459.0,e,estimate data


## Pre-processing dataset 

In [3]:
# Drop Columns
grapes_olives = grapes_olives.drop(columns =['ITTER107','TIPO_DATO5','AGRI_MADRE', 'TIME', 'Flag Codes','Flags' ])
grapes_olives

Unnamed: 0,Territory,Data type,Type of crop,Select time,Value
0,Pisa,harvested production - quintals,oil olives,2006,125000.00
1,Pisa,harvested production - quintals,oil olives,2007,30702.00
2,Pisa,harvested production - quintals,oil olives,2008,145292.00
3,Pisa,harvested production - quintals,oil olives,2009,100000.00
4,Pisa,harvested production - quintals,oil olives,2010,95459.00
...,...,...,...,...,...
17616,Valle d'Aosta / Vallée d'Aoste,production area - hectares,other olives,2021,1.00
17617,Valle d'Aosta / Vallée d'Aoste,total production - quintals,other olives,2021,40.00
17618,Valle d'Aosta / Vallée d'Aoste,harvested production - quintals,other olives,2021,40.00
17619,Milano,production area - hectares,other olives,2021,2.00


In [4]:
# Rename Columns
grapes_olives = grapes_olives.rename(columns = {'Select time':'Year', 'Type of crop':'Type_crop', 'Data type':'Data_type', 'Territory':'City'})


In [5]:
grapes_olives = grapes_olives[grapes_olives["Year"] < 2022]

In [6]:
def show_info(grapes_olives):
    print('DATASET SHAPE: ', grapes_olives.shape, '\n')
    print('-'*50)
    print('FEATURE DATA TYPES:')
    print(grapes_olives.info())
    print('\n', '-'*50)
    print('NUMBER OF UNIQUE VALUES PER FEATURE:', '\n')
    print(grapes_olives.nunique())
    print('\n', '-'*50)
    print('NULL VALUES PER FEATURE')
    print(grapes_olives.isnull().sum())
show_info(grapes_olives)

DATASET SHAPE:  (17248, 5) 

--------------------------------------------------
FEATURE DATA TYPES:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 17248 entries, 0 to 17620
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   City       17248 non-null  object 
 1   Data_type  17248 non-null  object 
 2   Type_crop  17248 non-null  object 
 3   Year       17248 non-null  int64  
 4   Value      17248 non-null  float64
dtypes: float64(1), int64(1), object(3)
memory usage: 808.5+ KB
None

 --------------------------------------------------
NUMBER OF UNIQUE VALUES PER FEATURE: 

City          109
Data_type       6
Type_crop      21
Year           16
Value        6442
dtype: int64

 --------------------------------------------------
NULL VALUES PER FEATURE
City         0
Data_type    0
Type_crop    0
Year         0
Value        0
dtype: int64


## Cities in Italy producing grapes_olives

In [7]:
# Check cities names
grapes_olives.City.unique()


array(['Pisa', 'Ascoli Piceno', 'Ferrara', 'Livorno', 'Lucca', 'Palermo',
       'Isernia', 'Rieti', 'Foggia', 'Gorizia', 'Macerata', 'Pescara',
       'Cagliari', 'Novara', 'Pesaro e Urbino', 'Vibo Valentia', 'Rovigo',
       'Verona', 'Cuneo', 'Taranto', 'Bari', 'Firenze', 'Benevento',
       'Olbia-Tempio', 'Enna', 'Cosenza', 'Frosinone', 'Latina', 'Asti',
       'Genova', 'Caserta', 'Siracusa', 'Brindisi', 'Lecco', 'Bergamo',
       'Sassari', 'Terni', 'Belluno', 'Pordenone', 'Caltanissetta',
       'Medio Campidano', 'Parma', 'Ogliastra', 'Piacenza', 'Catania',
       'Carbonia-Iglesias', 'Catanzaro', 'Reggio di Calabria', 'Crotone',
       'Treviso', 'Ragusa', 'Oristano', 'Udine', 'Matera', 'Bologna',
       'Imperia', "L'Aquila", 'Biella', 'Salerno', 'Campobasso', 'Lecce',
       'La Spezia', 'Agrigento', 'Teramo', 'Savona', 'Napoli', 'Mantova',
       'Torino', 'Chieti', 'Alessandria', 'Varese', 'Sondrio',
       'Verbano-Cusio-Ossola', 'Pavia', 'Massa-Carrara',
       "Reggio 

In [8]:
grapes_olives1 =  grapes_olives.loc[ grapes_olives['Data_type'] == 'total production - quintals ']

In [9]:
grapes_olives1['total_production'] = grapes_olives1['Value'][grapes_olives1['Data_type']=='total production - quintals ']
grapes_olives1.head(10)   

Unnamed: 0,City,Data_type,Type_crop,Year,Value,total_production
60,Lucca,total production - quintals,wine,2006,54310.0,54310.0
61,Lucca,total production - quintals,wine,2007,46900.0,46900.0
62,Lucca,total production - quintals,wine,2008,45208.0,45208.0
63,Lucca,total production - quintals,wine,2009,44900.0,44900.0
64,Lucca,total production - quintals,wine,2010,34580.0,34580.0
65,Lucca,total production - quintals,wine,2011,28960.0,28960.0
66,Lucca,total production - quintals,wine,2012,27350.0,27350.0
67,Lucca,total production - quintals,wine,2013,33000.0,33000.0
68,Lucca,total production - quintals,wine,2014,35416.0,35416.0
69,Lucca,total production - quintals,wine,2015,22116.0,22116.0


In [10]:
grapes_olives2 =  grapes_olives.loc[ grapes_olives['Data_type'] == 'total area - hectares']
grapes_olives2 

Unnamed: 0,City,Data_type,Type_crop,Year,Value
15,Ascoli Piceno,total area - hectares,Grapes for table use,2006,33.00
16,Ascoli Piceno,total area - hectares,Grapes for table use,2007,33.00
17,Ascoli Piceno,total area - hectares,Grapes for table use,2008,33.00
18,Ascoli Piceno,total area - hectares,Grapes for table use,2009,30.00
19,Ascoli Piceno,total area - hectares,Grapes for table use,2010,15.00
...,...,...,...,...,...
17605,Sud Sardegna,total area - hectares,other olives,2020,12870.00
17606,Sud Sardegna,total area - hectares,other olives,2021,12870.00
17614,Grosseto,total area - hectares,table olives,2021,1.00
17615,Valle d'Aosta / Vallée d'Aoste,total area - hectares,other olives,2021,1.00


In [11]:
grapes_olives2['total_area_ha'] = grapes_olives2['Value'][grapes_olives2['Data_type']=='total area - hectares']
grapes_olives2.tail(10) 

Unnamed: 0,City,Data_type,Type_crop,Year,Value,total_area_ha
17582,Oristano,total area - hectares,other olives,2021,4534.0,4534.0
17589,Fermo,total area - hectares,other olives,2020,1653.0,1653.0
17590,Fermo,total area - hectares,other olives,2021,1653.0,1653.0
17597,Barletta-Andria-Trani,total area - hectares,other olives,2020,33300.0,33300.0
17598,Barletta-Andria-Trani,total area - hectares,other olives,2021,33300.0,33300.0
17605,Sud Sardegna,total area - hectares,other olives,2020,12870.0,12870.0
17606,Sud Sardegna,total area - hectares,other olives,2021,12870.0,12870.0
17614,Grosseto,total area - hectares,table olives,2021,1.0,1.0
17615,Valle d'Aosta / Vallée d'Aoste,total area - hectares,other olives,2021,1.0,1.0
17620,Ferrara,total area - hectares,table olives,2021,1.0,1.0


In [12]:
# Join both datasets 
grapes_olives_total =  pd.merge_ordered(grapes_olives1, grapes_olives2, on=['Year', 'City', 'Type_crop'], how='left')
grapes_olives_total

Unnamed: 0,City,Data_type_x,Type_crop,Year,Value_x,total_production,Data_type_y,Value_y,total_area_ha
0,Agrigento,total production - quintals,Grapes for table use,2006,1417500.00,1417500.00,total area - hectares,6770.00,6770.00
1,Agrigento,total production - quintals,olive oil,2006,92870.00,92870.00,,,
2,Agrigento,total production - quintals,wine,2006,1190000.00,1190000.00,,,
3,Alessandria,total production - quintals,Grapes for table use,2006,3825.00,3825.00,total area - hectares,37.00,37.00
4,Alessandria,total production - quintals,wine,2006,902605.00,902605.00,,,
...,...,...,...,...,...,...,...,...,...
5064,Viterbo,total production - quintals,grapes for wines with protected designation of...,2021,78800.00,78800.00,total area - hectares,870.00,870.00
5065,Viterbo,total production - quintals,grapes for wines with protected geographical i...,2021,115000.00,115000.00,total area - hectares,1300.00,1300.00
5066,Viterbo,total production - quintals,oil olives,2021,218000.00,218000.00,total area - hectares,14900.00,14900.00
5067,Viterbo,total production - quintals,olive oil,2021,23900.00,23900.00,,,


In [13]:
grapes_olives_total = grapes_olives_total.drop(columns =['Data_type_x','Data_type_y', 'Value_x', 'Value_y' ])
grapes_olives_total

Unnamed: 0,City,Type_crop,Year,total_production,total_area_ha
0,Agrigento,Grapes for table use,2006,1417500.00,6770.00
1,Agrigento,olive oil,2006,92870.00,
2,Agrigento,wine,2006,1190000.00,
3,Alessandria,Grapes for table use,2006,3825.00,37.00
4,Alessandria,wine,2006,902605.00,
...,...,...,...,...,...
5064,Viterbo,grapes for wines with protected designation of...,2021,78800.00,870.00
5065,Viterbo,grapes for wines with protected geographical i...,2021,115000.00,1300.00
5066,Viterbo,oil olives,2021,218000.00,14900.00
5067,Viterbo,olive oil,2021,23900.00,


In [15]:
# Transform values from quintals to tonnes
grapes_olives_total['total_production'] = grapes_olives_total['total_production']/10
grapes_olives_total

Unnamed: 0,City,Type_crop,Year,total_production,total_area_ha
0,Agrigento,Grapes for table use,2006,141750.00,6770.00
1,Agrigento,olive oil,2006,9287.00,
2,Agrigento,wine,2006,119000.00,
3,Alessandria,Grapes for table use,2006,382.50,37.00
4,Alessandria,wine,2006,90260.50,
...,...,...,...,...,...
5064,Viterbo,grapes for wines with protected designation of...,2021,7880.00,870.00
5065,Viterbo,grapes for wines with protected geographical i...,2021,11500.00,1300.00
5066,Viterbo,oil olives,2021,21800.00,14900.00
5067,Viterbo,olive oil,2021,2390.00,


In [16]:
# Rename column value to production tonnes
grapes_olives_total = grapes_olives_total.rename(columns = {'total_production':'production_tonnes'})
grapes_olives_total

Unnamed: 0,City,Type_crop,Year,production_tonnes,total_area_ha
0,Agrigento,Grapes for table use,2006,141750.00,6770.00
1,Agrigento,olive oil,2006,9287.00,
2,Agrigento,wine,2006,119000.00,
3,Alessandria,Grapes for table use,2006,382.50,37.00
4,Alessandria,wine,2006,90260.50,
...,...,...,...,...,...
5064,Viterbo,grapes for wines with protected designation of...,2021,7880.00,870.00
5065,Viterbo,grapes for wines with protected geographical i...,2021,11500.00,1300.00
5066,Viterbo,oil olives,2021,21800.00,14900.00
5067,Viterbo,olive oil,2021,2390.00,


In [17]:
# Import climate dataset
climate_mean = pd.read_csv('climate2006-2021_mean.csv')
climate_mean.head()

Unnamed: 0.2,Unnamed: 0.1,City,Year,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
0,0,Abano Terme,2006,6,100.76,13.3,7.32,71.53,1.56,0.66,25.65,3.84,0.65,0.67,56.54,21.8,1.68
1,1,Abano Terme,2007,19,100.66,14.06,7.23,68.75,1.59,0.61,26.09,4.63,0.61,0.63,54.11,21.47,1.94
2,2,Abano Terme,2008,32,100.63,13.64,7.72,73.95,1.67,0.69,24.69,4.3,0.67,0.7,59.45,20.38,2.77
3,3,Abano Terme,2009,45,100.47,13.71,7.72,72.75,1.69,0.7,24.92,4.11,0.7,0.72,57.77,20.82,2.45
4,4,Abano Terme,2010,58,100.33,12.55,7.54,75.6,1.7,0.75,23.83,2.73,0.73,0.76,61.55,21.1,3.15


In [18]:
# Check cities names
climate_mean.City.unique()

array(['Abano Terme', 'Abbadia Lariana', 'Abbadia San Salvatore', ...,
       'Zogno', 'Zola Predosa', 'Zoppola'], dtype=object)

In [19]:
# Select cities names that correlate with crop data 

climate_mean = climate_mean.apply(lambda row: row[climate_mean['City'].isin(['Agrigento', 'Alessandria', 'Ancona', 'Arezzo', 'Ascoli Piceno',
                                                                             'Asti', 'Avellino', 'Bari', 'Barletta-Andria-Trani', 'Belluno',
                                                                             'Benevento', 'Bergamo', 'Biella', 'Bologna', 'Bolzano / Bozen',
                                                                             'Brescia', 'Brindisi', 'Cagliari', 'Caltanissetta', 'Campobasso',
                                                                             'Carbonia-Iglesias', 'Caserta', 'Catania', 'Catanzaro', 'Chieti',
                                                                             'Como', 'Cosenza', 'Cremona', 'Crotone', 'Cuneo', 'Enna', 'Fermo',
                                                                             'Ferrara', 'Firenze', 'Foggia', 'Forlì-Cesena', 'Frosinone',
                                                                             'Genova', 'Gorizia', 'Grosseto', 'Imperia', 'Isernia', "L'Aquila",
                                                                             'La Spezia', 'Latina', 'Lecce', 'Lecco', 'Livorno', 'Lodi',
                                                                             'Lucca', 'Macerata', 'Mantova', 'Massa-Carrara', 'Matera',
                                                                             'Medio Campidano', 'Messina', 'Milano', 'Modena',
                                                                             'Monza e della Brianza', 'Napoli', 'Novara', 'Nuoro', 'Ogliastra',
                                                                             'Olbia-Tempio', 'Oristano', 'Padova', 'Palermo', 'Parma', 'Pavia',
                                                                             'Perugia', 'Pesaro e Urbino', 'Pescara', 'Piacenza', 'Pisa',
                                                                             'Pistoia', 'Pordenone', 'Potenza', 'Prato', 'Ragusa', 'Ravenna',
                                                                             'Reggio di Calabria', "Reggio nell'Emilia", 'Rieti', 'Rimini',
                                                                             'Roma', 'Rovigo', 'Salerno', 'Sassari', 'Savona', 'Siena',
                                                                             'Siracusa', 'Sondrio', 'Sud Sardegna', 'Taranto', 'Teramo',
                                                                             'Terni', 'Torino', 'Trapani', 'Trentino Alto Adige / Südtirol',
                                                                             'Trento', 'Treviso', 'Trieste', 'Udine',
                                                                             "Valle d'Aosta / Vallée d'Aoste", 'Varese', 'Venezia',
                                                                             'Verbano-Cusio-Ossola', 'Vercelli', 'Verona', 'Vibo Valentia',
                                                                             'Vicenza', 'Viterbo'])])

climate_mean.head()

Unnamed: 0.2,Unnamed: 0.1,City,Year,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
603,603,Agrigento,2006,6,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
604,604,Agrigento,2007,19,96.8,17.18,8.35,71.72,2.75,0.55,28.65,7.18,0.59,0.6,41.58,21.47,1.78
605,605,Agrigento,2008,32,96.83,16.91,8.27,71.72,2.66,0.58,28.02,6.84,0.61,0.61,39.13,21.19,1.61
606,606,Agrigento,2009,45,96.63,16.22,8.67,75.45,2.73,0.7,26.94,7.3,0.72,0.73,42.54,19.64,2.82
607,607,Agrigento,2010,58,96.57,16.2,8.6,75.71,2.87,0.69,26.97,6.97,0.71,0.72,46.03,19.99,2.79


In [20]:
climate_mean.City.unique()

array(['Agrigento', 'Alessandria', 'Ancona',
       "Valle d'Aosta / Vallée d'Aoste", 'Arezzo', 'Ascoli Piceno',
       'Asti', 'Avellino', 'Bari', 'Barletta-Andria-Trani', 'Belluno',
       'Benevento', 'Bergamo', 'Biella', 'Bologna', 'Bolzano / Bozen',
       'Brescia', 'Brindisi', 'Sud Sardegna', 'Caltanissetta',
       'Campobasso', 'Carbonia-Iglesias', 'Caserta', 'Catania',
       'Catanzaro', 'Chieti', 'Latina', 'Como', 'Cosenza', 'Cremona',
       'Crotone', 'Cuneo', 'Enna', 'Fermo', 'Ferrara', 'Firenze',
       'Foggia', 'Frosinone', 'Genova', 'Gorizia', 'Grosseto', 'Imperia',
       'Isernia', 'La Spezia', 'Ogliastra', 'Lecce', 'Lecco', 'Livorno',
       'Lodi', 'Lucca', 'Macerata', 'Mantova', 'Matera', 'Messina',
       'Milano', 'Modena', 'Treviso', 'Monza e della Brianza', 'Napoli',
       'Novara', 'Nuoro', 'Olbia-Tempio', 'Oristano', 'Padova', 'Palermo',
       'Parma', 'Pavia', 'Perugia', 'Pesaro e Urbino', 'Pescara',
       'Piacenza', 'Pisa', 'Pistoia', 'Pordenone', 'P

In [21]:
# Join both crop and climate datasets 
climate_grapes_olives = pd.merge_ordered(grapes_olives_total, climate_mean,  left_by="City").fillna(0)

In [22]:
climate_grapes_olives.head()

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
0,Agrigento,Grapes for table use,2006,141750.0,6770.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
1,Agrigento,olive oil,2006,9287.0,0.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
2,Agrigento,wine,2006,119000.0,0.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
3,Agrigento,Grapes for table use,2007,126521.3,6595.0,604.0,19.0,96.8,17.18,8.35,71.72,2.75,0.55,28.65,7.18,0.59,0.6,41.58,21.47,1.78
4,Agrigento,olive oil,2007,9510.0,0.0,604.0,19.0,96.8,17.18,8.35,71.72,2.75,0.55,28.65,7.18,0.59,0.6,41.58,21.47,1.78


In [26]:
climate_grapes_olives =  climate_grapes_olives.to_csv('grapes_olives_climate_2006-2021.csv', index=False)

In [27]:
climate_grapes_olives = pd.read_csv('grapes_olives_climate_2006-2021.csv',skipinitialspace=True)
climate_grapes_olives.head()

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
0,Agrigento,Grapes for table use,2006,141750.0,6770.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
1,Agrigento,olive oil,2006,9287.0,0.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
2,Agrigento,wine,2006,119000.0,0.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
3,Agrigento,Grapes for table use,2007,126521.3,6595.0,604.0,19.0,96.8,17.18,8.35,71.72,2.75,0.55,28.65,7.18,0.59,0.6,41.58,21.47,1.78
4,Agrigento,olive oil,2007,9510.0,0.0,604.0,19.0,96.8,17.18,8.35,71.72,2.75,0.55,28.65,7.18,0.59,0.6,41.58,21.47,1.78


Import fertilizers data 

In [28]:
fertilizer = pd.read_csv('Italy_crop_data/fertilizer2006-2021.csv',skipinitialspace=True)
fertilizer.head()

Unnamed: 0,City,Type_fertilizer,Year,Fertilizers_tonnes
0,Torino,calcium cyanamide,2006,122.7
1,Torino,calcium cyanamide,2007,181.2
2,Torino,calcium cyanamide,2008,522.4
3,Torino,calcium cyanamide,2009,205.1
4,Torino,calcium cyanamide,2010,5.4


Join crop and fertilizers datasets 

In [37]:
# Join both crop and fertilizers datasets 
crop_fertilizer_climate = pd.merge(climate_grapes_olives , fertilizer, on=['Year', 'City'], how='left').fillna(0)
crop_fertilizer_climate

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,...,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes
0,Agrigento,Grapes for table use,2006,141750.00,6770.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.00
1,Agrigento,Grapes for table use,2006,141750.00,6770.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.70
2,Agrigento,Grapes for table use,2006,141750.00,6770.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.50
3,Agrigento,Grapes for table use,2006,141750.00,6770.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.50
4,Agrigento,Grapes for table use,2006,141750.00,6770.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51952,Sud Sardegna,table olives,2021,4573.30,2534.00,7170.00,136.00,100.92,19.17,10.55,...,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,nitrogen-potassium,113.00
51953,Sud Sardegna,table olives,2021,4573.30,2534.00,7170.00,136.00,100.92,19.17,10.55,...,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,organic-nitrogen,221.00
51954,Sud Sardegna,table olives,2021,4573.30,2534.00,7170.00,136.00,100.92,19.17,10.55,...,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,organic,0.00
51955,Sud Sardegna,table olives,2021,4573.30,2534.00,7170.00,136.00,100.92,19.17,10.55,...,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,peaty-amend,0.00


In [38]:
# Save crop and fertilizers joined data
crop_fertilizer_climate  =  crop_fertilizer_climate.to_csv('grapes_olives_fertilizer_climate_2006-2021.csv', index=False)

In [40]:
crop_fertilizer_climate = pd.read_csv('grapes_olives_fertilizer_climate_2006-2021.csv',skipinitialspace=True)
crop_fertilizer_climate.head()

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,...,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes
0,Agrigento,Grapes for table use,2006,141750.0,6770.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.0
1,Agrigento,Grapes for table use,2006,141750.0,6770.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.7
2,Agrigento,Grapes for table use,2006,141750.0,6770.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.5
3,Agrigento,Grapes for table use,2006,141750.0,6770.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.5
4,Agrigento,Grapes for table use,2006,141750.0,6770.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.7


Import locations lat and lon 

In [41]:
geo = pd.read_csv('it_locations2.csv',skipinitialspace=True)
geo.head()

Unnamed: 0,City,lat,lon
0,Roma,41.89,12.48
1,Milano,45.47,9.19
2,Napoli,40.83,14.25
3,Turin,45.07,7.7
4,Palermo,38.12,13.36


In [42]:
geo.City.unique()

array(['Roma', 'Milano', 'Napoli', ..., 'Tollegno', 'Revere',
       'Rive d’Arcano'], dtype=object)

In [43]:
# Select cities that correlate with crop data 

geo = geo.apply(lambda row: row[geo['City'].isin(['Agrigento', 'Alessandria', 'Ancona', 'Arezzo', 'Ascoli Piceno',
                                                                             'Asti', 'Avellino', 'Bari', 'Barletta-Andria-Trani', 'Belluno',
                                                                             'Benevento', 'Bergamo', 'Biella', 'Bologna', 'Bolzano / Bozen',
                                                                             'Brescia', 'Brindisi', 'Cagliari', 'Caltanissetta', 'Campobasso',
                                                                             'Carbonia-Iglesias', 'Caserta', 'Catania', 'Catanzaro', 'Chieti',
                                                                             'Como', 'Cosenza', 'Cremona', 'Crotone', 'Cuneo', 'Enna', 'Fermo',
                                                                             'Ferrara', 'Firenze', 'Foggia', 'Forlì-Cesena', 'Frosinone',
                                                                             'Genova', 'Gorizia', 'Grosseto', 'Imperia', 'Isernia', "L'Aquila",
                                                                             'La Spezia', 'Latina', 'Lecce', 'Lecco', 'Livorno', 'Lodi',
                                                                             'Lucca', 'Macerata', 'Mantova', 'Massa-Carrara', 'Matera',
                                                                             'Medio Campidano', 'Messina', 'Milano', 'Modena',
                                                                             'Monza e della Brianza', 'Napoli', 'Novara', 'Nuoro', 'Ogliastra',
                                                                             'Olbia-Tempio', 'Oristano', 'Padova', 'Palermo', 'Parma', 'Pavia',
                                                                             'Perugia', 'Pesaro e Urbino', 'Pescara', 'Piacenza', 'Pisa',
                                                                             'Pistoia', 'Pordenone', 'Potenza', 'Prato', 'Ragusa', 'Ravenna',
                                                                             'Reggio di Calabria', "Reggio nell'Emilia", 'Rieti', 'Rimini',
                                                                             'Roma', 'Rovigo', 'Salerno', 'Sassari', 'Savona', 'Siena',
                                                                             'Siracusa', 'Sondrio', 'Sud Sardegna', 'Taranto', 'Teramo',
                                                                             'Terni', 'Torino', 'Trapani', 'Trentino Alto Adige / Südtirol',
                                                                             'Trento', 'Treviso', 'Trieste', 'Udine',
                                                                             "Valle d'Aosta / Vallée d'Aoste", 'Varese', 'Venezia',
                                                                             'Verbano-Cusio-Ossola', 'Vercelli', 'Verona', 'Vibo Valentia',
                                                                             'Vicenza', 'Viterbo'])])
geo.head()

Unnamed: 0,City,lat,lon
0,Roma,41.89,12.48
1,Milano,45.47,9.19
2,Napoli,40.83,14.25
4,Palermo,38.12,13.36
5,Genova,44.41,8.93


In [44]:
geo.City.unique()

array(['Roma', 'Milano', 'Napoli', 'Palermo', 'Genova', 'Bologna',
       'Firenze', 'Bari', 'Catania', 'Venezia', 'Verona', 'Messina',
       'Padova', 'Trieste', 'Taranto', 'Brescia', 'Parma', 'Prato',
       'Modena', 'Reggio di Calabria', "Reggio nell'Emilia", 'Perugia',
       'Ravenna', 'Livorno', 'Cagliari', 'Foggia', 'Rimini', 'Salerno',
       'Ferrara', 'Latina', 'Monza e della Brianza', 'Siracusa',
       'Bergamo', 'Pescara', 'Trento', 'Sassari', 'Vicenza', 'Terni',
       'Bolzano / Bozen', 'Novara', 'Piacenza', 'Ancona', 'Udine',
       'Arezzo', 'Forlì-Cesena', 'Lecce', 'Pesaro e Urbino',
       'Barletta-Andria-Trani', 'Alessandria', 'La Spezia', 'Pistoia',
       'Pisa', 'Catanzaro', 'Lucca', 'Brindisi', 'Treviso', 'Como',
       'Grosseto', 'Varese', 'Asti', 'Caserta', 'Ragusa', 'Pavia',
       'Cremona', 'Trapani', 'Viterbo', 'Cosenza', 'Potenza', 'Crotone',
       'Massa-Carrara', 'Caltanissetta', 'Benevento', 'Savona', 'Matera',
       'Olbia-Tempio', 'Agrigento', 

In [45]:
# Join both crop and locations datasets 
crop_fertilizer_climate = pd.merge_ordered(crop_fertilizer_climate, geo,  left_by="City").fillna(0)

In [46]:
crop_fertilizer_climate

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,...,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes,lat,lon
0,Agrigento,Grapes for table use,2006,141750.00,6770.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.00,37.31,13.58
1,Agrigento,Grapes for table use,2006,141750.00,6770.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.70,37.31,13.58
2,Agrigento,Grapes for table use,2006,141750.00,6770.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.50,37.31,13.58
3,Agrigento,Grapes for table use,2006,141750.00,6770.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.50,37.31,13.58
4,Agrigento,Grapes for table use,2006,141750.00,6770.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.70,37.31,13.58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51952,Sud Sardegna,table olives,2021,4573.30,2534.00,7170.00,136.00,100.92,19.17,10.55,...,13.52,0.56,0.58,49.54,10.26,2.78,nitrogen-potassium,113.00,0.00,0.00
51953,Sud Sardegna,table olives,2021,4573.30,2534.00,7170.00,136.00,100.92,19.17,10.55,...,13.52,0.56,0.58,49.54,10.26,2.78,organic-nitrogen,221.00,0.00,0.00
51954,Sud Sardegna,table olives,2021,4573.30,2534.00,7170.00,136.00,100.92,19.17,10.55,...,13.52,0.56,0.58,49.54,10.26,2.78,organic,0.00,0.00,0.00
51955,Sud Sardegna,table olives,2021,4573.30,2534.00,7170.00,136.00,100.92,19.17,10.55,...,13.52,0.56,0.58,49.54,10.26,2.78,peaty-amend,0.00,0.00,0.00


In [47]:
# Save final dataset
crop_fertilizer_climate  =  crop_fertilizer_climate.to_csv('grapes-olives_final_2006-2021.csv', index=False)