# Omdena  - Milan Chapter Agrifoods
## AI for Sustainable agri-food systems: use of Satellite Imagery
### Exploratory analysis of fruits_crop production in Italy 2006-2022
#### Author: Maria Fisher 


The main objective of this study is to have gather information about crop production in Italy for the period of 2006-2022. 

Crop dataset used in this study was downloaded from the Italian National Institute of Statistics (Istat).



In [1]:
import warnings 
warnings.filterwarnings("ignore")

import os
import pandas as pd
pd.options.display.float_format = "{:.2f}".format
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import seaborn as sns 
import scipy 
import sklearn
import geopandas as gpd
import pgeocode
import folium
import sys
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot

In [2]:
fruits = pd.read_csv('./Italy_crop_data/fruits_crop.csv',skipinitialspace=True)
fruits.head()

Unnamed: 0,ITTER107,Territory,TIPO_DATO5,Data type,AGRI_MADRE,Type of crop,TIME,Select time,Value,Flag Codes,Flags
0,ITD35,Venezia,PA_EXT,production area - hectares,KIWI,kiwi,2006,2006,22,,
1,ITD35,Venezia,PA_EXT,production area - hectares,KIWI,kiwi,2007,2007,10,,
2,ITD35,Venezia,PA_EXT,production area - hectares,KIWI,kiwi,2008,2008,10,,
3,ITD35,Venezia,PA_EXT,production area - hectares,KIWI,kiwi,2009,2009,10,,
4,ITD35,Venezia,PA_EXT,production area - hectares,KIWI,kiwi,2010,2010,9,,


## Pre-processing dataset 

In [3]:
# Drop Columns
fruits = fruits.drop(columns =['ITTER107','TIPO_DATO5','AGRI_MADRE', 'TIME', 'Flag Codes','Flags' ])
fruits

Unnamed: 0,Territory,Data type,Type of crop,Select time,Value
0,Venezia,production area - hectares,kiwi,2006,22
1,Venezia,production area - hectares,kiwi,2007,10
2,Venezia,production area - hectares,kiwi,2008,10
3,Venezia,production area - hectares,kiwi,2009,10
4,Venezia,production area - hectares,kiwi,2010,9
...,...,...,...,...,...
86383,Treviso,total area - hectares,sour cherries,2022,15
86384,Padova,total area - hectares,sour cherries,2022,12
86385,Rovigo,total area - hectares,sour cherries,2022,1
86386,Pesaro e Urbino,total area - hectares,sour cherries,2022,3


In [4]:
# Rename Columns
fruits = fruits.rename(columns = {'Select time':'Year', 'Type of crop':'Type_crop', 'Data type':'Data_type', 'Territory':'City'})


In [5]:
fruits = fruits[fruits["Year"] < 2022]

In [6]:
def show_info(fruits):
    print('DATASET SHAPE: ', fruits.shape, '\n')
    print('-'*50)
    print('FEATURE DATA TYPES:')
    print(fruits.info())
    print('\n', '-'*50)
    print('NUMBER OF UNIQUE VALUES PER FEATURE:', '\n')
    print(fruits.nunique())
    print('\n', '-'*50)
    print('NULL VALUES PER FEATURE')
    print(fruits.isnull().sum())
show_info(fruits)

DATASET SHAPE:  (79926, 5) 

--------------------------------------------------
FEATURE DATA TYPES:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 79926 entries, 0 to 86368
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   City       79926 non-null  object
 1   Data_type  79926 non-null  object
 2   Type_crop  79926 non-null  object
 3   Year       79926 non-null  int64 
 4   Value      79926 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 3.7+ MB
None

 --------------------------------------------------
NUMBER OF UNIQUE VALUES PER FEATURE: 

City          112
Data_type       4
Type_crop      55
Year           16
Value        9846
dtype: int64

 --------------------------------------------------
NULL VALUES PER FEATURE
City         0
Data_type    0
Type_crop    0
Year         0
Value        0
dtype: int64


## Cities in Italy producing fruits

In [7]:
# Check cities names
fruits.City.unique()


array(['Venezia', 'Frosinone', 'Latina', 'Trentino Alto Adige / Südtirol',
       'Ragusa', 'Siracusa', 'Udine', 'Nuoro', 'Padova', 'Perugia',
       'Lucca', 'Bologna', 'Avellino', 'Modena', 'Ascoli Piceno', 'Lecce',
       'Livorno', 'Grosseto', 'Siena', 'Oristano', 'Vercelli',
       'Campobasso', 'Pesaro e Urbino', 'Catania', 'Vibo Valentia',
       'Firenze', 'Massa-Carrara', 'Olbia-Tempio', 'Foggia', 'Trapani',
       'Treviso', 'Bari', 'Cagliari', 'Brindisi', 'Gorizia', 'Milano',
       'Mantova', 'Alessandria', "Reggio nell'Emilia", 'Pistoia', 'Rieti',
       'Sondrio', 'Napoli', 'Forlì-Cesena', 'Agrigento', 'Novara',
       'Piacenza', 'Pescara', 'Roma', 'Arezzo', 'Ferrara', 'Catanzaro',
       'Sassari', 'Rovigo', 'Pordenone', 'Biella', 'Viterbo', 'Cremona',
       'Lodi', 'Cosenza', 'Salerno', 'Genova', 'La Spezia', 'Savona',
       'Torino', 'Carbonia-Iglesias', 'Caltanissetta', 'Taranto', 'Pavia',
       'Asti', 'Trieste', 'Imperia', 'Chieti', 'Ogliastra', 'Teramo',
      

In [8]:
fruits1 =  fruits.loc[ fruits['Data_type'] == 'total production - quintals ']

In [9]:
fruits1['total_production'] = fruits1['Value'][fruits1['Data_type']=='total production - quintals ']
fruits1.head(10)   

Unnamed: 0,City,Data_type,Type_crop,Year,Value,total_production
103,Perugia,total production - quintals,nectarine,2006,2925,2925
104,Perugia,total production - quintals,nectarine,2007,2850,2850
105,Perugia,total production - quintals,nectarine,2008,2800,2800
106,Perugia,total production - quintals,nectarine,2009,2800,2800
107,Perugia,total production - quintals,nectarine,2010,2800,2800
108,Perugia,total production - quintals,nectarine,2011,3000,3000
109,Perugia,total production - quintals,nectarine,2012,2700,2700
110,Perugia,total production - quintals,nectarine,2013,3100,3100
111,Perugia,total production - quintals,nectarine,2014,3355,3355
112,Perugia,total production - quintals,nectarine,2015,3410,3410


In [10]:
fruits2 =  fruits.loc[ fruits['Data_type'] == 'total area - hectares']
fruits2 

Unnamed: 0,City,Data_type,Type_crop,Year,Value
17,Frosinone,total area - hectares,pomegranates,2014,2
18,Frosinone,total area - hectares,pomegranates,2015,5
19,Frosinone,total area - hectares,pomegranates,2016,5
20,Frosinone,total area - hectares,pomegranates,2017,10
21,Frosinone,total area - hectares,pomegranates,2018,10
...,...,...,...,...,...
86358,Barletta-Andria-Trani,total area - hectares,citrus fruit,2021,19
86362,Sud Sardegna,total area - hectares,citrus fruit,2021,2251
86366,Biella,total area - hectares,currants,2021,1
86367,Grosseto,total area - hectares,blueberries,2021,1


In [11]:
fruits2['total_area_ha'] = fruits2['Value'][fruits2['Data_type']=='total area - hectares']
fruits2.tail(10) 

Unnamed: 0,City,Data_type,Type_crop,Year,Value,total_area_ha
86338,Siracusa,total area - hectares,citrus fruit,2021,23630,23630
86342,Sassari,total area - hectares,citrus fruit,2021,51,51
86346,Nuoro,total area - hectares,citrus fruit,2021,300,300
86350,Cagliari,total area - hectares,citrus fruit,2021,33,33
86354,Oristano,total area - hectares,citrus fruit,2021,606,606
86358,Barletta-Andria-Trani,total area - hectares,citrus fruit,2021,19,19
86362,Sud Sardegna,total area - hectares,citrus fruit,2021,2251,2251
86366,Biella,total area - hectares,currants,2021,1,1
86367,Grosseto,total area - hectares,blueberries,2021,1,1
86368,Biella,total area - hectares,other berries excluding rowan,2021,2,2


In [12]:
# Join both datasets 
fruits_total =  pd.merge_ordered(fruits1, fruits2, on=['Year', 'City', 'Type_crop'], how='left')
fruits_total

Unnamed: 0,City,Data_type_x,Type_crop,Year,Value_x,total_production,Data_type_y,Value_y,total_area_ha
0,Agrigento,total production - quintals,almond,2006,267630,267630,total area - hectares,14090.00,14090.00
1,Agrigento,total production - quintals,apple,2006,12920,12920,total area - hectares,70.00,70.00
2,Agrigento,total production - quintals,apricot,2006,56100,56100,total area - hectares,340.00,340.00
3,Agrigento,total production - quintals,cherry in complex,2006,3358,3358,total area - hectares,46.00,46.00
4,Agrigento,total production - quintals,clementine,2006,27200,27200,total area - hectares,160.00,160.00
...,...,...,...,...,...,...,...,...,...
19968,Viterbo,total production - quintals,plum,2021,1410,1410,total area - hectares,18.00,18.00
19969,Viterbo,total production - quintals,pomegranates,2021,7500,7500,total area - hectares,60.00,60.00
19970,Viterbo,total production - quintals,raspberry,2021,215,215,total area - hectares,10.00,10.00
19971,Viterbo,total production - quintals,red currant,2021,116,116,total area - hectares,5.00,5.00


In [13]:
fruits_total = fruits_total.drop(columns =['Data_type_x','Data_type_y', 'Value_x', 'Value_y' ])
fruits_total

Unnamed: 0,City,Type_crop,Year,total_production,total_area_ha
0,Agrigento,almond,2006,267630,14090.00
1,Agrigento,apple,2006,12920,70.00
2,Agrigento,apricot,2006,56100,340.00
3,Agrigento,cherry in complex,2006,3358,46.00
4,Agrigento,clementine,2006,27200,160.00
...,...,...,...,...,...
19968,Viterbo,plum,2021,1410,18.00
19969,Viterbo,pomegranates,2021,7500,60.00
19970,Viterbo,raspberry,2021,215,10.00
19971,Viterbo,red currant,2021,116,5.00


In [17]:
# Transform values from quintals to tonnes
fruits_total['total_production'] = fruits_total['total_production']/10
fruits_total

Unnamed: 0,City,Type_crop,Year,total_production,total_area_ha
0,Agrigento,almond,2006,26763.00,14090.00
1,Agrigento,apple,2006,1292.00,70.00
2,Agrigento,apricot,2006,5610.00,340.00
3,Agrigento,cherry in complex,2006,335.80,46.00
4,Agrigento,clementine,2006,2720.00,160.00
...,...,...,...,...,...
19968,Viterbo,plum,2021,141.00,18.00
19969,Viterbo,pomegranates,2021,750.00,60.00
19970,Viterbo,raspberry,2021,21.50,10.00
19971,Viterbo,red currant,2021,11.60,5.00


In [18]:
# Rename column value to production tonnes
fruits_total = fruits_total.rename(columns = {'total_production':'production_tonnes'})
fruits_total

Unnamed: 0,City,Type_crop,Year,production_tonnes,total_area_ha
0,Agrigento,almond,2006,26763.00,14090.00
1,Agrigento,apple,2006,1292.00,70.00
2,Agrigento,apricot,2006,5610.00,340.00
3,Agrigento,cherry in complex,2006,335.80,46.00
4,Agrigento,clementine,2006,2720.00,160.00
...,...,...,...,...,...
19968,Viterbo,plum,2021,141.00,18.00
19969,Viterbo,pomegranates,2021,750.00,60.00
19970,Viterbo,raspberry,2021,21.50,10.00
19971,Viterbo,red currant,2021,11.60,5.00


In [21]:
# Import climate dataset
climate_mean = pd.read_csv('climate2006-2021_mean.csv')
climate_mean.head()

Unnamed: 0.2,Unnamed: 0.1,City,Year,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
0,0,Abano Terme,2006,6,100.76,13.3,7.32,71.53,1.56,0.66,25.65,3.84,0.65,0.67,56.54,21.8,1.68
1,1,Abano Terme,2007,19,100.66,14.06,7.23,68.75,1.59,0.61,26.09,4.63,0.61,0.63,54.11,21.47,1.94
2,2,Abano Terme,2008,32,100.63,13.64,7.72,73.95,1.67,0.69,24.69,4.3,0.67,0.7,59.45,20.38,2.77
3,3,Abano Terme,2009,45,100.47,13.71,7.72,72.75,1.69,0.7,24.92,4.11,0.7,0.72,57.77,20.82,2.45
4,4,Abano Terme,2010,58,100.33,12.55,7.54,75.6,1.7,0.75,23.83,2.73,0.73,0.76,61.55,21.1,3.15


In [22]:
# Check cities names
climate_mean.City.unique()

array(['Abano Terme', 'Abbadia Lariana', 'Abbadia San Salvatore', ...,
       'Zogno', 'Zola Predosa', 'Zoppola'], dtype=object)

In [23]:
# Select cities that correlate with crop data 

climate_mean = climate_mean.apply(lambda row: row[climate_mean['City'].isin(['Agrigento', 'Alessandria', 'Ancona', 'Arezzo', 'Ascoli Piceno',
                                                                             'Asti', 'Avellino', 'Bari', 'Barletta-Andria-Trani', 'Belluno',
                                                                             'Benevento', 'Bergamo', 'Biella', 'Bologna', 'Bolzano / Bozen',
                                                                             'Brescia', 'Brindisi', 'Cagliari', 'Caltanissetta', 'Campobasso',
                                                                             'Carbonia-Iglesias', 'Caserta', 'Catania', 'Catanzaro', 'Chieti',
                                                                             'Como', 'Cosenza', 'Cremona', 'Crotone', 'Cuneo', 'Enna', 'Fermo',
                                                                             'Ferrara', 'Firenze', 'Foggia', 'Forlì-Cesena', 'Frosinone',
                                                                             'Genova', 'Gorizia', 'Grosseto', 'Imperia', 'Isernia', "L'Aquila",
                                                                             'La Spezia', 'Latina', 'Lecce', 'Lecco', 'Livorno', 'Lodi',
                                                                             'Lucca', 'Macerata', 'Mantova', 'Massa-Carrara', 'Matera',
                                                                             'Medio Campidano', 'Messina', 'Milano', 'Modena',
                                                                             'Monza e della Brianza', 'Napoli', 'Novara', 'Nuoro', 'Ogliastra',
                                                                             'Olbia-Tempio', 'Oristano', 'Padova', 'Palermo', 'Parma', 'Pavia',
                                                                             'Perugia', 'Pesaro e Urbino', 'Pescara', 'Piacenza', 'Pisa',
                                                                             'Pistoia', 'Pordenone', 'Potenza', 'Prato', 'Ragusa', 'Ravenna',
                                                                             'Reggio di Calabria', "Reggio nell'Emilia", 'Rieti', 'Rimini',
                                                                             'Roma', 'Rovigo', 'Salerno', 'Sassari', 'Savona', 'Siena',
                                                                             'Siracusa', 'Sondrio', 'Sud Sardegna', 'Taranto', 'Teramo',
                                                                             'Terni', 'Torino', 'Trapani', 'Trentino Alto Adige / Südtirol',
                                                                             'Trento', 'Treviso', 'Trieste', 'Udine',
                                                                             "Valle d'Aosta / Vallée d'Aoste", 'Varese', 'Venezia',
                                                                             'Verbano-Cusio-Ossola', 'Vercelli', 'Verona', 'Vibo Valentia',
                                                                             'Vicenza', 'Viterbo'])])

climate_mean.head()

Unnamed: 0.2,Unnamed: 0.1,City,Year,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
603,603,Agrigento,2006,6,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
604,604,Agrigento,2007,19,96.8,17.18,8.35,71.72,2.75,0.55,28.65,7.18,0.59,0.6,41.58,21.47,1.78
605,605,Agrigento,2008,32,96.83,16.91,8.27,71.72,2.66,0.58,28.02,6.84,0.61,0.61,39.13,21.19,1.61
606,606,Agrigento,2009,45,96.63,16.22,8.67,75.45,2.73,0.7,26.94,7.3,0.72,0.73,42.54,19.64,2.82
607,607,Agrigento,2010,58,96.57,16.2,8.6,75.71,2.87,0.69,26.97,6.97,0.71,0.72,46.03,19.99,2.79


In [24]:
climate_mean.City.unique()

array(['Agrigento', 'Alessandria', 'Ancona',
       "Valle d'Aosta / Vallée d'Aoste", 'Arezzo', 'Ascoli Piceno',
       'Asti', 'Avellino', 'Bari', 'Barletta-Andria-Trani', 'Belluno',
       'Benevento', 'Bergamo', 'Biella', 'Bologna', 'Bolzano / Bozen',
       'Brescia', 'Brindisi', 'Sud Sardegna', 'Caltanissetta',
       'Campobasso', 'Carbonia-Iglesias', 'Caserta', 'Catania',
       'Catanzaro', 'Chieti', 'Latina', 'Como', 'Cosenza', 'Cremona',
       'Crotone', 'Cuneo', 'Enna', 'Fermo', 'Ferrara', 'Firenze',
       'Foggia', 'Frosinone', 'Genova', 'Gorizia', 'Grosseto', 'Imperia',
       'Isernia', 'La Spezia', 'Ogliastra', 'Lecce', 'Lecco', 'Livorno',
       'Lodi', 'Lucca', 'Macerata', 'Mantova', 'Matera', 'Messina',
       'Milano', 'Modena', 'Treviso', 'Monza e della Brianza', 'Napoli',
       'Novara', 'Nuoro', 'Olbia-Tempio', 'Oristano', 'Padova', 'Palermo',
       'Parma', 'Pavia', 'Perugia', 'Pesaro e Urbino', 'Pescara',
       'Piacenza', 'Pisa', 'Pistoia', 'Pordenone', 'P

In [26]:
# Join both crop and climate datasets 
climate_fruits = pd.merge_ordered(fruits_total, climate_mean,  left_by="City").fillna(0)

In [27]:
climate_fruits.head()

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
0,Agrigento,almond,2006,26763.0,14090.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
1,Agrigento,apple,2006,1292.0,70.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
2,Agrigento,apricot,2006,5610.0,340.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
3,Agrigento,cherry in complex,2006,335.8,46.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
4,Agrigento,clementine,2006,2720.0,160.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58


In [28]:
climate_fruits  =  climate_fruits.to_csv('climate_fruits_climate_2006-2021.csv', index=False)

In [29]:
climate_fruits = pd.read_csv('climate_fruits_climate_2006-2021.csv',skipinitialspace=True)
climate_fruits.head()

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
0,Agrigento,almond,2006,26763.0,14090.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
1,Agrigento,apple,2006,1292.0,70.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
2,Agrigento,apricot,2006,5610.0,340.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
3,Agrigento,cherry in complex,2006,335.8,46.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
4,Agrigento,clementine,2006,2720.0,160.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58


Import fertilizers data 

In [30]:
fertilizer = pd.read_csv('Italy_crop_data/fertilizer2006-2021.csv',skipinitialspace=True)
fertilizer.head()

Unnamed: 0,City,Type_fertilizer,Year,Fertilizers_tonnes
0,Torino,calcium cyanamide,2006,122.7
1,Torino,calcium cyanamide,2007,181.2
2,Torino,calcium cyanamide,2008,522.4
3,Torino,calcium cyanamide,2009,205.1
4,Torino,calcium cyanamide,2010,5.4


Join crop and fertilizers datasets 

In [35]:
# Join both datasets 
crop_fertilizer_climate = pd.merge(climate_fruits , fertilizer, on=['Year', 'City'], how='left').fillna(0)
crop_fertilizer_climate

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,...,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes
0,Agrigento,almond,2006,26763.00,14090.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.00
1,Agrigento,almond,2006,26763.00,14090.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.70
2,Agrigento,almond,2006,26763.00,14090.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.50
3,Agrigento,almond,2006,26763.00,14090.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.50
4,Agrigento,almond,2006,26763.00,14090.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201531,Sud Sardegna,sweet cherries,2021,851.40,158.00,7170.00,136.00,100.92,19.17,10.55,...,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,nitrogen-potassium,113.00
201532,Sud Sardegna,sweet cherries,2021,851.40,158.00,7170.00,136.00,100.92,19.17,10.55,...,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,organic-nitrogen,221.00
201533,Sud Sardegna,sweet cherries,2021,851.40,158.00,7170.00,136.00,100.92,19.17,10.55,...,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,organic,0.00
201534,Sud Sardegna,sweet cherries,2021,851.40,158.00,7170.00,136.00,100.92,19.17,10.55,...,0.89,23.78,13.52,0.56,0.58,49.54,10.26,2.78,peaty-amend,0.00


In [36]:
# Save dataset
crop_fertilizer_climate  =  crop_fertilizer_climate.to_csv('fruits_fertilizer_climate_2006-2021.csv', index=False)

In [37]:
fruits_crop  = pd.read_csv('fruits_fertilizer_climate_2006-2021.csv',skipinitialspace=True)
fruits_crop.head()

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,...,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes
0,Agrigento,almond,2006,26763.0,14090.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.0
1,Agrigento,almond,2006,26763.0,14090.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.7
2,Agrigento,almond,2006,26763.0,14090.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.5
3,Agrigento,almond,2006,26763.0,14090.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.5
4,Agrigento,almond,2006,26763.0,14090.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.7


Import locations lat and lon 

In [38]:
geo = pd.read_csv('it_locations2.csv',skipinitialspace=True)
geo.head()

Unnamed: 0,City,lat,lon
0,Roma,41.89,12.48
1,Milano,45.47,9.19
2,Napoli,40.83,14.25
3,Turin,45.07,7.7
4,Palermo,38.12,13.36


In [39]:
geo.City.unique()

array(['Roma', 'Milano', 'Napoli', ..., 'Tollegno', 'Revere',
       'Rive d’Arcano'], dtype=object)

In [40]:
# Select cities that correlate with crop data 

geo = geo.apply(lambda row: row[geo['City'].isin(['Agrigento', 'Alessandria', 'Ancona', 'Arezzo', 'Ascoli Piceno',
                                                                             'Asti', 'Avellino', 'Bari', 'Barletta-Andria-Trani', 'Belluno',
                                                                             'Benevento', 'Bergamo', 'Biella', 'Bologna', 'Bolzano / Bozen',
                                                                             'Brescia', 'Brindisi', 'Cagliari', 'Caltanissetta', 'Campobasso',
                                                                             'Carbonia-Iglesias', 'Caserta', 'Catania', 'Catanzaro', 'Chieti',
                                                                             'Como', 'Cosenza', 'Cremona', 'Crotone', 'Cuneo', 'Enna', 'Fermo',
                                                                             'Ferrara', 'Firenze', 'Foggia', 'Forlì-Cesena', 'Frosinone',
                                                                             'Genova', 'Gorizia', 'Grosseto', 'Imperia', 'Isernia', "L'Aquila",
                                                                             'La Spezia', 'Latina', 'Lecce', 'Lecco', 'Livorno', 'Lodi',
                                                                             'Lucca', 'Macerata', 'Mantova', 'Massa-Carrara', 'Matera',
                                                                             'Medio Campidano', 'Messina', 'Milano', 'Modena',
                                                                             'Monza e della Brianza', 'Napoli', 'Novara', 'Nuoro', 'Ogliastra',
                                                                             'Olbia-Tempio', 'Oristano', 'Padova', 'Palermo', 'Parma', 'Pavia',
                                                                             'Perugia', 'Pesaro e Urbino', 'Pescara', 'Piacenza', 'Pisa',
                                                                             'Pistoia', 'Pordenone', 'Potenza', 'Prato', 'Ragusa', 'Ravenna',
                                                                             'Reggio di Calabria', "Reggio nell'Emilia", 'Rieti', 'Rimini',
                                                                             'Roma', 'Rovigo', 'Salerno', 'Sassari', 'Savona', 'Siena',
                                                                             'Siracusa', 'Sondrio', 'Sud Sardegna', 'Taranto', 'Teramo',
                                                                             'Terni', 'Torino', 'Trapani', 'Trentino Alto Adige / Südtirol',
                                                                             'Trento', 'Treviso', 'Trieste', 'Udine',
                                                                             "Valle d'Aosta / Vallée d'Aoste", 'Varese', 'Venezia',
                                                                             'Verbano-Cusio-Ossola', 'Vercelli', 'Verona', 'Vibo Valentia',
                                                                             'Vicenza', 'Viterbo'])])
geo.head()

Unnamed: 0,City,lat,lon
0,Roma,41.89,12.48
1,Milano,45.47,9.19
2,Napoli,40.83,14.25
4,Palermo,38.12,13.36
5,Genova,44.41,8.93


In [41]:
geo.City.unique()

array(['Roma', 'Milano', 'Napoli', 'Palermo', 'Genova', 'Bologna',
       'Firenze', 'Bari', 'Catania', 'Venezia', 'Verona', 'Messina',
       'Padova', 'Trieste', 'Taranto', 'Brescia', 'Parma', 'Prato',
       'Modena', 'Reggio di Calabria', "Reggio nell'Emilia", 'Perugia',
       'Ravenna', 'Livorno', 'Cagliari', 'Foggia', 'Rimini', 'Salerno',
       'Ferrara', 'Latina', 'Monza e della Brianza', 'Siracusa',
       'Bergamo', 'Pescara', 'Trento', 'Sassari', 'Vicenza', 'Terni',
       'Bolzano / Bozen', 'Novara', 'Piacenza', 'Ancona', 'Udine',
       'Arezzo', 'Forlì-Cesena', 'Lecce', 'Pesaro e Urbino',
       'Barletta-Andria-Trani', 'Alessandria', 'La Spezia', 'Pistoia',
       'Pisa', 'Catanzaro', 'Lucca', 'Brindisi', 'Treviso', 'Como',
       'Grosseto', 'Varese', 'Asti', 'Caserta', 'Ragusa', 'Pavia',
       'Cremona', 'Trapani', 'Viterbo', 'Cosenza', 'Potenza', 'Crotone',
       'Massa-Carrara', 'Caltanissetta', 'Benevento', 'Savona', 'Matera',
       'Olbia-Tempio', 'Agrigento', 

#### Join locations lon and lat to crop datasets 

In [42]:
# Join both crop and locations datasets 
final_fruits_crop = pd.merge_ordered(fruits_crop, geo,  left_by="City").fillna(0)

In [43]:
final_fruits_crop

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,...,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes,lat,lon
0,Agrigento,almond,2006,26763.00,14090.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.00,37.31,13.58
1,Agrigento,almond,2006,26763.00,14090.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.70,37.31,13.58
2,Agrigento,almond,2006,26763.00,14090.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.50,37.31,13.58
3,Agrigento,almond,2006,26763.00,14090.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.50,37.31,13.58
4,Agrigento,almond,2006,26763.00,14090.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.70,37.31,13.58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201531,Sud Sardegna,sweet cherries,2021,851.40,158.00,7170.00,136.00,100.92,19.17,10.55,...,13.52,0.56,0.58,49.54,10.26,2.78,nitrogen-potassium,113.00,0.00,0.00
201532,Sud Sardegna,sweet cherries,2021,851.40,158.00,7170.00,136.00,100.92,19.17,10.55,...,13.52,0.56,0.58,49.54,10.26,2.78,organic-nitrogen,221.00,0.00,0.00
201533,Sud Sardegna,sweet cherries,2021,851.40,158.00,7170.00,136.00,100.92,19.17,10.55,...,13.52,0.56,0.58,49.54,10.26,2.78,organic,0.00,0.00,0.00
201534,Sud Sardegna,sweet cherries,2021,851.40,158.00,7170.00,136.00,100.92,19.17,10.55,...,13.52,0.56,0.58,49.54,10.26,2.78,peaty-amend,0.00,0.00,0.00


In [44]:
# Save final dataset
final_fruits_crop  =  final_fruits_crop.to_csv('fruit_climate_2006-2021.csv', index=False)