# Omdena  - Milan Chapter Agrifoods
## AI for Sustainable agri-food systems: use of Satellite Imagery
### Exploratory analysis of industrial crop production in Italy 2006-2021

#### Author: Maria Fisher 


The main objective of this study is to have gather information about crop production in Italy for the period of 2006-2022. 

Crop dataset used in this study was downloaded from the Italian National Institute of Statistics (Istat).



In [1]:
import warnings 
warnings.filterwarnings("ignore")

import os
import pandas as pd
pd.options.display.float_format = "{:.2f}".format
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import seaborn as sns 
import scipy 
import sklearn
import geopandas as gpd
import pgeocode
import folium
import sys
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot

In [2]:
industrial_crop = pd.read_csv('./Italy_crop_data/industrial_crop.csv',skipinitialspace=True)
industrial_crop.head()

Unnamed: 0,ITTER107,Territory,TIPO_DATO5,Data type,AGRI_MADRE,Type of crop,TIME,Select time,Value,Flag Codes,Flags
0,ITC16,Cuneo,TP_QUIN_EXT,total production - quintals,PARSNGREEN,parsley in greenhouses,2006,2006,750,e,estimate data
1,ITC16,Cuneo,TP_QUIN_EXT,total production - quintals,PARSNGREEN,parsley in greenhouses,2007,2007,780,,
2,ITC16,Cuneo,TP_QUIN_EXT,total production - quintals,PARSNGREEN,parsley in greenhouses,2008,2008,810,,
3,ITC16,Cuneo,TP_QUIN_EXT,total production - quintals,PARSNGREEN,parsley in greenhouses,2009,2009,773,e,estimate data
4,ITC16,Cuneo,TP_QUIN_EXT,total production - quintals,PARSNGREEN,parsley in greenhouses,2010,2010,788,e,estimate data


## Pre-processing dataset 

In [3]:
# Drop Columns
industrial_crop = industrial_crop.drop(columns =['ITTER107','TIPO_DATO5','AGRI_MADRE', 'TIME', 'Flag Codes','Flags' ])
industrial_crop

Unnamed: 0,Territory,Data type,Type of crop,Select time,Value
0,Cuneo,total production - quintals,parsley in greenhouses,2006,750
1,Cuneo,total production - quintals,parsley in greenhouses,2007,780
2,Cuneo,total production - quintals,parsley in greenhouses,2008,810
3,Cuneo,total production - quintals,parsley in greenhouses,2009,773
4,Cuneo,total production - quintals,parsley in greenhouses,2010,788
...,...,...,...,...,...
19180,Trentino Alto Adige / Südtirol,harvested production - quintals,hops,2021,60
19181,Grosseto,total area - hectares,other oilseed crops (excluding sesame),2021,72
19182,Udine,total area - hectares,hops,2021,1
19183,Arezzo,total area - hectares,other textile crops,2021,4


In [4]:
# Rename Columns
industrial_crop = industrial_crop.rename(columns = {'Select time':'Year', 'Type of crop':'Type_crop', 'Data type':'Data_type', 'Territory':'City'})


In [5]:
industrial_crop = industrial_crop[industrial_crop["Year"] < 2022]

In [6]:
def show_info(industrial_crop):
    print('DATASET SHAPE: ', industrial_crop.shape, '\n')
    print('-'*50)
    print('FEATURE DATA TYPES:')
    print(industrial_crop.info())
    print('\n', '-'*50)
    print('NUMBER OF UNIQUE VALUES PER FEATURE:', '\n')
    print(industrial_crop.nunique())
    print('\n', '-'*50)
    print('NULL VALUES PER FEATURE')
    print(industrial_crop.isnull().sum())
show_info(industrial_crop)

DATASET SHAPE:  (18582, 5) 

--------------------------------------------------
FEATURE DATA TYPES:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 18582 entries, 0 to 19184
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   City       18582 non-null  object
 1   Data_type  18582 non-null  object
 2   Type_crop  18582 non-null  object
 3   Year       18582 non-null  int64 
 4   Value      18582 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 871.0+ KB
None

 --------------------------------------------------
NUMBER OF UNIQUE VALUES PER FEATURE: 

City          108
Data_type       4
Type_crop      18
Year           16
Value        3846
dtype: int64

 --------------------------------------------------
NULL VALUES PER FEATURE
City         0
Data_type    0
Type_crop    0
Year         0
Value        0
dtype: int64


## Cities in Italy producing industrial_crop

In [7]:
# Check cities names
industrial_crop.City.unique()


array(['Cuneo', 'Imperia', 'Genova', 'Varese', 'Lecco', 'Milano',
       'Bergamo', 'Brescia', 'Cremona', 'Mantova', 'Verona', 'Vicenza',
       'Treviso', 'Venezia', 'Padova', 'Rovigo', 'Trieste', 'Parma',
       'Bologna', 'Ferrara', 'Forlì-Cesena', 'Rimini', 'Massa-Carrara',
       'Lucca', 'Pistoia', 'Firenze', 'Livorno', 'Arezzo', 'Grosseto',
       'Ancona', 'Macerata', 'Ascoli Piceno', 'Roma', 'Latina',
       'Frosinone', 'Napoli', 'Salerno', 'Lecce', 'Catanzaro', 'Trapani',
       'Palermo', 'Enna', 'Ragusa', 'Siracusa', 'Sassari', 'Nuoro',
       'Cagliari', 'Oristano', 'Olbia-Tempio', 'Medio Campidano',
       'Carbonia-Iglesias', 'Como', 'Ravenna', "L'Aquila", 'Biella',
       'Piacenza', 'Teramo', 'Udine', 'Pesaro e Urbino', 'Siena',
       'Belluno', 'Rieti', 'Lodi', 'Pisa', 'Verbano-Cusio-Ossola',
       'Crotone', 'Torino', 'Caltanissetta', 'Vercelli', 'Cosenza',
       'Alessandria', 'Modena', 'Perugia', 'Taranto', 'Pescara', 'Chieti',
       'Bari', "Reggio nell'Emili

In [8]:
industrial_crop1 =  industrial_crop.loc[ industrial_crop['Data_type'] == 'total production - quintals ']

In [9]:
industrial_crop1['total_production'] = industrial_crop1['Value'][industrial_crop1['Data_type']=='total production - quintals ']
industrial_crop1.head(10)   

Unnamed: 0,City,Data_type,Type_crop,Year,Value,total_production
0,Cuneo,total production - quintals,parsley in greenhouses,2006,750,750
1,Cuneo,total production - quintals,parsley in greenhouses,2007,780,780
2,Cuneo,total production - quintals,parsley in greenhouses,2008,810,810
3,Cuneo,total production - quintals,parsley in greenhouses,2009,773,773
4,Cuneo,total production - quintals,parsley in greenhouses,2010,788,788
5,Cuneo,total production - quintals,parsley in greenhouses,2011,788,788
6,Cuneo,total production - quintals,parsley in greenhouses,2012,783,783
7,Cuneo,total production - quintals,parsley in greenhouses,2013,600,600
8,Cuneo,total production - quintals,parsley in greenhouses,2014,430,430
9,Cuneo,total production - quintals,parsley in greenhouses,2015,336,336


In [10]:
industrial_crop2 =  industrial_crop.loc[ industrial_crop['Data_type'] == 'total area - hectares']
industrial_crop2 

Unnamed: 0,City,Data_type,Type_crop,Year,Value
1697,Ravenna,total area - hectares,soya beans,2006,1900
1698,Ravenna,total area - hectares,soya beans,2007,800
1699,Ravenna,total area - hectares,soya beans,2008,450
1700,Ravenna,total area - hectares,soya beans,2009,790
1701,Ravenna,total area - hectares,soya beans,2010,1174
...,...,...,...,...,...
19176,Trentino Alto Adige / Südtirol,total area - hectares,hops,2021,3
19181,Grosseto,total area - hectares,other oilseed crops (excluding sesame),2021,72
19182,Udine,total area - hectares,hops,2021,1
19183,Arezzo,total area - hectares,other textile crops,2021,4


In [11]:
industrial_crop2['total_area_ha'] = industrial_crop2['Value'][industrial_crop2['Data_type']=='total area - hectares']
industrial_crop2.tail(10) 

Unnamed: 0,City,Data_type,Type_crop,Year,Value,total_area_ha
19171,Arezzo,total area - hectares,hops,2020,1,1
19172,Arezzo,total area - hectares,hops,2021,1,1
19173,Siena,total area - hectares,hops,2020,2,2
19174,Siena,total area - hectares,hops,2021,1,1
19175,Trentino Alto Adige / Südtirol,total area - hectares,hops,2020,3,3
19176,Trentino Alto Adige / Südtirol,total area - hectares,hops,2021,3,3
19181,Grosseto,total area - hectares,other oilseed crops (excluding sesame),2021,72,72
19182,Udine,total area - hectares,hops,2021,1,1
19183,Arezzo,total area - hectares,other textile crops,2021,4,4
19184,Grosseto,total area - hectares,other textile crops,2021,12,12


In [12]:
# Join both datasets 
industrial_crop_total =  pd.merge_ordered(industrial_crop1, industrial_crop2, on=['Year', 'City', 'Type_crop'], how='left')
industrial_crop_total

Unnamed: 0,City,Data_type_x,Type_crop,Year,Value_x,total_production,Data_type_y,Value_y,total_area_ha
0,Agrigento,total production - quintals,parsley in open field,2006,50,50,total area - hectares,1.00,1.00
1,Alessandria,total production - quintals,hemp,2006,300,300,total area - hectares,6.00,6.00
2,Alessandria,total production - quintals,rape,2006,8640,8640,total area - hectares,320.00,320.00
3,Alessandria,total production - quintals,soya beans,2006,41250,41250,total area - hectares,1330.00,1330.00
4,Alessandria,total production - quintals,sunflower,2006,203513,203513,total area - hectares,5731.00,5731.00
...,...,...,...,...,...,...,...,...,...
6181,Viterbo,total production - quintals,flax,2021,1140,1140,total area - hectares,50.00,50.00
6182,Viterbo,total production - quintals,hemp,2021,215,215,total area - hectares,20.00,20.00
6183,Viterbo,total production - quintals,rape,2021,290,290,total area - hectares,26.00,26.00
6184,Viterbo,total production - quintals,sunflower,2021,16760,16760,total area - hectares,1200.00,1200.00


In [13]:
industrial_crop_total = industrial_crop_total.drop(columns =['Data_type_x','Data_type_y', 'Value_x', 'Value_y' ])
industrial_crop_total

Unnamed: 0,City,Type_crop,Year,total_production,total_area_ha
0,Agrigento,parsley in open field,2006,50,1.00
1,Alessandria,hemp,2006,300,6.00
2,Alessandria,rape,2006,8640,320.00
3,Alessandria,soya beans,2006,41250,1330.00
4,Alessandria,sunflower,2006,203513,5731.00
...,...,...,...,...,...
6181,Viterbo,flax,2021,1140,50.00
6182,Viterbo,hemp,2021,215,20.00
6183,Viterbo,rape,2021,290,26.00
6184,Viterbo,sunflower,2021,16760,1200.00


In [14]:
# Transform values from quintals to tonnes
industrial_crop_total['total_production'] = industrial_crop_total['total_production']/10
industrial_crop_total

Unnamed: 0,City,Type_crop,Year,total_production,total_area_ha
0,Agrigento,parsley in open field,2006,5.00,1.00
1,Alessandria,hemp,2006,30.00,6.00
2,Alessandria,rape,2006,864.00,320.00
3,Alessandria,soya beans,2006,4125.00,1330.00
4,Alessandria,sunflower,2006,20351.30,5731.00
...,...,...,...,...,...
6181,Viterbo,flax,2021,114.00,50.00
6182,Viterbo,hemp,2021,21.50,20.00
6183,Viterbo,rape,2021,29.00,26.00
6184,Viterbo,sunflower,2021,1676.00,1200.00


In [15]:
# Rename column value to production tonnes
industrial_crop_total = industrial_crop_total.rename(columns = {'total_production':'production_tonnes'})
industrial_crop_total

Unnamed: 0,City,Type_crop,Year,production_tonnes,total_area_ha
0,Agrigento,parsley in open field,2006,5.00,1.00
1,Alessandria,hemp,2006,30.00,6.00
2,Alessandria,rape,2006,864.00,320.00
3,Alessandria,soya beans,2006,4125.00,1330.00
4,Alessandria,sunflower,2006,20351.30,5731.00
...,...,...,...,...,...
6181,Viterbo,flax,2021,114.00,50.00
6182,Viterbo,hemp,2021,21.50,20.00
6183,Viterbo,rape,2021,29.00,26.00
6184,Viterbo,sunflower,2021,1676.00,1200.00


In [16]:
# Import climate dataset
climate_mean = pd.read_csv('climate2006-2021_mean.csv')
climate_mean.head()

Unnamed: 0.2,Unnamed: 0.1,City,Year,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
0,0,Abano Terme,2006,6,100.76,13.3,7.32,71.53,1.56,0.66,25.65,3.84,0.65,0.67,56.54,21.8,1.68
1,1,Abano Terme,2007,19,100.66,14.06,7.23,68.75,1.59,0.61,26.09,4.63,0.61,0.63,54.11,21.47,1.94
2,2,Abano Terme,2008,32,100.63,13.64,7.72,73.95,1.67,0.69,24.69,4.3,0.67,0.7,59.45,20.38,2.77
3,3,Abano Terme,2009,45,100.47,13.71,7.72,72.75,1.69,0.7,24.92,4.11,0.7,0.72,57.77,20.82,2.45
4,4,Abano Terme,2010,58,100.33,12.55,7.54,75.6,1.7,0.75,23.83,2.73,0.73,0.76,61.55,21.1,3.15


In [17]:
# Check cities names
climate_mean.City.unique()

array(['Abano Terme', 'Abbadia Lariana', 'Abbadia San Salvatore', ...,
       'Zogno', 'Zola Predosa', 'Zoppola'], dtype=object)

In [18]:
# Select cities that correlate with crop data 

climate_mean = climate_mean.apply(lambda row: row[climate_mean['City'].isin(['Agrigento', 'Alessandria', 'Ancona', 'Arezzo', 'Ascoli Piceno',
                                                                             'Asti', 'Avellino', 'Bari', 'Barletta-Andria-Trani', 'Belluno',
                                                                             'Benevento', 'Bergamo', 'Biella', 'Bologna', 'Bolzano / Bozen',
                                                                             'Brescia', 'Brindisi', 'Cagliari', 'Caltanissetta', 'Campobasso',
                                                                             'Carbonia-Iglesias', 'Caserta', 'Catania', 'Catanzaro', 'Chieti',
                                                                             'Como', 'Cosenza', 'Cremona', 'Crotone', 'Cuneo', 'Enna', 'Fermo',
                                                                             'Ferrara', 'Firenze', 'Foggia', 'Forlì-Cesena', 'Frosinone',
                                                                             'Genova', 'Gorizia', 'Grosseto', 'Imperia', 'Isernia', "L'Aquila",
                                                                             'La Spezia', 'Latina', 'Lecce', 'Lecco', 'Livorno', 'Lodi',
                                                                             'Lucca', 'Macerata', 'Mantova', 'Massa-Carrara', 'Matera',
                                                                             'Medio Campidano', 'Messina', 'Milano', 'Modena',
                                                                             'Monza e della Brianza', 'Napoli', 'Novara', 'Nuoro', 'Ogliastra',
                                                                             'Olbia-Tempio', 'Oristano', 'Padova', 'Palermo', 'Parma', 'Pavia',
                                                                             'Perugia', 'Pesaro e Urbino', 'Pescara', 'Piacenza', 'Pisa',
                                                                             'Pistoia', 'Pordenone', 'Potenza', 'Prato', 'Ragusa', 'Ravenna',
                                                                             'Reggio di Calabria', "Reggio nell'Emilia", 'Rieti', 'Rimini',
                                                                             'Roma', 'Rovigo', 'Salerno', 'Sassari', 'Savona', 'Siena',
                                                                             'Siracusa', 'Sondrio', 'Sud Sardegna', 'Taranto', 'Teramo',
                                                                             'Terni', 'Torino', 'Trapani', 'Trentino Alto Adige / Südtirol',
                                                                             'Trento', 'Treviso', 'Trieste', 'Udine',
                                                                             "Valle d'Aosta / Vallée d'Aoste", 'Varese', 'Venezia',
                                                                             'Verbano-Cusio-Ossola', 'Vercelli', 'Verona', 'Vibo Valentia',
                                                                             'Vicenza', 'Viterbo'])])

climate_mean.head()

Unnamed: 0.2,Unnamed: 0.1,City,Year,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
603,603,Agrigento,2006,6,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
604,604,Agrigento,2007,19,96.8,17.18,8.35,71.72,2.75,0.55,28.65,7.18,0.59,0.6,41.58,21.47,1.78
605,605,Agrigento,2008,32,96.83,16.91,8.27,71.72,2.66,0.58,28.02,6.84,0.61,0.61,39.13,21.19,1.61
606,606,Agrigento,2009,45,96.63,16.22,8.67,75.45,2.73,0.7,26.94,7.3,0.72,0.73,42.54,19.64,2.82
607,607,Agrigento,2010,58,96.57,16.2,8.6,75.71,2.87,0.69,26.97,6.97,0.71,0.72,46.03,19.99,2.79


In [19]:
climate_mean.City.unique()

array(['Agrigento', 'Alessandria', 'Ancona',
       "Valle d'Aosta / Vallée d'Aoste", 'Arezzo', 'Ascoli Piceno',
       'Asti', 'Avellino', 'Bari', 'Barletta-Andria-Trani', 'Belluno',
       'Benevento', 'Bergamo', 'Biella', 'Bologna', 'Bolzano / Bozen',
       'Brescia', 'Brindisi', 'Sud Sardegna', 'Caltanissetta',
       'Campobasso', 'Carbonia-Iglesias', 'Caserta', 'Catania',
       'Catanzaro', 'Chieti', 'Latina', 'Como', 'Cosenza', 'Cremona',
       'Crotone', 'Cuneo', 'Enna', 'Fermo', 'Ferrara', 'Firenze',
       'Foggia', 'Frosinone', 'Genova', 'Gorizia', 'Grosseto', 'Imperia',
       'Isernia', 'La Spezia', 'Ogliastra', 'Lecce', 'Lecco', 'Livorno',
       'Lodi', 'Lucca', 'Macerata', 'Mantova', 'Matera', 'Messina',
       'Milano', 'Modena', 'Treviso', 'Monza e della Brianza', 'Napoli',
       'Novara', 'Nuoro', 'Olbia-Tempio', 'Oristano', 'Padova', 'Palermo',
       'Parma', 'Pavia', 'Perugia', 'Pesaro e Urbino', 'Pescara',
       'Piacenza', 'Pisa', 'Pistoia', 'Pordenone', 'P

In [20]:
# Join both crop and climate datasets 
climate_industrial_crop = pd.merge_ordered(industrial_crop_total, climate_mean,  left_by="City").fillna(0)

In [21]:
climate_industrial_crop.head()

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
0,Agrigento,parsley in open field,2006,5.0,1.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
1,Agrigento,parsley in open field,2007,9.0,2.0,604.0,19.0,96.8,17.18,8.35,71.72,2.75,0.55,28.65,7.18,0.59,0.6,41.58,21.47,1.78
2,Agrigento,parsley in open field,2008,8.8,2.0,605.0,32.0,96.83,16.91,8.27,71.72,2.66,0.58,28.02,6.84,0.61,0.61,39.13,21.19,1.61
3,Agrigento,parsley in open field,2009,8.4,2.0,606.0,45.0,96.63,16.22,8.67,75.45,2.73,0.7,26.94,7.3,0.72,0.73,42.54,19.64,2.82
4,Agrigento,parsley in open field,2010,9.7,3.0,607.0,58.0,96.57,16.2,8.6,75.71,2.87,0.69,26.97,6.97,0.71,0.72,46.03,19.99,2.79


In [23]:
climate_industrial_crop =  climate_industrial_crop.to_csv('industrial_crop_climate_2006-2021.csv', index=False)

In [24]:
climate_industrial_crop = pd.read_csv('industrial_crop_climate_2006-2021.csv',skipinitialspace=True)
climate_industrial_crop.head()

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,RH2M,WS2M,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR
0,Agrigento,parsley in open field,2006,5.0,1.0,603.0,6.0,96.87,16.7,8.33,71.95,2.61,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58
1,Agrigento,parsley in open field,2007,9.0,2.0,604.0,19.0,96.8,17.18,8.35,71.72,2.75,0.55,28.65,7.18,0.59,0.6,41.58,21.47,1.78
2,Agrigento,parsley in open field,2008,8.8,2.0,605.0,32.0,96.83,16.91,8.27,71.72,2.66,0.58,28.02,6.84,0.61,0.61,39.13,21.19,1.61
3,Agrigento,parsley in open field,2009,8.4,2.0,606.0,45.0,96.63,16.22,8.67,75.45,2.73,0.7,26.94,7.3,0.72,0.73,42.54,19.64,2.82
4,Agrigento,parsley in open field,2010,9.7,3.0,607.0,58.0,96.57,16.2,8.6,75.71,2.87,0.69,26.97,6.97,0.71,0.72,46.03,19.99,2.79


Import fertilizers data 

In [25]:
fertilizer = pd.read_csv('Italy_crop_data/fertilizer2006-2021.csv',skipinitialspace=True)
fertilizer.head()

Unnamed: 0,City,Type_fertilizer,Year,Fertilizers_tonnes
0,Torino,calcium cyanamide,2006,122.7
1,Torino,calcium cyanamide,2007,181.2
2,Torino,calcium cyanamide,2008,522.4
3,Torino,calcium cyanamide,2009,205.1
4,Torino,calcium cyanamide,2010,5.4


Join crop and fertilizers datasets 

In [26]:
# Join both datasets 
crop_fertilizer_climate = pd.merge(climate_industrial_crop , fertilizer, on=['Year', 'City'], how='left').fillna(0)
crop_fertilizer_climate

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,...,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes
0,Agrigento,parsley in open field,2006,5.00,1.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.00
1,Agrigento,parsley in open field,2006,5.00,1.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.70
2,Agrigento,parsley in open field,2006,5.00,1.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.50
3,Agrigento,parsley in open field,2006,5.00,1.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.50
4,Agrigento,parsley in open field,2006,5.00,1.00,603.00,6.00,96.87,16.70,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65614,Valle d'Aosta / Vallée d'Aoste,valerian in greenhouses,2021,0.60,0.00,1909.00,136.00,85.07,4.57,5.42,...,0.64,16.99,-5.68,0.67,0.61,64.00,22.67,1.90,nitrogen-potassium,0.00
65615,Valle d'Aosta / Vallée d'Aoste,valerian in greenhouses,2021,0.60,0.00,1909.00,136.00,85.07,4.57,5.42,...,0.64,16.99,-5.68,0.67,0.61,64.00,22.67,1.90,organic-nitrogen,1.00
65616,Valle d'Aosta / Vallée d'Aoste,valerian in greenhouses,2021,0.60,0.00,1909.00,136.00,85.07,4.57,5.42,...,0.64,16.99,-5.68,0.67,0.61,64.00,22.67,1.90,urea,0.00
65617,Valle d'Aosta / Vallée d'Aoste,valerian in greenhouses,2021,0.60,0.00,1909.00,136.00,85.07,4.57,5.42,...,0.64,16.99,-5.68,0.67,0.61,64.00,22.67,1.90,ammonium sulphate,0.00


In [27]:
crop_fertilizer_climate  =  crop_fertilizer_climate.to_csv('industrial_crop_fertilizer_climate_2006-2021.csv', index=False)

In [29]:
crop_fertilizer_climate = pd.read_csv('industrial_crop_fertilizer_climate_2006-2021.csv',skipinitialspace=True)
crop_fertilizer_climate.head()

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,...,GWETTOP,T2M_MAX,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes
0,Agrigento,parsley in open field,2006,5.0,1.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.0
1,Agrigento,parsley in open field,2006,5.0,1.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.7
2,Agrigento,parsley in open field,2006,5.0,1.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.5
3,Agrigento,parsley in open field,2006,5.0,1.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.5
4,Agrigento,parsley in open field,2006,5.0,1.0,603.0,6.0,96.87,16.7,8.33,...,0.58,28.51,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.7


Import locations lat and lon 

In [30]:
geo = pd.read_csv('it_locations2.csv',skipinitialspace=True)
geo.head()

Unnamed: 0,City,lat,lon
0,Roma,41.89,12.48
1,Milano,45.47,9.19
2,Napoli,40.83,14.25
3,Turin,45.07,7.7
4,Palermo,38.12,13.36


In [31]:
geo.City.unique()

array(['Roma', 'Milano', 'Napoli', ..., 'Tollegno', 'Revere',
       'Rive d’Arcano'], dtype=object)

In [32]:
# Select cities that correlate with crop data 

geo = geo.apply(lambda row: row[geo['City'].isin(['Agrigento', 'Alessandria', 'Ancona', 'Arezzo', 'Ascoli Piceno',
                                                                             'Asti', 'Avellino', 'Bari', 'Barletta-Andria-Trani', 'Belluno',
                                                                             'Benevento', 'Bergamo', 'Biella', 'Bologna', 'Bolzano / Bozen',
                                                                             'Brescia', 'Brindisi', 'Cagliari', 'Caltanissetta', 'Campobasso',
                                                                             'Carbonia-Iglesias', 'Caserta', 'Catania', 'Catanzaro', 'Chieti',
                                                                             'Como', 'Cosenza', 'Cremona', 'Crotone', 'Cuneo', 'Enna', 'Fermo',
                                                                             'Ferrara', 'Firenze', 'Foggia', 'Forlì-Cesena', 'Frosinone',
                                                                             'Genova', 'Gorizia', 'Grosseto', 'Imperia', 'Isernia', "L'Aquila",
                                                                             'La Spezia', 'Latina', 'Lecce', 'Lecco', 'Livorno', 'Lodi',
                                                                             'Lucca', 'Macerata', 'Mantova', 'Massa-Carrara', 'Matera',
                                                                             'Medio Campidano', 'Messina', 'Milano', 'Modena',
                                                                             'Monza e della Brianza', 'Napoli', 'Novara', 'Nuoro', 'Ogliastra',
                                                                             'Olbia-Tempio', 'Oristano', 'Padova', 'Palermo', 'Parma', 'Pavia',
                                                                             'Perugia', 'Pesaro e Urbino', 'Pescara', 'Piacenza', 'Pisa',
                                                                             'Pistoia', 'Pordenone', 'Potenza', 'Prato', 'Ragusa', 'Ravenna',
                                                                             'Reggio di Calabria', "Reggio nell'Emilia", 'Rieti', 'Rimini',
                                                                             'Roma', 'Rovigo', 'Salerno', 'Sassari', 'Savona', 'Siena',
                                                                             'Siracusa', 'Sondrio', 'Sud Sardegna', 'Taranto', 'Teramo',
                                                                             'Terni', 'Torino', 'Trapani', 'Trentino Alto Adige / Südtirol',
                                                                             'Trento', 'Treviso', 'Trieste', 'Udine',
                                                                             "Valle d'Aosta / Vallée d'Aoste", 'Varese', 'Venezia',
                                                                             'Verbano-Cusio-Ossola', 'Vercelli', 'Verona', 'Vibo Valentia',
                                                                             'Vicenza', 'Viterbo'])])
geo.head()

Unnamed: 0,City,lat,lon
0,Roma,41.89,12.48
1,Milano,45.47,9.19
2,Napoli,40.83,14.25
4,Palermo,38.12,13.36
5,Genova,44.41,8.93


In [33]:
geo.City.unique()

array(['Roma', 'Milano', 'Napoli', 'Palermo', 'Genova', 'Bologna',
       'Firenze', 'Bari', 'Catania', 'Venezia', 'Verona', 'Messina',
       'Padova', 'Trieste', 'Taranto', 'Brescia', 'Parma', 'Prato',
       'Modena', 'Reggio di Calabria', "Reggio nell'Emilia", 'Perugia',
       'Ravenna', 'Livorno', 'Cagliari', 'Foggia', 'Rimini', 'Salerno',
       'Ferrara', 'Latina', 'Monza e della Brianza', 'Siracusa',
       'Bergamo', 'Pescara', 'Trento', 'Sassari', 'Vicenza', 'Terni',
       'Bolzano / Bozen', 'Novara', 'Piacenza', 'Ancona', 'Udine',
       'Arezzo', 'Forlì-Cesena', 'Lecce', 'Pesaro e Urbino',
       'Barletta-Andria-Trani', 'Alessandria', 'La Spezia', 'Pistoia',
       'Pisa', 'Catanzaro', 'Lucca', 'Brindisi', 'Treviso', 'Como',
       'Grosseto', 'Varese', 'Asti', 'Caserta', 'Ragusa', 'Pavia',
       'Cremona', 'Trapani', 'Viterbo', 'Cosenza', 'Potenza', 'Crotone',
       'Massa-Carrara', 'Caltanissetta', 'Benevento', 'Savona', 'Matera',
       'Olbia-Tempio', 'Agrigento', 

In [34]:
# Join both crop and locations datasets 
final_industrial_crop = pd.merge_ordered(crop_fertilizer_climate, geo,  left_by="City").fillna(0)

In [35]:
final_industrial_crop

Unnamed: 0.2,City,Type_crop,Year,production_tonnes,total_area_ha,Unnamed: 0.1,Unnamed: 0,PS,TS,QV2M,...,T2M_MIN,GWETPROF,GWETROOT,CLOUD_AMT,T2M_RANGE,PRECTOTCORR,Type_fertilizer,Fertilizers_tonnes,lat,lon
0,Agrigento,parsley in open field,2006,5.00,1.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,calcium cyanamide,0.00,37.31,13.58
1,Agrigento,parsley in open field,2006,5.00,1.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,nitrogen-potassium,27.70,37.31,13.58
2,Agrigento,parsley in open field,2006,5.00,1.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,peaty-amend,174.50,37.31,13.58
3,Agrigento,parsley in open field,2006,5.00,1.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,organic-nitrogen,760.50,37.31,13.58
4,Agrigento,parsley in open field,2006,5.00,1.00,603.00,6.00,96.87,16.70,8.33,...,6.77,0.61,0.62,39.63,21.74,1.58,organic,1343.70,37.31,13.58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65614,Valle d'Aosta / Vallée d'Aoste,valerian in greenhouses,2021,0.60,0.00,1909.00,136.00,85.07,4.57,5.42,...,-5.68,0.67,0.61,64.00,22.67,1.90,nitrogen-potassium,0.00,45.74,7.32
65615,Valle d'Aosta / Vallée d'Aoste,valerian in greenhouses,2021,0.60,0.00,1909.00,136.00,85.07,4.57,5.42,...,-5.68,0.67,0.61,64.00,22.67,1.90,organic-nitrogen,1.00,45.74,7.32
65616,Valle d'Aosta / Vallée d'Aoste,valerian in greenhouses,2021,0.60,0.00,1909.00,136.00,85.07,4.57,5.42,...,-5.68,0.67,0.61,64.00,22.67,1.90,urea,0.00,45.74,7.32
65617,Valle d'Aosta / Vallée d'Aoste,valerian in greenhouses,2021,0.60,0.00,1909.00,136.00,85.07,4.57,5.42,...,-5.68,0.67,0.61,64.00,22.67,1.90,ammonium sulphate,0.00,45.74,7.32


In [36]:
# Save final dataset
final_industrial_crop  =  final_industrial_crop.to_csv('industrial_crop_final_2006-2021.csv', index=False)