# Omdena  - Milan Chapter Agrifoods
## AI for Sustainable agri-food systems: use of Satellite Imagery
### Data Exploration for grapes_olives in Italy 2006-2019
#### Author: Maria Fisher 


The main objective of this study is to gather information about crop production in Italy for the period 2006-2019. 

The crop dataset used in this study was downloaded from the Italian National Institute of Statistics (Istat).



In [None]:
import warnings 
warnings.filterwarnings("ignore")

import os
import pandas as pd
pd.options.display.float_format = "{:.2f}".format
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import seaborn as sns 
import scipy 
import sklearn
import geopandas as gpd
import pgeocode
import folium
import sys
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot

In [None]:
# Load the grapes/olives table; skipinitialspace drops blanks that follow a delimiter.
grapes = pd.read_csv(
    'grapes-olives_final_2006-2021.csv',
    skipinitialspace=True,
)
grapes.head()

In [None]:
# Distinct crop-type labels present in the loaded table.
grapes['Type_crop'].unique()

In [None]:
# Keep only the records whose Year is below 2020.
grapes = grapes.loc[grapes['Year'].lt(2020)]

In [None]:
# Distinct crop-type labels that remain after the year filter.
grapes['Type_crop'].unique()

In [None]:
# Summary statistics of the table after the year filter.
grapes.describe()

## Pre-processing dataset 

In [None]:
def show_info(grapes):
    """Print a quick structural summary of a DataFrame.

    Writes to stdout, in order: the shape, the ``DataFrame.info()`` report,
    the number of unique values per column and the number of null values
    per column.

    Parameters
    ----------
    grapes : pandas.DataFrame
        Table to summarise.

    Returns
    -------
    None
    """
    separator = '-' * 50

    print('DATASET SHAPE: ', grapes.shape, '\n')
    print(separator)
    print('FEATURE DATA TYPES:')
    # info() writes its report itself and returns None; wrapping it in
    # print() would add a stray "None" line after the report.
    grapes.info()
    print('\n', separator)
    print('NUMBER OF UNIQUE VALUES PER FEATURE:', '\n')
    print(grapes.nunique())
    print('\n', separator)
    print('NULL VALUES PER FEATURE')
    print(grapes.isnull().sum())
# Print the structural summary for the loaded dataset.
show_info(grapes)

In [None]:
# Summary statistics of the table (same frame as summarised by show_info above).
grapes.describe()

## Total grapes and olives production in 2006-2019

In [None]:
# Yearly total production.
# sns.barplot aggregates with the mean by default; estimator=sum makes the
# bars match the "total" stated in the title and the y-label.
plt.figure(figsize=(10, 5))
sns.barplot(
    data=grapes,
    x='Year',
    y='production_tonnes',
    estimator=sum,
    palette='coolwarm',
)
plt.title('Total grapes olives production 2006-2019')
plt.xlabel('Year')
plt.ylabel('Total production (tonnes)')
plt.show()



## Grapes and olives production by city 

In [None]:
# Ten cities with the largest summed production.
# Grouping by the column name with as_index=False and selecting only the
# numeric column replaces the tuple-style selection ['production_tonnes','City'],
# which pandas 2.x rejects, and keeps 'City' as a regular column without a
# name clash on reset_index().
grapes_region = (
    grapes.groupby('City', as_index=False)['production_tonnes']
    .sum()
    .sort_values(by='production_tonnes', ascending=False)
    .head(10)
)
grapes_region

In [None]:
# Horizontal bars for the cities held in grapes_region.
plt.figure(figsize=(5, 5))
sns.barplot(
    data=grapes_region,
    x='production_tonnes',
    y='City',
    orient='h',
    palette='coolwarm',
)
plt.title('Total grapes olives production 2006-2019 by City')
plt.xlabel('Total production')
plt.ylabel('Cities')
plt.show()

## Grapes highest production 2006-2019

In [None]:
# Crop-type column: max() of the labels, record count per label, and
# number of distinct labels.
crop_types = grapes['Type_crop']
print(crop_types.max())
print(crop_types.value_counts())
print(crop_types.nunique())



In [None]:
# Crop-type labels as they appear before being shortened.
grapes['Type_crop'].unique()


In [None]:
# Shorten the long crop descriptions to compact labels (old value -> new value).
crop_labels = {
    'Grapes for table use': 'grapes-table',
    'grapes for other purposes n.e.c.': 'grapes-n.e.c',
    'grapes for raisins': 'grapes-raisins',
    'grapes for other wines n.e.c. (without pdo/pgi)': 'grapes-wines(N-pdo/pgi)',
    'grapes for wines with protected designation of origin (pdo)': 'grapes-wines(Y-pdo)',
    'grapes for wines with protected geographical indication (pgi)': 'grapes-wines(Y-pgi)',
}
grapes = grapes.replace(crop_labels)

In [None]:
# Area column: extremes, frequency of each value, and number of distinct values.
area_ha = grapes['total_ha']
print(area_ha.max())
print(area_ha.min())
print(area_ha.value_counts())
print(area_ha.nunique())


In [None]:
# Disabled step: would drop the rows whose area equals zero.
# NOTE(review): the area column is called 'total_ha' in the rest of this
# notebook, so 'total_area_ha' presumably needs renaming before re-enabling.
#grapes.drop(grapes[grapes['total_area_ha'] == 0.00 ].index , inplace=True)

In [None]:
# Summary statistics cast to integers for a compact display.
summary_stats = grapes.describe()
summary_stats.astype(int)

In [None]:
# Total production per crop type.
# value_counts() would plot the number of records per crop, which does not
# match the title or the "Total production (tonnes)" label, so the tonnes
# are summed per crop instead.
plt.figure(figsize=(10, 5))
production_by_crop = (
    grapes.groupby('Type_crop')['production_tonnes']
    .sum()
    .sort_values(ascending=False)
)
production_by_crop.plot.bar()
plt.title('Total grapes olives production 2006-2019 by crop')
plt.ylabel('Total production (tonnes)')
plt.show()


In [None]:
# Display the current state of the table.
grapes

In [None]:
# First/third quartiles and inter-quartile range of the numeric columns.
# numeric_only=True is passed explicitly: pandas 2.x no longer skips
# non-numeric columns by default, and quantile() raises on text columns.
Q1 = grapes.quantile(0.25, numeric_only=True)
Q3 = grapes.quantile(0.75, numeric_only=True)
IQR = Q3 - Q1
print(IQR)

In [None]:
# Remove outliers with the 1.5 * IQR rule: a row is dropped when any of its
# numeric values falls outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR].
# Only the columns that have quartiles (the index of Q1) are compared, since
# text columns cannot be ordered against numeric fences.
numeric_cols = Q1.index
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
is_outlier = (
    (grapes[numeric_cols] < lower_fence) | (grapes[numeric_cols] > upper_fence)
).any(axis=1)
# .copy() yields an independent frame, so later column assignments do not
# write into a slice of the unfiltered table.
grapes = grapes[~is_outlier].copy()
print(grapes.shape)


In [None]:
# Production distribution: box plot per crop type (left) and histogram with KDE (right).
fig = plt.figure(figsize=(13, 5))

box_ax = fig.add_subplot(1, 2, 1)
sns.boxplot(
    data=grapes,
    x='production_tonnes',
    y='Type_crop',
    color='darkblue',
    dodge=False,
    ax=box_ax,
)
box_ax.set_title('Box Plot total grapes production ')

hist_ax = fig.add_subplot(1, 2, 2)
sns.histplot(
    data=grapes,
    x='production_tonnes',
    color='darkblue',
    kde=True,
    bins=80,
    ax=hist_ax,
)
hist_ax.set_title('Distribution total grapes production')

plt.show()



In [None]:
# Area distribution: box plot of total_ha per year (left) and histogram
# with KDE (right).
plt.figure(figsize=(17, 5))

plt.subplot(1, 2, 1)
sns.boxplot(data=grapes, x='Year', y='total_ha', color='darkblue', dodge=False)
plt.title('Box Plot total area (ha)')

plt.subplot(1, 2, 2)
sns.histplot(data=grapes, x='total_ha', color='darkblue', kde=True, bins=80)
# Title previously read "Distribution total total area (ha)" (duplicated word).
plt.title('Distribution total area (ha)')

plt.show()

In [None]:
# Range of production after outlier removal.
production = grapes['production_tonnes']
print(production.max())
print(production.min())

In [None]:
# Range of cultivated area after outlier removal.
area_ha = grapes['total_ha']
print(area_ha.max())
print(area_ha.min())


In [None]:
# Largest fertilizer amount in the table.
print(grapes['Fertilizers_tonnes'].max())

In [None]:
# Three stacked panels sharing the x axis: yearly mean of production, area
# and fertilizers.
fig, ax = plt.subplots(3, 1, sharex=True, figsize=(8, 11))
fig.autofmt_xdate()
production_ax, area_ax, fertilizers_ax = ax

# Year is turned into a string so it is treated as a category on the x axis.
grapes['Year'] = grapes['Year'].astype(str)

# Mean crop production per year
crops = grapes.groupby('Year')[['production_tonnes']].mean()
sns.lineplot(data=crops, x='Year', y='production_tonnes', ax=production_ax)

# Mean area per year
area = grapes.groupby('Year')[['total_ha']].mean()
sns.lineplot(data=area, x='Year', y='total_ha', ax=area_ax)

# Mean fertilizers per year
fertilizers = grapes.groupby('Year')[['Fertilizers_tonnes']].mean()
sns.lineplot(data=fertilizers, x='Year', y='Fertilizers_tonnes', ax=fertilizers_ax)




In [None]:
# Production per year, one line per crop type.
plotcrop10 = sns.relplot(
    data=grapes,
    kind='line',
    x='Year',
    y='production_tonnes',
    hue='Type_crop',
    style='Type_crop',
    aspect=1.75,
)


In [None]:
# Fertilizer amount per year, one line per fertilizer type.
plotfertilizers10 = sns.relplot(
    data=grapes,
    kind='line',
    x='Year',
    y='Fertilizers_tonnes',
    hue='Type_fertilizer',
    style='Type_fertilizer',
    aspect=1.75,
)

# Climate data analysis

|Param_Code|Param_Name|Resolution|Depth [m]|Units|
|:--------:|:--------:|:--------:|:-------:|:---:|
|GWETPROF|Profile Soil Moisture|0.5° x 0.625°|1.34 - 8.53|water-free [0], saturated soil[1]| 
|GWETTOP|Surface Soil Wetness|0.5° x 0.625°|0.00 - 0.05| water-free [0], saturated soil[1]|
|GWETROOT|Root Zone Soil Wetness|0.5° x 0.625°|0.10 - 1.00|water-free [0], saturated soil[1]|
|CLOUD_AMT|Cloud Amount|---|---|%|
|TS|Earth Skin Temperature|---|---|°C|
|PS|Surface Pressure|---|---|kPa|
|RH2M|Relative Humidity at 2 Meters|---|---|%|
|QV2M|Specific Humidity at 2 Meters|---|---|g/kg|
|PRECTOTCORR|Precipitation Corrected|---|---|mm/day|
|T2M_MAX|Temperature at 2 Meters Maximum|---|---|°C|
|T2M_MIN|Temperature at 2 Meters Minimum|---|---|°C|
|T2M_RANGE|Temperature at 2 Meters Range|---|---|°C|
|WS2M|Wind Speed at 2 Meters|---|---|m/s|


  

*Precipitation Corrected (PRECTOTCORR): the bias-corrected average of total precipitation at the surface of the earth in water mass (includes water content in snow).

*Source: https://power.larc.nasa.gov/#resources

In [None]:

# 3 x 2 grid of yearly line plots for the climate variables.
fig, ax = plt.subplots(3, 2, sharex=True, figsize=(20, 10))

# The table was restricted to Year < 2020 when it was loaded, so the period
# shown ends in 2019 (the title previously said 2021).
fig.suptitle('Climate in Italy 2006 - 2019')

# Year as string so it is treated as a category on the shared x axis.
grapes['Year'] = grapes['Year'].astype(str)

# (column, axes) pairs; panel positions are unchanged from the original layout.
climate_panels = [
    ('RH2M', ax[0, 0]),         # relative humidity at 2 m
    ('PRECTOTCORR', ax[0, 1]),  # corrected precipitation
    ('T2M_MIN', ax[1, 0]),      # minimum temperature at 2 m
    ('T2M_MAX', ax[1, 1]),      # maximum temperature at 2 m
    ('GWETTOP', ax[2, 0]),      # surface soil wetness
    ('GWETROOT', ax[2, 1]),     # root zone soil wetness
]
for column, panel_ax in climate_panels:
    sns.lineplot(data=grapes, x='Year', y=column, ax=panel_ax)


In [None]:
# Display the table before selecting the modelling columns.
grapes

In [None]:
# Columns left out of the modelling table.
unused_columns = [
    'Year',
    'PS',
    'TS',
    'GWETTOP',
    'QV2M',
    'WS2M',
    'T2M_RANGE',
    'GWETPROF',
    'CLOUD_AMT',
    'PRECTOTCORR',
    'lat',
    'lon',
]
grapes_model = grapes.drop(columns=unused_columns)
grapes_model

In [None]:
# Write the modelling table to disk.
# to_csv() returns None when given a path, so its result must not be assigned
# back to grapes_model (that would replace the DataFrame with None).
grapes_model.to_csv('grapes_model2006-2019.csv', index=False)

# References


http://dati.istat.it

https://maps.princeton.edu/catalog/stanford-mn871sp9778

https://www.crea.gov.it/documents/68457/0/ITACONTA+2020_ENG+DEF+xweb+%281%29.pdf/95c6b30a-1e18-8e94-d4ac-ce884aef76e8?t=1619527317576

https://seaborn.pydata.org/generated/seaborn.relplot.html

https://www.statisticshowto.com/variance-inflation-factor/

https://statisticsbyjim.com/regression/multicollinearity-in-regression-analysis/

https://lost-stats.github.io/Presentation/Figures/heatmap_colored_correlation_matrix.html

https://plotly.com/python/box-plots/

https://pycaret.org/