# The Switzerland project


In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import folium

## Load some data

In [None]:
# Load the FAOSTAT crops extract; the path is relative to the notebook's working directory.
# The file name suggests it covers Switzerland (CH) and its neighbours.
raw_CH_crops_dataset = pd.read_csv('../data/FAOSTAT_data_crops_CHandNeighbours.csv')

In [None]:
raw_CH_crops_dataset.head(60)

In [None]:
# Load the FAOSTAT flags table and display it in full
flags = pd.read_csv('../data/FAOSTAT_data_flags.csv')
flags

In [None]:
# Report the (rows, columns) size and the dtype of every column of the crops table
crops_shape = raw_CH_crops_dataset.shape
crops_dtypes = raw_CH_crops_dataset.dtypes
print("Size of the DataFrame: {s}\n".format(s=crops_shape))
print("Variable types present in DataFrame: \n{t}".format(t=crops_dtypes))

In [None]:
# One boolean per column: True when that column holds at least one missing value
print(raw_CH_crops_dataset.isnull().values.any(axis=0)) 

Here, the only values that can be NaN occur when the data is official, and we would like to keep those rows.
Therefore, there are no null values to delete.

In [None]:
# Remove, in place, every row whose flag description reports missing data.
# na=False makes a missing flag description count as "no match": without it the
# mask would contain NaN and pandas refuses to index with such a mask.
unavailable_mask = raw_CH_crops_dataset['Flag Description'].str.contains('Data not available', na=False)
raw_CH_crops_dataset.drop(index=raw_CH_crops_dataset[unavailable_mask].index, inplace=True)

In [None]:
# Print the distinct values of each categorical column, one column per line
for column_name in ('Domain', 'Area', 'Element', 'Item', 'Year', 'Unit', 'Flag Description'):
    print(raw_CH_crops_dataset[column_name].unique())

In [None]:
raw_CH_crops_dataset.head()

## Plot production of all countries over time for a selected crop

In [None]:
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual

In [None]:
# Interactive visualization


def viz_evolution(item):
    """Plot the yearly 'Production' values of one crop, one line per country.

    item: value of the 'Item' column to display.
    Switzerland is drawn with a thicker line than the other four countries.
    """
    is_selected = (raw_CH_crops_dataset['Element'] == 'Production') & (raw_CH_crops_dataset['Item'] == item)
    production = raw_CH_crops_dataset[is_selected]

    # Per-country line settings, listed in drawing (and legend) order
    line_styles = [
        ('Austria', {'color': 'green'}),
        ('France', {'color': 'skyblue'}),
        ('Switzerland', {'color': 'red', 'linewidth': 3}),
        ('Germany', {'color': 'orange'}),
        ('Italy', {'color': 'grey'}),
    ]

    plt.figure(figsize=(20, 10))
    for country, style in line_styles:
        country_rows = production[production['Area'] == country]
        plt.plot('Year', 'Value', data=country_rows, marker='', label=country, **style)

    plt.legend()
    plt.title(f'Production of {item} in Switzerland and its neighbours throughout years', fontsize=20)
    plt.xlabel("Year", fontsize=20)
    plt.ylabel("Values", fontsize=20)
    plt.show()


# One dropdown entry per distinct crop present in the dataset
items = raw_CH_crops_dataset['Item'].unique()
interact(viz_evolution, item=items)

## Plot production/area harvested for all items of all countries over time

In [None]:
# Total 'Value' per country, element and year, summed over every item
crops_sum = (
    raw_CH_crops_dataset
    .groupby(['Area', 'Element', 'Year'], as_index=False)['Value']
    .sum()
    .rename(columns={'Value': 'Sum'})
)
# Gives the sum of area/yield/production over all items, per year and per country.
# NOTE(review): every element is summed the same way; confirm that a plain sum is meaningful for yields.
crops_sum.head(60)

In [None]:
# Interactive visualization


def viz_sum_evolution(element):
    """Plot the yearly total (over all items) of one element, one line per country.

    element: value of the 'Element' column of `crops_sum` to display.
    Switzerland is drawn with a thicker line than the other four countries.
    """
    totals = crops_sum[crops_sum['Element'] == element]

    # Per-country line settings, listed in drawing (and legend) order
    line_styles = [
        ('Austria', {'color': 'green'}),
        ('France', {'color': 'skyblue'}),
        ('Switzerland', {'color': 'red', 'linewidth': 3}),
        ('Germany', {'color': 'orange'}),
        ('Italy', {'color': 'grey'}),
    ]

    plt.figure(figsize=(20, 10))
    for country, style in line_styles:
        country_rows = totals[totals['Area'] == country]
        plt.plot('Year', 'Sum', data=country_rows, marker='', label=country, **style)

    plt.legend()
    plt.title(f'{element} of all items in Switzerland and its neighbours throughout years', fontsize=20)
    plt.xlabel("Year", fontsize=20)
    plt.ylabel("Values", fontsize=20)
    plt.show()


# One dropdown entry per distinct element present in the aggregated table
elements = crops_sum['Element'].unique()
interact(viz_sum_evolution, element=elements)

## Load some more data --> Land use indicators (CH+neighbours)

In [None]:
# Load the FAOSTAT land-use indicators extract; the path is relative to the notebook's working directory
raw_land_use_dataset = pd.read_csv('../data/FAOSTAT_data_LandUseIndicators.csv')

In [None]:
raw_land_use_dataset.head(60)

In [None]:
# Report the (rows, columns) size and the dtype of every column of the land-use table
land_use_shape = raw_land_use_dataset.shape
land_use_dtypes = raw_land_use_dataset.dtypes
print("Size of the DataFrame: {s}\n".format(s=land_use_shape))
print("Variable types present in DataFrame: \n{t}".format(t=land_use_dtypes))

In [None]:
# One boolean per column: True when that column holds at least one missing value
print(raw_land_use_dataset.isnull().values.any(axis=0))  # --> PERFECT!

In [None]:
# Print the distinct values of each categorical column, one column per line
for column_name in ('Domain', 'Area', 'Element', 'Item', 'Year', 'Unit', 'Flag Description'):
    print(raw_land_use_dataset[column_name].unique())

In [None]:
import matplotlib.pyplot as plt

# Switzerland's 2016 rows, split by the kind of share they express
is_swiss_2016 = (raw_land_use_dataset['Area'] == 'Switzerland') & (raw_land_use_dataset['Year'] == 2016)
df_land = raw_land_use_dataset[is_swiss_2016 & (raw_land_use_dataset['Element'] == 'Share in Land area')]
df_agri = raw_land_use_dataset[is_swiss_2016 & (raw_land_use_dataset['Element'] == 'Share in Agricultural land')]

# First pie chart: shares of the land area.
# Slices are ordered and plotted counter-clockwise.
labels1 = df_land.Item
sizes1 = df_land.Value
# Pull the third slice out of the pie. The tuple is built from the actual number
# of slices because ax.pie raises when explode and the data differ in length.
explode = tuple(0.1 if position == 2 else 0 for position in range(len(sizes1)))
fig1, ax1 = plt.subplots()
ax1.pie(sizes1, explode=explode, labels=labels1, autopct='%1.1f%%',
        shadow=True, startangle=90)
ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
ax1.set_title('Distribution of lands in Switzerland, year 2016')
fig1.set_facecolor('white')

# Second pie chart: shares of the agricultural land (own figure, own name)
labels2 = df_agri.Item
sizes2 = df_agri.Value
fig2, ax2 = plt.subplots()
ax2.pie(sizes2, labels=labels2, autopct='%1.1f%%',
        shadow=True, startangle=90)
ax2.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
ax2.set_title('Distribution of agricultural lands in Switzerland, year 2016')
fig2.set_facecolor('white')

plt.show()

**Demographic data**

In [None]:
# Load the FAOSTAT demography extract; the path is relative to the notebook's working directory
demography = pd.read_csv('../data/FAOSTAT_data_demography.csv')

In [None]:
demography

In [None]:
# Show the distinct values found in every column of the demography table
for column_name in demography.columns:
    print(demography[column_name].unique())

In [None]:
# Keep only the three columns used afterwards: country, year and the reported value
demography = demography[['Area', 'Year', 'Value']]

In [None]:
# Work on an explicit copy so that adding a column cannot trigger a
# SettingWithCopyWarning; this replaces muting the warning for the whole session.
demography = demography.copy()

# Guarded so the cell can be re-run after 'Value' has already been replaced
if 'Value' in demography.columns:
    # Population is Value scaled by 1000 (presumably FAOSTAT reports thousands of persons; verify against the Unit column)
    demography['Population'] = demography['Value'] * 1000
    # drop() returns a new frame: the result must be assigned for the column to actually go away
    demography = demography.drop(columns='Value')

demography

**Importations for Switzerland**

In [None]:
# Load the FAOSTAT extract used below as Switzerland's imports, broken down by partner country
CH_imports = pd.read_csv('../data/FAOSTAT_data_11-23-2019.csv')

In [None]:
CH_imports.shape

In [None]:
CH_imports

In [None]:
CH_imports.dtypes

In [None]:
CH_imports.Year.min()

In [None]:
# Show the distinct values found in every column of the imports table
for column_name in CH_imports.columns:
    print(CH_imports[column_name].unique())

In [None]:
# Index labels of the rows flagged '*'
is_flagged = CH_imports['Flag'] == '*'
unofficial_stats_index = CH_imports[is_flagged].index

In [None]:
# Remove the rows previously identified as unofficial figures
CH_imports = CH_imports.drop(unofficial_stats_index, axis='index')

In [None]:
# Keep only the quantities expressed in tonnes
in_tonnes = CH_imports['Unit'] == 'tonnes'
CH_imports = CH_imports.loc[in_tonnes]

In [None]:
# Restrict the imports table to the columns needed for the aggregation below
imports_columns = ['Partner Countries', 'Item', 'Year', 'Unit', 'Value']
CH_imports = CH_imports[imports_columns]

In [None]:
# Total imported quantity per item and year, all partner countries combined
CH_imports = CH_imports.groupby(['Item', 'Year'], as_index=False)['Value'].sum()

In [None]:
# Restrict the crops table to the columns needed for the merge with the imports
crops_columns = ['Area', 'Element', 'Item', 'Year', 'Unit', 'Value']
CH_crops = raw_CH_crops_dataset[crops_columns]

In [None]:
# Swiss production rows from 1986 onwards, joined with the import totals of the
# same item and year. The left join keeps production rows that have no import;
# the two 'Value' columns become 'Value_crops' and 'Value_imports'.
swiss_production = (
    (CH_crops['Area'] == 'Switzerland')
    & (CH_crops['Element'] == 'Production')
    & (CH_crops['Year'] >= 1986)
)
CH_data = pd.merge(
    CH_crops[swiss_production],
    CH_imports,
    on=['Item', 'Year'],
    how='left',
    suffixes=('_crops', '_imports'),
)

In [None]:
CH_data

In [None]:
# Interactive visualization


def viz_evolution(item):
    """Plot Swiss production and imports of one crop over the years.

    item: value of the 'Item' column of `CH_data` to display.
    NOTE: this reuses the name of the earlier per-country `viz_evolution`
    and therefore replaces it once this cell has run.
    """
    item_rows = CH_data[CH_data['Item'] == item]

    # (column, colour, legend label) for each of the two curves
    curves = [
        ('Value_crops', 'red', 'crops'),
        ('Value_imports', 'blue', 'imports'),
    ]

    plt.figure(figsize=(20, 10))
    for column, colour, legend_label in curves:
        plt.plot('Year', column, data=item_rows, marker='', color=colour, label=legend_label, linewidth=3)

    plt.legend()
    plt.title(f'Production and imports of {item} in Switzerland throughout years', fontsize=20)
    plt.xlabel("Year", fontsize=20)
    plt.ylabel("Values [tonnes]", fontsize=20)
    plt.show()


# One dropdown entry per distinct crop present in the merged table
items = CH_data['Item'].unique()
interact(viz_evolution, item=items)

Most produced and imported products

In [None]:
# The 20 rows of 2016 with the largest production
rows_2016 = CH_data[CH_data['Year'] == 2016]
rows_2016.sort_values('Value_crops', ascending=False).head(20)

In [None]:
# The 20 rows of 2016 with the largest imports
rows_2016 = CH_data[CH_data['Year'] == 2016]
rows_2016.sort_values('Value_imports', ascending=False).head(20)

In [None]:
import plotly
import plotly.graph_objects as go


def _produced_and_imported(crop, year=2016):
    """Return the [production, imports] pair of `crop` for `year` from CH_data.

    The two columns are selected by name, so the result does not depend on
    where they sit in the frame. Raises IndexError when there is no matching row.
    """
    rows = CH_data.loc[(CH_data.Year == year) & (CH_data.Item == crop), ['Value_crops', 'Value_imports']]
    return rows.iloc[0].to_numpy()


y_wheat = _produced_and_imported('Wheat')
y_potatoes = _produced_and_imported('Potatoes')
y_beet = _produced_and_imported('Sugar beet')

# Same order as the pairs returned by the helper
x = ['Produced', 'Imported']
fig = go.Figure(go.Bar(x=x, y=y_wheat, name='Wheat'))
fig.add_trace(go.Bar(x=x, y=y_potatoes, name='Potatoes'))
fig.add_trace(go.Bar(x=x, y=y_beet, name='Sugar beet'))

# Stack the three crops on top of each other within each of the two bars
fig.update_layout(barmode='stack', xaxis={'categoryorder':'category ascending'})
fig.show()

In [None]:
import plotly.graph_objects as go

# Rows for potatoes only. Years and values are both read from this same
# selection so that every point is paired with its own year, even if potatoes
# do not cover every year present in CH_data.
potatoes = CH_data.loc[CH_data.Item == 'Potatoes']

fig = go.Figure()
# 'tonexty' fills to the previous trace; per the plotly docs it behaves like
# 'tozeroy' here because this is the first trace
fig.add_trace(go.Scatter(x=potatoes.Year, y=potatoes.Value_crops, fill='tonexty', name='Produced'))
# 'tozeroy' fills down to the x axis
fig.add_trace(go.Scatter(x=potatoes.Year, y=potatoes.Value_imports, fill='tozeroy', name='Imported'))
fig.update_layout(
    title="Potatoes importations and productions throughout years in Switzerland")
fig.show()


In [None]:
# Yearly totals of production and imports over all items (result is indexed by Year)
total_crops_imports = CH_data.groupby('Year')[['Value_crops', 'Value_imports']].sum()

In [None]:
total_crops_imports.Value_crops.values

In [None]:
fig = go.Figure()
# total_crops_imports is indexed by Year (it comes from groupby('Year')), so the
# x values are read from that index: they are then guaranteed to be in the same
# order as the sums, which CH_data.Year.unique() is not.
total_years = total_crops_imports.index
# 'tonexty' fills to the previous trace; per the plotly docs it behaves like
# 'tozeroy' here because this is the first trace
fig.add_trace(go.Scatter(x=total_years, y=total_crops_imports.Value_crops.values, fill='tonexty', name='Produced'))
# 'tozeroy' fills down to the x axis
fig.add_trace(go.Scatter(x=total_years, y=total_crops_imports.Value_imports.values, fill='tozeroy', name='Imported'))
# The chart shows the totals over all crops, not potatoes alone
fig.update_layout(
    title="Total importations and productions of all crops throughout years in Switzerland")
fig.show()


In [None]:
# Give the two value columns readable names, then reshape to long format:
# one row per original row and per kind of input, the kind going to 'Input'
# and the quantity to 'Value'
CH_data2 = CH_data.rename(columns={'Value_crops':'Country production', 'Value_imports':'Importation'})
CH_data_transformed = CH_data2.melt(
    id_vars=['Area', 'Element','Item','Year','Unit'],
    value_vars=['Country production', 'Importation'],
    var_name='Input',
    value_name='Value',
)




In [None]:
CH_data_transformed.loc[CH_data_transformed.Item=='Potatoes']

In [None]:
# Keep only a handful of crops for the area plot
selected_crops = ['Apples','Wheat','Potatoes', 'Maize', 'Oats']
CH_restrained = CH_data_transformed[CH_data_transformed['Item'].isin(selected_crops)]

In [None]:
# Just trying a plot: area chart of the selected crops, coloured by item, with
# one line group per kind of input (country production / importation)
import plotly.express as px
fig = px.area(CH_restrained, x="Year", y="Value", color='Item',
      line_group="Input")
fig.show()

**Load Switzerland temperatures**

In [None]:
# Load the Swiss temperature time series: tab-separated text file whose first
# 15 lines are skipped; the line that follows them is used as the header
CH_temperatures = pd.read_csv('../data/10.18751-Climate-Timeseries-CHTM-1.1-swiss.txt', sep="\t", header=0, skiprows=15)

In [None]:
# Keep the rows whose `time` lies in 1986..2017 (both bounds included)
in_window = CH_temperatures['time'].between(1986, 2017)
CH_temperatures = CH_temperatures[in_window]

In [None]:
# Keep only the last three columns of the table.
# NOTE(review): the selection is positional; if `time` is not one of these three columns it is dropped here — confirm.
last_three_columns = CH_temperatures.columns[-3:]
CH_temperatures = CH_temperatures[last_three_columns]

In [None]:
CH_temperatures

In [None]:
# Potato production (left axis, red) against the `year` column of the
# temperature table (right axis, blue), both drawn over the same x axis.
# NOTE(review): both series are paired with `years` purely by position; this
# assumes each holds exactly one value per year, in ascending year order — confirm.
years = np.sort(CH_data['Year'].unique())
potato_production = CH_data[CH_data['Item'] == 'Potatoes']['Value_crops']
yearly_temperature = CH_temperatures['year']

production_colour = 'tab:red'
temperature_colour = 'tab:blue'

fig, ax1 = plt.subplots()
ax1.set_xlabel('year')
ax1.set_ylabel('production', color=production_colour)
ax1.plot(years, potato_production, color=production_colour)
ax1.tick_params(axis='y', labelcolor=production_colour)

# Second y axis sharing the x axis of the first one (the x label is already set on ax1)
ax2 = ax1.twinx()
ax2.set_ylabel('temperature', color=temperature_colour)
ax2.plot(years, yearly_temperature, color=temperature_colour)
ax2.tick_params(axis='y', labelcolor=temperature_colour)

# Without this the right-hand y label is slightly clipped
fig.tight_layout()
plt.title('Potatoes production and temperatures every year')
plt.show()