# Extracción de datos sobre importación de cítricos desde fuera de la UE

`En este notebook se cargan los datos de importación de cítricos de los 27 Estados miembros de la UE. Se crea un fichero .csv para cada uno de ellos y, a continuación, se genera un dashboard por país con información relevante sobre sus importaciones.`

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

In [2]:
# Load the citrus import table from the Excel export and preview its first rows
df_citricos = pd.read_excel('./Datos_UE/Import_Citricos.xlsx')
df_citricos.head()

Unnamed: 0,Sector,Marketing Year,Week,Start date,End date,Member State Code,Member State,Partner Code,Partner,Product,Product 8 digits,Product 10 digits,Euro Value,Unit Value,kg,kg Equivalent,Coefficient,Procedure,Preference
0,Citrus Fruit,2024/2025,6,2024-11-04,2024-11-10,AT,Austria,AL,Albania,Fresh or dried clementines,8052200,08052200 20,1891.2,1.182,1600.0,1600.0,1,40,300
1,Citrus Fruit,2024/2025,6,2024-11-04,2024-11-10,AT,Austria,EG,Egypt,Fresh or dried lemons,8055010,08055010 10,249.99,2.4999,100.0,100.0,1,40,300
2,Citrus Fruit,2024/2025,6,2024-11-04,2024-11-10,AT,Austria,TR,Türkiye,Fresh or dried mandarines,8052110,08052110 10,30485.41,1.561753,19520.0,19520.0,1,42,300
3,Citrus Fruit,2024/2025,6,2024-11-04,2024-11-10,AT,Austria,TR,Türkiye,Fresh or dried mandarines,8052190,08052190 11,48993.63,1.430972,34238.0,34238.0,1,40,300
4,Citrus Fruit,2024/2025,6,2024-11-04,2024-11-10,AT,Austria,TR,Türkiye,Fresh or dried lemons,8055010,08055010 10,18203.4,0.777524,23412.0,23412.0,1,40,300


In [3]:
def separation_country(df, countries, output_dir='./Countries_citricos'):
    """Write one CSV per member state, keeping only the selected citrus products.

    Rows are first restricted to the products listed in ``product_values``
    (every other product category is dropped), then split by the
    ``Member State`` column and reduced to the columns in ``columns``.

    Parameters
    ----------
    df : pandas.DataFrame
        Import table; must contain ``Product``, ``Member State`` and every
        column named in ``columns`` below.
    countries : iterable of str
        Member state names to export. A file is written for every entry,
        even when no row matches it.
    output_dir : str or pathlib.Path, optional
        Folder that receives ``<country>.csv``. It is created when missing.
        Defaults to ``./Countries_citricos``.
    """
    print('Creando fichero de cada país...')
    columns = ['Marketing Year','Week','Start date','End date','Member State','Partner','Product','Euro Value','Unit Value','kg','Procedure','Preference']
    product_values = ['Fresh oranges','Fresh or dried grapefruit','Fresh or dried mandarines','Fresh or dried lemons','Fresh or dried clementines','Fresh or dried oranges']

    # DataFrame.to_csv does not create parent folders, so make sure the target exists
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    filtered_df = df[df['Product'].isin(product_values)]
    for country in countries:
        new_df = filtered_df.loc[filtered_df['Member State'] == country, columns]
        new_df.to_csv(target_dir / f'{country}.csv', index=False)
        print(f'Fichero de {country} creado con ÉXITO')

In [4]:
# Count the rows of each product category present in the loaded table
df_citricos['Product'].value_counts()

Product
Orange juice                              42067
Fresh oranges                             34242
Fresh or dried grapefruit                 22981
Fresh or dried mandarines                 20699
Fresh or dried lemons                     19250
Fresh or dried limes                      13423
Fruit jams, puree and jellies             11435
Other fresh or dried citrus                6811
Fresh or dried clementines                 5098
Fresh or dried oranges                     4462
Mixed citrus fruit and pineapple juice     2415
Name: count, dtype: int64

In [5]:
# Every distinct member state found in the loaded table gets its own CSV export
countries = df_citricos['Member State'].unique()
separation_country(df=df_citricos, countries=countries)

Creando fichero de cada país...
Fichero de Austria creado con ÉXITO
Fichero de Belgium creado con ÉXITO
Fichero de Bulgaria creado con ÉXITO
Fichero de Germany creado con ÉXITO
Fichero de Denmark creado con ÉXITO
Fichero de Greece creado con ÉXITO
Fichero de Spain creado con ÉXITO
Fichero de Finland creado con ÉXITO
Fichero de France creado con ÉXITO
Fichero de Croatia creado con ÉXITO
Fichero de Hungary creado con ÉXITO
Fichero de Ireland creado con ÉXITO
Fichero de Lithuania creado con ÉXITO
Fichero de Latvia creado con ÉXITO
Fichero de Malta creado con ÉXITO
Fichero de Netherlands creado con ÉXITO
Fichero de Poland creado con ÉXITO
Fichero de Portugal creado con ÉXITO
Fichero de Romania creado con ÉXITO
Fichero de Sweden creado con ÉXITO
Fichero de Slovenia creado con ÉXITO
Fichero de Slovakia creado con ÉXITO
Fichero de Cyprus creado con ÉXITO
Fichero de Czechia creado con ÉXITO
Fichero de Estonia creado con ÉXITO
Fichero de Italy creado con ÉXITO
Fichero de Luxembourg creado con É

In [6]:
def top_partners(df, n=5):
    """Build a date-by-partner table of import value for the largest partners.

    Parameters
    ----------
    df : pandas.DataFrame
        Import rows with ``Start date``, ``Partner`` and ``Euro Value`` columns.
    n : int, optional
        Number of partners to keep, ranked by their total ``Euro Value`` over
        all dates in ``df``. Defaults to 5.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Start date`` with one column per kept partner, ordered
        from largest to smallest total. Cells with no matching rows hold 0.
    """
    # Collapse repeated (date, partner) rows so the pivot below sees unique keys
    value_by_date_partner = df.groupby(['Start date', 'Partner'])['Euro Value'].sum().reset_index()
    total_by_partner = value_by_date_partner.groupby('Partner')['Euro Value'].sum()

    top_partner_names = total_by_partner.nlargest(n).index
    kept = value_by_date_partner[value_by_date_partner['Partner'].isin(top_partner_names)]

    pivoted = kept.pivot(index='Start date', columns='Partner', values='Euro Value').fillna(0)
    # pivot orders columns by label; put them back in ranking order
    return pivoted[top_partner_names]

In [7]:
def pie_partners(df, after='2023-11-01', n=5):
    """Return the import value of the largest partners in the latest cycle.

    Parameters
    ----------
    df : pandas.DataFrame
        Import rows with ``Start date``, ``Partner`` and ``Euro Value`` columns.
    after : str or datetime-like, optional
        Only rows whose ``Start date`` is strictly later than this date are
        used. Defaults to 1 November 2023, taken as the start of the latest
        cycle.
    n : int, optional
        Number of partners to return. Defaults to 5.

    Returns
    -------
    tuple of (pandas.Series, pandas.Series)
        ``(data, labels)``: the summed ``Euro Value`` and the matching
        ``Partner`` names, largest first.
    """
    # ISO format is unambiguous. The previous literal '01-11-2023' is parsed
    # month-first by pandas (11 January 2023), which pulled in most of the
    # prior cycle.
    cutoff = pd.Timestamp(after)
    # to_datetime is a no-op on datetime columns and makes string columns comparable
    recent = df[pd.to_datetime(df['Start date']) > cutoff]
    totals = recent.groupby('Partner')['Euro Value'].sum().reset_index()

    top = totals.nlargest(n, 'Euro Value')
    data = top['Euro Value']
    labels = top['Partner']

    return data, labels

In [8]:
def products(df, n=5):
    """Build a date-by-product table of imported weight for the largest products.

    Parameters
    ----------
    df : pandas.DataFrame
        Import rows with ``Start date``, ``Product`` and ``kg`` columns.
    n : int, optional
        Number of products to keep, ranked by their total ``kg`` over all
        dates in ``df``. Defaults to 5.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Start date`` with one column per kept product, ordered
        from largest to smallest total. Cells with no matching rows hold 0.
    """
    # Collapse repeated (date, product) rows so the pivot below sees unique keys
    kg_by_date_product = df.groupby(['Start date', 'Product'])['kg'].sum().reset_index()
    total_by_product = kg_by_date_product.groupby('Product')['kg'].sum()

    top_product_names = total_by_product.nlargest(n).index
    kept = kg_by_date_product[kg_by_date_product['Product'].isin(top_product_names)]

    pivoted = kept.pivot(index='Start date', columns='Product', values='kg').fillna(0)
    # pivot orders columns by label; put them back in ranking order
    return pivoted[top_product_names]

In [9]:
def pie_products(df, after='2023-11-01', n=5):
    """Return the imported weight of the largest products in the latest cycle.

    Parameters
    ----------
    df : pandas.DataFrame
        Import rows with ``Start date``, ``Product`` and ``kg`` columns.
    after : str or datetime-like, optional
        Only rows whose ``Start date`` is strictly later than this date are
        used. Defaults to 1 November 2023.
    n : int, optional
        Number of products to return. Defaults to 5.

    Returns
    -------
    tuple of (pandas.Series, pandas.Series)
        ``(data, labels)``: the summed ``kg`` and the matching ``Product``
        names, largest first.
    """
    # ISO format is unambiguous. The previous literal '01-11-2023' is parsed
    # month-first by pandas (11 January 2023) instead of 1 November 2023.
    cutoff = pd.Timestamp(after)
    # to_datetime is a no-op on datetime columns and makes string columns comparable
    recent = df[pd.to_datetime(df['Start date']) > cutoff]
    totals = recent.groupby('Product')['kg'].sum().reset_index()

    top = totals.nlargest(n, 'kg')
    data = top['kg']
    labels = top['Product']

    return data, labels

In [10]:
def dashboard(df, name, output_dir='Dashboards_citricos'):
    """Render a four-panel import dashboard for one member state and save it as PNG.

    Layout (2 rows x 3 columns grid):
      * top-left (two columns): time series of Euro value for the main partners
      * top-right: pie chart of the main partners
      * bottom-left (two columns): time series of kg for the main products
      * bottom-right: pie chart of the main products

    Parameters
    ----------
    df : pandas.DataFrame
        Import rows of a single member state, passed unchanged to
        ``top_partners``, ``pie_partners``, ``products`` and ``pie_products``.
    name : str
        Used in the figure title and in the file name ``Dashboard_<name>.png``.
    output_dir : str or pathlib.Path, optional
        Folder that receives the PNG. It is created when missing. Defaults to
        ``Dashboards_citricos``.
    """
    # savefig does not create missing folders, so make sure the target exists
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    fig = plt.figure(figsize=(22, 16))
    try:
        gs = fig.add_gridspec(2, 3)

        # Partners over time
        partners_table = top_partners(df)
        ax1 = fig.add_subplot(gs[0, :2])
        for partner in partners_table.columns:
            ax1.plot(partners_table.index, partners_table[partner], label=partner)
        ax1.set_title("Principales partners", fontsize=16)
        ax1.set_ylabel('Euros (€)')
        ax1.legend(fontsize=12)
        ax1.grid()

        # Partner shares
        ax2 = fig.add_subplot(gs[0, 2])
        data, labels = pie_partners(df)
        ax2.pie(data, labels=labels, autopct='%1.1f%%', textprops={'fontsize': 10})
        ax2.set_title("Principales partners 01/11/2023 - 04/11/2024", fontsize=16)

        # Products over time
        ax3 = fig.add_subplot(gs[1, :2])
        products_table = products(df)
        for product in products_table.columns:
            ax3.plot(products_table.index, products_table[product], label=product)
        ax3.set_title("Principales productos importados", fontsize=16)
        ax3.set_ylabel('kg')
        ax3.legend(fontsize=12)
        ax3.grid()

        # Product shares
        ax4 = fig.add_subplot(gs[1, 2])
        data_p, labels_p = pie_products(df)
        ax4.pie(data_p, labels=labels_p, autopct='%1.1f%%', textprops={'fontsize': 12})
        ax4.set_title("Top 5 productos importados 01/11/2023 - 04/11/2024", fontsize=16)

        # Operate on this figure explicitly instead of pyplot's "current figure"
        fig.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.85, wspace=0.4, hspace=0.2)
        fig.suptitle(f'Dashboard of {name}', fontsize=20, y=0.9, ha='center')
        fig.savefig(target_dir / f'Dashboard_{name}.png', dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails
        plt.close(fig)


In [11]:
def main():
    """Create a dashboard image for every CSV file in ``Countries_citricos``.

    Each file is read into a DataFrame, its ``Start date`` column is parsed to
    datetimes, and the frame is handed to ``dashboard`` together with the file
    name without its extension.
    """
    for csv_file in Path("Countries_citricos").glob("*.csv"):
        country_df = pd.read_csv(csv_file)
        # CSV stores dates as text; parse them before they are compared and plotted
        country_df['Start date'] = pd.to_datetime(country_df['Start date'])
        dashboard(country_df, csv_file.stem)

In [12]:
# Disabled single-country run kept as a string literal: nothing inside it executes,
# the cell only evaluates to the string itself.
# NOTE(review): it reads './Countries/Poland.csv', whereas separation_country writes
# to './Countries_citricos/' — confirm the path before re-enabling.
'''
df_poland = pd.read_csv('./Countries/Poland.csv')
df_poland['Start date'] = pd.to_datetime(df_poland['Start date'])
dashboard(df_poland, 'Poland')
'''

"\ndf_poland = pd.read_csv('./Countries/Poland.csv')\ndf_poland['Start date'] = pd.to_datetime(df_poland['Start date'])\ndashboard(df_poland, 'Poland')\n"

In [13]:
main()