In [72]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

%matplotlib inline

In [73]:
# Load the FAO coffee-bean production table; the path is relative to the notebook's folder.
coffee_csv_path = "../data/FAO coffee bean production/coffee-bean-production.csv"
df = pd.read_csv(coffee_csv_path)

In [74]:
# Replace the CSV's headers with the short names used by every later cell.
# pandas raises if the frame does not have exactly four columns.
short_column_names = ["Entity", "Code", "Year", "Production"]
df.columns = short_column_names

In [75]:
# Fold Taiwan's production into China's figures, matching rows by Year.
#
# The values are aligned on the Year key rather than by row position, so the
# result stays correct even if the two entities do not cover the same years or
# their rows are ordered differently.
taiwan_production = df.loc[df["Entity"] == "Taiwan", ["Year", "Production"]]

# One value per year; summing keeps the lookup index unique even if a year
# appears more than once for Taiwan.
taiwan_by_year = taiwan_production.groupby("Year")["Production"].sum()

is_china = df["Entity"] == "China"
# Years without a Taiwan figure map to NaN; treat those as "nothing to add" so
# China's own value is preserved instead of turning into NaN.
taiwan_addition = df.loc[is_china, "Year"].map(taiwan_by_year).fillna(0)
df.loc[is_china, "Production"] += taiwan_addition

# NOTE: df is modified in place and Taiwan's rows are still present after this
# cell, so running the cell a second time would add Taiwan's output again.

In [76]:
# Inspect China's production column (displayed as the cell's output).
is_china_row = df["Entity"].eq("China")
df.loc[is_china_row, "Production"]

1007      1612.00
1008      1756.00
1009      2042.00
1010      2346.00
1011      2577.00
          ...    
1063    115110.14
1064    116519.00
1065    121012.00
1066    114970.00
1067    108906.00
Name: Production, Length: 61, dtype: float64

In [77]:
# Step 1: remove entities that are not individual countries.
#   - "Taiwan" (the notebook adds its production to China's before this cell)
#   - any entity whose name contains "(FAO)"
#   - the continent-level rows
delete_name = [entity for entity in df["Entity"].unique() if "(FAO)" in entity]
continent = ["Africa", "Asia", "Oceania", "Americas", "Europe"]
not_single_countries = ["Taiwan", *delete_name, *continent]
df = df.drop(index=df.index[df["Entity"].isin(not_single_countries).to_numpy()])

# Step 2: remove every remaining entity that has a missing value in ANY column
# of ANY of its rows (all of that entity's rows are dropped, not only the
# incomplete ones).
mask = df.isna().any(axis=1)
no_code = list(df.loc[mask, "Entity"].unique())
df = df.drop(index=df.index[df["Entity"].isin(no_code).to_numpy()])

In [78]:
# Display the frame as the cell's output; pandas abbreviates the middle rows of a long frame.
df

Unnamed: 0,Entity,Code,Year,Production
183,Angola,AGO,1961,168600.00
184,Angola,AGO,1962,185000.00
185,Angola,AGO,1963,168300.00
186,Angola,AGO,1964,198200.00
187,Angola,AGO,1965,205000.00
...,...,...,...,...
7091,Zimbabwe,ZWE,2017,683.33
7092,Zimbabwe,ZWE,2018,627.78
7093,Zimbabwe,ZWE,2019,670.37
7094,Zimbabwe,ZWE,2020,660.49


In [79]:
# Entities kept in `filter_df`. Names that match no value in df["Entity"] are
# silently ignored by isin().
# NOTE(review): "Hawaii" is a US state rather than a country - confirm the
# dataset actually contains an entity with that name.
countries_and_regions = [
    "Mexico", "Colombia", "Guatemala", "Brazil", "China", "Hawaii",
    "Honduras", "Costa Rica", "Ethiopia", "Tanzania", "Uganda", "Thailand",
    "Nicaragua", "Kenya", "El Salvador", "Indonesia", "India", "Malawi",
]

# Row-wise membership test, then select the matching rows (original row order is kept).
is_selected_entity = df["Entity"].isin(countries_and_regions)
filter_df = df.loc[is_selected_entity]

In [191]:
# `go` and `pio` are imported in the notebook's first cell; only the subplot
# helper is specific to this cell.
import plotly.subplots as sp

# ---------------------------------------------------------------------------
# Figure skeleton: one row, two columns (world map left, time series right).
# ---------------------------------------------------------------------------
fig = sp.make_subplots(rows=1, cols=2, specs=[[{'type': 'choropleth'}, {'type': 'scatter'}]],
                       subplot_titles=('World map', 'Coffee Production by Country'))

# Sorted so that slider position i is always the i-th year chronologically,
# independent of the row order of df.
years = np.sort(df['Year'].unique())
countries = filter_df.Entity.unique()
n_years = len(years)
n_countries = len(countries)

# Slider position (index into `years`) shown when the figure first renders,
# clamped so that a shorter dataset does not index past the last year.
INITIAL_STEP = min(14, max(n_years - 1, 0))

# Colours for the map. The colour axis is in natural-log units because z is
# log-transformed below.
colors_bar = ["#527A18", "#678D38", "#E9BA4E", "#BF8C31", "#6E3D19"]
custom_scale = [[i/(len(colors_bar)-1), colors_bar[i]]
                for i in range(len(colors_bar))]
# Colours for the line plot (cycled if there are more countries than colours).
colors_line = [
    "#FF5733",
    "#FFC300",
    "#527A18",
    "#678D38",
    "#00FF7F",
    "#00FFFF",
    "#0070FF",
    "#651FFF",
    "#B200FF",
    "#FF00FF",
    "#FF007F",
    "#FF69B4",
    "#DC143C",
    "#FF8C00",
    "#E9BA4E",
    "#00CED1",
    "#9400D3",
]
# One colour per country, shared by its line and its per-year marker so the
# two always agree even when a country is missing from some year.
country_color = {
    country: colors_line[idx % len(colors_line)]
    for idx, country in enumerate(countries)
}

# ---------------------------------------------------------------------------
# Traces [0, n_years): one choropleth per year (hidden until selected).
# ---------------------------------------------------------------------------
for year in years:
    year_rows = df[df["Year"] == year]  # computed once, reused for every field
    fig.add_trace(
        go.Choropleth(
            uid=f"World_{year}",
            # ISO codes used to place each value on the map
            locations=year_rows["Code"],
            # Colour by log-production so small producers remain distinguishable
            z=np.log(year_rows["Production"]),
            colorbar=dict(
                title="Production",
                x=0.45,  # place the colour bar between the two subplots
                xanchor='left',
                y=0.55,
                yanchor='middle',
                thickness=10
            ),
            visible=False,
            zmin=0,  # lower bound of the colour scale (log units)
            zmax=np.log(5000000),  # upper bound of the colour scale (log units)
            colorscale=custom_scale,
            # Hover shows the untransformed production carried in customdata
            hovertemplate="%{location}<br>Production: %{customdata:,}<extra></extra> tons",
            customdata=year_rows["Production"]
        ), row=1, col=1
    )

# ---------------------------------------------------------------------------
# Traces [n_years, 2*n_years): one marker set per year highlighting that
# year's value for each selected country (hidden until selected).
# ---------------------------------------------------------------------------
for year in years:
    year_points = filter_df[filter_df["Year"] == year]
    fig.add_trace(
        go.Scatter(
            uid=f"line_{year}",
            # One x value per point actually present for this year
            x=[pd.to_datetime(year, format="%Y")] * len(year_points),
            y=year_points["Production"],
            mode='markers',
            visible=False,
            showlegend=False,
            marker=dict(
                color=year_points["Entity"].map(country_color).tolist(), size=5),
            hovertemplate=f"Production ({year}):<br>" +
            " %{y:,} <extra></extra> tons",
        ), row=1, col=2
    )

# ---------------------------------------------------------------------------
# Traces [2*n_years, 2*n_years + n_countries): one always-visible line per
# selected country.
# ---------------------------------------------------------------------------
for country in countries:
    # Pair every production value with its own year so x and y always line up.
    country_rows = filter_df.loc[filter_df["Entity"] == country].sort_values("Year")
    fig.add_trace(
        go.Scatter(
            uid=f"line_{country}",
            x=pd.to_datetime(country_rows["Year"], format="%Y"),
            y=country_rows["Production"],
            mode='lines',
            visible=True,
            showlegend=True,
            name=country,
            line=dict(color=country_color[country], width=0.6),
            hovertemplate=f"{country}'s Production:<br>" +
            " %{y:,} <extra></extra> tons",
        ), row=1, col=2  # explicit placement, consistent with the marker traces
    )

# Axis formatting for the line-plot subplot
fig.update_yaxes(row=1, col=2, tickformat=".3s")
fig.update_xaxes(range=[pd.to_datetime(df["Year"].min(), format="%Y"),
                        pd.to_datetime(df["Year"].max(), format="%Y")], row=1, col=2)
fig.update_xaxes(title_text='Year', row=1, col=2)
fig.update_yaxes(title_text='Production(tons)', title_standoff=1, row=1, col=2)

# Show the initial year's map and markers on first render.
if n_years:
    fig.data[INITIAL_STEP].visible = True
    fig.data[n_years + INITIAL_STEP].visible = True

# ---------------------------------------------------------------------------
# Slider: step i shows choropleth i, marker set i, and every country line.
# ---------------------------------------------------------------------------
steps = []
for i, year in enumerate(years):
    visible = [False] * (2 * n_years) + [True] * n_countries
    visible[i] = True            # this year's choropleth
    visible[n_years + i] = True  # this year's markers
    steps.append(dict(
        method="update",
        # For "update" the first argument is the trace update; animation
        # options such as frame/mode belong to "animate" steps and are not
        # layout attributes, so no second argument is passed.
        args=[{"visible": visible}],
        label=f'{year}',
    ))

slider = dict(steps=steps,
              active=INITIAL_STEP,
              currentvalue={'prefix': 'Year: '},
              pad={"t": 50})

fig.update_layout(sliders=[slider],
                  geo=dict(bgcolor='#F3EFE6'),
                  paper_bgcolor='#F3EFE6',
                  plot_bgcolor='#F3EFE6',
                  margin=dict(l=20, r=15),
                  hovermode='x unified',
                  height=600,
                  width=1200,
                  title_x=0.5,
                  title_y=0.98,
                  title_xanchor="center",
                  title_yanchor="top")

# Render inline and export a standalone HTML page for the website.
fig.show()
pio.write_html(fig, file='../website/pages/map_line.html', auto_open=False)

In [11]:
# Number of distinct entities in df (a missing name, if any, counts as one value).
df["Entity"].nunique(dropna=False)

87