In [36]:
#Setup
import pandas as pd
import altair as alt
from pathlib import Path

In [37]:
parent_path = str(Path().resolve().parent.parent) + "/"
data_path = "data/"
processed_path = "processed/"
processed_data_file = "processed_data.csv"

In [38]:
df = pd.read_csv(parent_path + data_path + processed_path + processed_data_file)

In [39]:
df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,TotalPrice
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom,15.30
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom,20.34
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom,22.00
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom,20.34
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom,20.34
...,...,...,...,...,...,...,...,...,...
539108,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,12/9/2011 12:50,0.85,12680.0,France,10.20
539109,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,12/9/2011 12:50,2.10,12680.0,France,12.60
539110,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,12/9/2011 12:50,4.15,12680.0,France,16.60
539111,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,12/9/2011 12:50,4.15,12680.0,France,16.60


In [40]:
df = df[df["InvoiceDate"]< "12/1/2011"]

In [41]:
df["TotalPrice"].sum()/10

313526.745

In [42]:
# Filter nach den umsatzstärksten Ländern
df_plot = df.groupby("Country").sum().sort_values(["TotalPrice"], ascending=False)[:10]
df_plot["Country"] = df_plot.index


# Visualisierung der umsatzstärksten Kunden
chart_co1 = alt.Chart(df_plot).mark_bar().encode(
    x=alt.X("TotalPrice",
            axis=alt.Axis(title="Umsatz in £", 
                          labelAngle=0,
                          titleAnchor="start",
                          grid=False)),
    y=alt.Y("Country",
            sort="-x", 
            axis=alt.Axis(title = "Land",
                          titleAnchor="middle")),
    color=alt.condition(
        alt.FieldOneOfPredicate("Country", ["United Kingdom"]),
        alt.value("orange"),     
        alt.value("steelblue")     
    )
).properties(
    title={"text":["Die 10 umsatzstärksten Länder"], "subtitle":["Im Zeitraum zwischen dem 01.12.2010 - 30.11.2011"]},
    width=550,
    height=350
)

rule = alt.Chart(
    pd.DataFrame({'x':[313526.745]})
).mark_rule(color='red', strokeDash=[4, 2]).encode(
    x='x'
)

alt.layer(chart_co1 + rule).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=22,
    font="Arial",
    color="black",
    anchor="start"
).configure_axis(
    labelFont="Arial",
    titleFont="Arial",
    labelFontSize=14,
    titleFontSize=16
)

In [43]:
# Filter nach den umsatzstärksten Ländern
df_plot = df.groupby("Country").sum().sort_values(["TotalPrice"], ascending=False)[:10]
df_plot["Country"] = df_plot.index



In [44]:
df_plot["Country"]["United Kingdom"] ="UK"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_plot["Country"]["United Kingdom"] ="UK"


In [45]:
df_plot["Country"]["EIRE"] ="Ireland"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_plot["Country"]["EIRE"] ="Ireland"


In [46]:
df_plot["TotalPrice"] = df_plot["TotalPrice"].div(1000)

In [47]:

# Visualisierung der umsatzstärksten Kunden
chart_co1 = alt.Chart(df_plot).mark_bar().encode(
    y=alt.Y("TotalPrice",
            axis=alt.Axis(title="Revenue in (thousand) pound", 
                          labelAngle=0,
                          titleAnchor="end",
                          grid=False,
                          values=[500,1000,1500,2000,2500])),
    x=alt.X("Country",
            sort=["UK", "Netherlands","Ireland","France","Germany","Australia","Spain","Switzerland","Belgium","Sweden"],
            axis=alt.Axis(title = "Country",
                          titleAnchor="start",
                          labelAngle=0)),
    color=alt.condition(
        alt.FieldOneOfPredicate("Country", ["UK"]),
        alt.value("darkred"),     
        alt.value("grey")     
    )
).properties(
    title={"text":["Top ten countries with the highest sales"], "subtitle":["In the period from 01.12.2010 to 30.11.2011"]},
    width=650,
    height=350
)

rule = alt.Chart(
    pd.DataFrame({'y':[313.52]})
).mark_rule(color='grey', strokeDash=[4, 2]).encode(
    y='y'
)

goal = alt.Chart().mark_text(
    align="left",
    baseline="bottom",
    fontSize=14,
    fontWeight=400,
    color="grey"
).encode(
    x=alt.value(666),  # pixels from left
    y=alt.value(318),  # pixels from top
    text=alt.value("Long-term Goal")
)
rule_short = alt.Chart(
    pd.DataFrame({'y':[783,8]})
).mark_rule(color='grey', strokeDash=[4, 2]).encode(
    y='y'
)

goal_short = alt.Chart().mark_text(
    align="left",
    baseline="bottom",
    fontSize=14,
    fontWeight=400,
    color="grey"
).encode(
    x=alt.value(666),  # pixels from left
    y=alt.value(259),  # pixels from top
    text=alt.value("Short-term Goal")
)

alt.layer(chart_co1 + rule + goal + rule_short + goal_short).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=22,
    font="Arial",
    color="black",
    anchor="start"
).configure_axis(
    labelFont="Arial",
    titleFont="Arial",
    labelFontSize=14,
    titleFontSize=16,
    titleFontWeight="normal",
    labelColor="grey",
    titleColor="grey"
)

In [48]:
# Filter nach den umsatzstärksten Ländern
df_plot = df.groupby("Country").sum().sort_values(["TotalPrice"], ascending=False)

In [49]:
revenue_uk = 0
revenue_else = 0

for i in df_plot.index:
    if i == "United Kingdom":
        revenue_uk += df_plot.loc[i]["TotalPrice"]
    else:
        revenue_else += df_plot.loc[i]["TotalPrice"]

In [50]:
df_plot = pd.DataFrame()
df_plot["Country"] = ["United Kingdom", "Rest of the world"]
df_plot["TotalPrice"] = [revenue_uk, revenue_else]

In [51]:
df_plot

Unnamed: 0,Country,TotalPrice
0,United Kingdom,2653014.63
1,Rest of the world,482252.82


In [64]:
total = 2653014.63 + 482252.82


In [66]:
2653014.63 / total

0.8461844714395896

In [65]:
482252.82 / total

0.1538155285604104

In [77]:
# funktioniert
chart = alt.Chart(df_plot).encode(
    theta=alt.Theta("TotalPrice:Q"), 
    color=alt.condition(
        alt.FieldOneOfPredicate("Country", ["United Kingdom"]),
        alt.value("darkred"),     
        alt.value("grey")     
    )
).properties(
    title={"text":["Distribution of revenue between the UK and the rest of the world"], "subtitle":["In the period from 01.12.2010 to 30.11.2011"]},
    width=550,
    height=350
)

pie = chart.mark_arc(outerRadius=130)
#text = chart.mark_text(radius=130, size=12).encode(text="Country:N")

text_uk = alt.Chart().mark_text(
    align="left",
    baseline="bottom",
    fontSize=14,
    fontWeight=400,
    color="darkred"
).encode(
    x=alt.value(35),  # pixels from left
    y=alt.value(50),  # pixels from top
    text=alt.value("United Kingdom (UK)")
)
revenue_uk = alt.Chart().mark_text(
    align="left",
    baseline="bottom",
    fontSize=14,
    fontWeight=700,
    color="darkred"
).encode(
    x=alt.value(35),  # pixels from left
    y=alt.value(70),  # pixels from top
    text=alt.value("Revenue share: 85%")
)

text_rest = alt.Chart().mark_text(
    align="left",
    baseline="bottom",
    fontSize=14,
    fontWeight=400,
    color="grey"
).encode(
    x=alt.value(385),  # pixels from left
    y=alt.value(50),  # pixels from top
    text=alt.value("Rest of the world")
)

revenue_rest = alt.Chart().mark_text(
    align="left",
    baseline="bottom",
    fontSize=14,
    fontWeight=700,
    color="grey"
).encode(
    x=alt.value(385),  # pixels from left
    y=alt.value(70),  # pixels from top
    text=alt.value("Revenue share: 15%")
)

alt.layer(pie, text_uk, revenue_uk, text_rest, revenue_rest).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=22,
    font="Arial",
    color="black",
    anchor="start"
).configure_axis(
    labelFont="Arial",
    titleFont="Arial",
    labelFontSize=14,
    titleFontSize=16
)