In [1]:
#Setup
import pandas as pd
import altair as alt
from pathlib import Path

In [2]:
parent_path = str(Path().resolve().parent.parent) + "/"
data_path = "data/"
processed_path = "processed/"
processed_data_file = "processed_data.csv"

In [3]:
df = pd.read_csv(parent_path + data_path + processed_path + processed_data_file)

In [4]:
df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,TotalPrice
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom,15.30
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom,20.34
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom,22.00
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom,20.34
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom,20.34
...,...,...,...,...,...,...,...,...,...
539108,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,12/9/2011 12:50,0.85,12680.0,France,10.20
539109,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,12/9/2011 12:50,2.10,12680.0,France,12.60
539110,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,12/9/2011 12:50,4.15,12680.0,France,16.60
539111,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,12/9/2011 12:50,4.15,12680.0,France,16.60


In [5]:
df = df[df["InvoiceDate"]< "12/1/2011"]

In [6]:
# Filter nach den umsatzstärksten Ländern
df_plot = df.groupby("Country").sum().sort_values(["TotalPrice"], ascending=False)[:10]
df_plot["Country"] = df_plot.index


# Visualisierung der umsatzstärksten Kunden
chart_co1 = alt.Chart(df_plot).mark_bar().encode(
    x=alt.X("TotalPrice",
            axis=alt.Axis(title="Umsatz in £", 
                          labelAngle=0,
                          titleAnchor="start",
                          grid=False)),
    y=alt.Y("Country",
            sort="-x", 
            axis=alt.Axis(title = "Land",
                          titleAnchor="middle")),
    color=alt.condition(
        alt.FieldOneOfPredicate("Country", ["United Kingdom"]),
        alt.value("orange"),     
        alt.value("steelblue")     
    )
).properties(
    title={"text":["Die 10 umsatzstärksten Länder"], "subtitle":["Im Zeitraum zwischen dem 01.12.2010 - 30.11.2011"]},
    width=550,
    height=350
).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=22,
    font="Arial",
    color="black",
    anchor="start"
).configure_axis(
    labelFont="Arial",
    titleFont="Arial",
    labelFontSize=14,
    titleFontSize=18
)
chart_co1

In [7]:
# Filter nach den umsatzstärksten Ländern
df_plot = df.groupby("Country").sum().sort_values(["TotalPrice"], ascending=False)

In [8]:
revenue_uk = 0
revenue_else = 0

for i in df_plot.index:
    if i == "United Kingdom":
        revenue_uk += df_plot.loc[i]["TotalPrice"]
    else:
        revenue_else += df_plot.loc[i]["TotalPrice"]

In [9]:
df_plot = pd.DataFrame()
df_plot["Country"] = ["United Kingdom", "Rest of the world"]
df_plot["TotalPrice"] = [revenue_uk, revenue_else]

In [10]:
df_plot

Unnamed: 0,Country,TotalPrice
0,United Kingdom,2653014.63
1,Rest of the world,482252.82


In [11]:
# funktioniert
chart = alt.Chart(df_plot).encode(
    theta=alt.Theta("TotalPrice:Q"), 
    color=alt.Color("Country:N")
)

pie = chart.mark_arc(outerRadius=100)
text = chart.mark_text(radius=130, size=12).encode(text="Country:N")

pie + text