In [67]:
#Setup
import pandas as pd
import altair as alt
from pathlib import Path
import numpy as np
import statsmodels.api as sm
# Turn off Altair's maximum-row check so charts can be built from the full dataset
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [68]:
# Location of the processed data set: two directory levels above the current
# working directory, then data/processed/processed_data.csv.
# All pieces are plain strings that already end with "/" so they can be concatenated.
two_levels_up = Path().resolve().parent.parent
parent_path = f"{two_levels_up}/"
data_path, processed_path = "data/", "processed/"
processed_data_file = "processed_data.csv"

In [69]:
# Read the processed CSV; the full path is assembled from the path fragments defined above
df = pd.read_csv("".join([parent_path, data_path, processed_path, processed_data_file]))

### Käufe vs Rückgaben 

In [70]:
# Rows whose invoice number contains a 'C' are treated as returns, all others as purchases
is_return = df['InvoiceNo'].str.contains('C')
returns = df[is_return]
purchases = df[~is_return]
# Drop purchases whose invoice number also appears among the returns.
# NOTE(review): no invoice left in `purchases` contains a 'C' while every invoice in
# `returns` does, so this filter appears to remove nothing; confirm the intent.
purchases = purchases[~purchases['InvoiceNo'].isin(returns['InvoiceNo'])]
purchases_count = len(purchases)
returns_count = len(returns)
# Two-row summary frame (one row per category) used as the chart data
data = pd.DataFrame({
    'Category': ['Purchases', 'Returns'],
    'Count': [purchases_count, returns_count],
})



In [72]:
# Pie chart of returns versus kept purchases, with the percentage labels derived
# from `data` instead of being typed in by hand (so they cannot drift from the counts).
counts_by_category = data.set_index("Category")["Count"]
total_count = counts_by_category.sum()
# Guard against an empty data set (avoids a division by zero)
returns_share = round(counts_by_category["Returns"] / total_count * 100, 1) if total_count else 0.0
# Derive the second share from the rounded first one so both labels add up to 100 %
purchases_share = round(100 - returns_share, 1) if total_count else 0.0
# Labels use a decimal comma, e.g. "1,6%"
returns_label = "Returns: " + f"{returns_share:.1f}".replace(".", ",") + "%"
purchases_label = "Kept purchases: " + f"{purchases_share:.1f}".replace(".", ",") + "%"

chart = alt.Chart(data).encode(
    theta=alt.Theta("Count:Q"),
    # Highlight the "Returns" slice, keep everything else neutral
    color=alt.condition(
        alt.FieldOneOfPredicate("Category", ["Returns"]),
        alt.value("darkred"),
        alt.value("grey")
    )
).properties(
    title={"text":["Distribution of returns versus kept purchases"], "subtitle":["In the period from 01.12.2010 to 30.11.2011"]},
    width=550,
    height=350
)

pie = chart.mark_arc(outerRadius=130)

# Free-floating text marks positioned in pixel coordinates next to the slices
text_returns = alt.Chart().mark_text(
    align="left",
    baseline="bottom",
    fontSize=14,
    fontWeight=400,
    color="darkred"
).encode(
    x=alt.value(230),  # pixels from left
    y=alt.value(30),  # pixels from top
    text=alt.value(returns_label)
)

text_purchases = alt.Chart().mark_text(
    align="left",
    baseline="bottom",
    fontSize=14,
    fontWeight=400,
    color="grey"
).encode(
    x=alt.value(10),  # pixels from left
    y=alt.value(100),  # pixels from top
    text=alt.value(purchases_label)
)

# Last expression of the cell: the layered, styled chart is what gets rendered
alt.layer(pie, text_returns, text_purchases).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=22,
    font="Arial",
    color="black",
    anchor="start"
).configure_axis(
    labelFont="Arial",
    titleFont="Arial",
    labelFontSize=14,
    titleFontSize=16
)

### Kunden mit den meisten Einkäufen und Rückgaben

In [73]:

# Per-customer counts of purchase rows and return rows.
# A row counts as a return when its invoice number contains a 'C'.
# Comparing with .eq(True/False) keeps rows with a missing invoice number out of both groups.
is_return = df['InvoiceNo'].str.contains('C')
purchase_counts = df[is_return.eq(False)].groupby('CustomerID')['InvoiceNo'].count()
return_counts = df[is_return.eq(True)].groupby('CustomerID')['InvoiceNo'].count()

# One row per customer that has at least one purchase row
df2 = purchase_counts.reset_index(name='Purchases')
# Look the return count up by CustomerID. Assigning a freshly re-indexed Series would
# align on row position instead and attach return counts to the wrong customers.
# Customers without any return get 0.
df2['Returns'] = df2['CustomerID'].map(return_counts).fillna(0).astype(int)
# Use the ID as a label (e.g. "13280" rather than 13280.0)
df2['CustomerID'] = df2['CustomerID'].astype(int).astype(str)
df2['Purchases'] = df2['Purchases'].astype(int)


In [84]:

# Bar chart of the ten customers with the highest return counts
top_returners = df2.sort_values('Returns', ascending=False).head(10)

returns_axis = alt.Axis(title="Returns", labelAngle=0, titleAnchor="end", grid=False)
customer_axis = alt.Axis(title="CustomerID", labelAngle=0, titleAnchor="start", grid=False)

# Two hand-picked customers are drawn in dark red, all others in grey
customer_highlight = alt.condition(
    alt.FieldOneOfPredicate("CustomerID", ["13280","14297"]),
    alt.value("darkred"),
    alt.value("grey"),
)

chart2 = (
    alt.Chart(top_returners)
    .mark_bar()
    .encode(
        # NOTE(review): alt.Scale receives 0 and 200 positionally; presumably a domain
        # was intended, which would be questionable on a nominal axis anyway. Verify.
        x=alt.X('CustomerID:N', sort='-y', scale=alt.Scale(0,200), axis=customer_axis),
        y=alt.Y('Returns', axis=returns_axis),
        color=customer_highlight,
    )
    .properties(
        title={"text":["Top 10 Customer that return the most"], "subtitle":["In the period from 01.12.2010 to 30.11.2011"] },
        width=500,
        height=350,
    )
)

# Shared look: no view border, left-anchored black title, grey Arial axes
(
    alt.layer(chart2)
    .configure_view(strokeWidth=0)
    .configure_title(fontSize=22, font="Arial", color="black", anchor="start")
    .configure_axis(
        labelFont="Arial",
        titleFont="Arial",
        labelFontSize=14,
        titleFontSize=12,
        titleFontWeight="normal",
        labelColor="grey",
        titleColor="grey",
    )
)

### Nur Kunden, die Einkäufe und Rückgaben getätigt haben

In [75]:
# Keep only the customers that have at least one return
df3 = df2.loc[df2['Returns'].ne(0)]

In [76]:
# Scatter plot: purchases (x) against returns (y), one circle per row of df3
purchases_axis = alt.Axis(title='Purchases', titleAnchor='start', labelAngle=0)
returns_axis = alt.Axis(title='Returns', titleAnchor='end', labelAngle=0)

chart3 = (
    alt.Chart(df3)
    .mark_circle()
    .encode(
        x=alt.X('Purchases', axis=purchases_axis),
        y=alt.Y('Returns', axis=returns_axis),
        # NOTE(review): the predicate lists only "" as a highlighted value, so
        # presumably no point ever matches and every circle is grey. Confirm.
        color=alt.condition(
            alt.FieldOneOfPredicate("Purchases", [""]),
            alt.value("darkred"),
            alt.value("grey"),
        ),
    )
)

# Shared look: no view border, left-anchored black title, grey Arial axes
(
    alt.layer(chart3)
    .configure_view(strokeWidth=0)
    .configure_title(fontSize=22, font="Arial", color="black", anchor="start")
    .configure_axis(
        labelFont="Arial",
        titleFont="Arial",
        labelFontSize=14,
        titleFontSize=12,
        titleFontWeight="normal",
        labelColor="grey",
        titleColor="grey",
    )
)


In [81]:
# Same scatter plot as before, zoomed in: only rows with fewer than 2000 purchases
# and fewer than 80 returns are drawn (the filter is applied on the layered chart).
purchases_axis = alt.Axis(title='Purchases', titleAnchor='start', labelAngle=0)
returns_axis = alt.Axis(title='Returns', titleAnchor='end', labelAngle=0)

chart4 = (
    alt.Chart(df3)
    .mark_circle()
    .encode(
        x=alt.X('Purchases', axis=purchases_axis),
        y=alt.Y('Returns', axis=returns_axis),
        # NOTE(review): the predicate lists only "" as a highlighted value, so
        # presumably no point ever matches and every circle is grey. Confirm.
        color=alt.condition(
            alt.FieldOneOfPredicate("Purchases", [""]),
            alt.value("darkred"),
            alt.value("grey"),
        ),
    )
    .properties(
        title={"text":["Number of purchases compared to returns"], "subtitle":["In the period from 01.12.2010 to 30.11.2011"] },
        width=500,
        height=350,
    )
)

within_zoom_window = (alt.datum.Purchases < 2000) & (alt.datum.Returns < 80)

(
    alt.layer(chart4)
    .transform_filter(within_zoom_window)
    .configure_view(strokeWidth=0)
    .configure_title(fontSize=22, font="Arial", color="black", anchor="start")
    .configure_axis(
        labelFont="Arial",
        titleFont="Arial",
        labelFontSize=14,
        titleFontSize=12,
        titleFontWeight="normal",
        labelColor="grey",
        titleColor="grey",
    )
)



In [83]:
# Scatter plot of purchases against returns, zoomed in further: only rows with
# fewer than 800 purchases and fewer than 60 returns are drawn.
chart5 = alt.Chart(df3).mark_circle().encode(
    x=alt.X('Purchases',
             axis=alt.Axis(title='Purchases',
                           titleAnchor='start',
                           labelAngle=0,)
        ),
    y=alt.Y('Returns',
            axis=alt.Axis(title='Returns',
                          titleAnchor='end',
                          labelAngle=0,)
                          ),
    # NOTE(review): the predicate lists only "" as a highlighted value, so
    # presumably no point ever matches and every circle is grey. Confirm.
    color=alt.condition(
        alt.FieldOneOfPredicate("Purchases", [""]),
        alt.value("darkred"),
        alt.value("grey")
    )
).properties(
    title={"text":["Number of purchases compared to returns"], "subtitle":["In the period from 01.12.2010 to 30.11.2011"] },
    width=500,
    height=350
)
# The former `chart = chart.transform_filter(...)` chain was removed here: it rebuilt
# and rebound the unrelated `chart` variable without ever displaying the result.

# Last expression of the cell: the filtered, styled chart is what gets rendered
alt.layer(chart5).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=22,
    font="Arial",
    color="black",
    anchor="start"
).configure_axis(
    labelFont="Arial",
    titleFont="Arial",
    labelFontSize=14,
    titleFontSize=12,
    titleFontWeight="normal",
    labelColor="grey",
    titleColor="grey"
).transform_filter(
    (alt.datum.Purchases < 800) & (alt.datum.Returns < 60)
)

In [18]:
# Add a 0/1 column to df: 1 where the invoice number contains a 'C', else 0
return_flag = np.where(df['InvoiceNo'].str.contains('C'), 1, 0)
df['Returns'] = return_flag
# Sum of the flag per StockCode, i.e. how many rows of each item are returns
df_returned = df.groupby(['StockCode'], as_index=False)['Returns'].sum()

In [87]:
# Bar chart of the ten StockCodes with the highest return counts.
# NOTE(review): this rebinds `chart3`, which an earlier cell uses for a scatter plot.
chart3=alt.Chart(df_returned.sort_values('Returns', ascending=False).head(10)).mark_bar().encode(
    y= alt.Y('Returns',
               axis=alt.Axis(title="Returns", 
                          labelAngle=0,
                          titleAnchor="start",
                          grid=False,

                        )),
    # NOTE(review): alt.Scale receives 0 and 200 positionally; presumably a domain
    # was intended, which would be questionable on a categorical axis anyway. Verify.
    x=alt.X('StockCode',sort= '-y',
            scale=alt.Scale(0,200),
            # The axis shows stock codes, so label it accordingly (was "CustomerID")
            axis=alt.Axis(title="StockCode", 
                          labelAngle=0,
                          titleAnchor="middle",
                          grid=False,
                        )),
    # Three hand-picked items are drawn in dark red, all others in grey
    color=alt.condition(
        alt.FieldOneOfPredicate("StockCode", ["22423","22960","22720"]),
        alt.value("darkred"),
        alt.value("grey")
    )
    
).properties(
    title={"text":["Top 10 items that got returned the most"], "subtitle":["In the period from 01.12.2010 to 30.11.2011"] },
    width=500,
    height=350
)
# Last expression of the cell: the styled chart is what gets rendered
alt.layer(chart3).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=22,
    font="Arial",
    color="black",
    anchor="start"
).configure_axis(
    labelFont="Arial",
    titleFont="Arial",
    labelFontSize=14,
    titleFontSize=12,
    titleFontWeight="normal",
    labelColor="grey",
    titleColor="grey"
)

### Alt (ältere Versionen der Diagramme)

In [None]:
def _purchases_vs_returns_chart(max_purchases, max_returns):
    """Build the interactive purchases-vs-returns scatter plot for one zoom window.

    Parameters
    ----------
    max_purchases : number
        Only rows with ``Purchases`` strictly below this value are drawn.
    max_returns : number
        Only rows with ``Returns`` strictly below this value are drawn.

    Returns
    -------
    The configured Altair chart built from ``df3``, with a ``CustomerID`` tooltip.
    """
    scatter = alt.Chart(df3).mark_circle().encode(
        x=alt.X('Purchases',
                axis=alt.Axis(title='Purchases',
                              titleAnchor='start',
                              labelAngle=0)),
        y=alt.Y('Returns',
                axis=alt.Axis(title='Returns',
                              titleAnchor='end',
                              labelAngle=0)),
        tooltip='CustomerID'
    ).interactive()

    # Drop the rows outside the requested window, then apply the shared styling
    return scatter.transform_filter(
        (alt.datum.Purchases < max_purchases) & (alt.datum.Returns < max_returns)
    ).configure_view(
        strokeWidth=0
    ).configure_title(
        fontSize=22,
        font="Arial",
        color="black",
        anchor="start"
    ).configure_axis(
        labelFont="Arial",
        titleFont="Arial",
        labelFontSize=14,
        titleFontSize=16,
        titleFontWeight="normal",
        labelColor="grey",
        titleColor="grey"
    ).properties(
        title='Purchases vs Returns',
        width=600,
        height=400
    )


# Progressively tighter zoom windows as (max purchases, max returns).
# A bare expression only renders when it is the last statement of a cell, so each
# chart is shown explicitly with IPython's built-in display().
for max_purchases, max_returns in [(800, 60), (400, 30), (300, 20)]:
    chart = _purchases_vs_returns_chart(max_purchases, max_returns)
    display(chart)