In [384]:
import pandas as pd
import altair as alt

FOLDER_PATH = "./historical"

trucks = pd.read_csv(f"{FOLDER_PATH}/combined_truck_data.csv")

# Which truck has the highest number of transactions?

In [385]:
trucks["truck_id"].value_counts()[3]



1572

# Which truck has the lowest total transaction value?

In [386]:
trucks.groupby(['truck_id'])['total_price'].sum().sort_values()

truck_id
4     854.78
6    3378.24
5    5201.50
1    5407.50
2    7458.10
3    9314.28
Name: total_price, dtype: float64

# What is the average transaction value?

In [387]:
trucks.total_price.mean()

6.271454076572108

# What is the average transaction value for each truck?

In [388]:
average_trucks = trucks.groupby(['truck_id'])['total_price'].mean()

average_trucks

truck_id
1    7.692034
2    8.080282
3    5.925115
4    2.654596
5    5.504233
6    5.865000
Name: total_price, dtype: float64

# What proportion of transactions use cash?

In [389]:
trucks["payment_type"].value_counts()

payment_type
card    2801
cash    2240
Name: count, dtype: int64

# A bar chart showing average transaction total for each truck

In [390]:
alt.data_transformers.disable_max_rows()

alt.Chart(trucks).mark_bar().encode(
    x="truck_id",
    y='average(total_price)'
)

# A pie chart showing the proportion of transactions made using cash or card

In [391]:
alt.Chart(trucks).mark_arc().encode(
    theta="count(payment_type)",
    color="payment_type"
)

In [392]:
alt.data_transformers.disable_max_rows()

base = alt.Chart(trucks).encode(
    x="truck_id",
    y='count(total_price)',
    text='count(total_price)'
)

base.mark_bar() + base.mark_text(align='left', dx=3)