# T3 Truck Transaction Data

This notebook explores the cleaned T3 data, and produces some summary statistics and visualisations.

### Imports

In [1]:
import pandas as pd
import altair as alt

### Load Data Function

In [2]:
def load_data(file_path: str) -> pd.DataFrame:
    """Read a CSV file and return its contents as a DataFrame.

    Args:
        file_path: Location of the CSV file to read.

    Returns:
        The parsed table, using the default ``pd.read_csv`` settings.
    """
    frame = pd.read_csv(file_path)
    return frame

### Load Data

In [21]:
# Cleaned CSV exports, listed in the order they are unpacked below:
# payment-method table, truck table, then the transaction table.
csv_paths = (
    "clean_data/DIM_Payment_Method.csv",
    "clean_data/DIM_Truck.csv",
    "clean_data/FACT_Transaction.csv",
)

payment_method_df, truck_df, transaction_df = map(load_data, csv_paths)

### Which trucks are making the most transactions?

In [None]:
# Count transactions per truck: join every transaction to its truck record on
# truck_id, then tally the joined rows under each truck name.
truck_transactions = (
    transaction_df
    .merge(truck_df, on='truck_id')
    .groupby('truck_name')
    .size()
    .reset_index(name='transaction_count')
)

# Horizontal bars; sort='-x' orders the trucks by descending transaction count.
alt.Chart(truck_transactions).mark_bar().encode(
    x=alt.X('transaction_count:Q', title='Number of Transactions'),
    y=alt.Y('truck_name:N', title='Truck Name', sort='-x'),
    color=alt.Color('truck_name:N'),
).properties(
    title='Transactions by Truck'
)

### What proportion of people pay in cash vs card?

In [5]:
# Count transactions per payment method, then express each count as a share of
# all transactions so the chart answers the "what proportion" question directly.
payment_methods = (
    transaction_df
    .merge(payment_method_df, on='payment_method_id')
    .groupby('payment_method')
    .size()
    .reset_index(name='transaction_count')
    .assign(
        proportion=lambda counts: (
            counts['transaction_count'] / counts['transaction_count'].sum()
        )
    )
)

# Pie chart: slice angle follows the transaction count; hovering a slice shows
# the payment method, its count, and its share of all transactions.
alt.Chart(payment_methods).mark_arc().encode(
    theta=alt.Theta('transaction_count:Q', title='Number of Transactions'),
    color=alt.Color('payment_method:N', title='Payment Method'),
    tooltip=[
        alt.Tooltip('payment_method:N', title='Payment Method'),
        alt.Tooltip('transaction_count:Q', title='Number of Transactions'),
        alt.Tooltip('proportion:Q', title='Share of Transactions', format='.1%'),
    ],
).properties(
    title='Transactions by Payment Method'
)

### Is there a correlation between a truck's FSA rating and its sales?

In [27]:
# Attach each truck's FSA rating to its transaction count.
# NOTE: relies on `truck_transactions` being the per-truck aggregate
# (truck_name, transaction_count) built in the "most transactions" cell, so
# that cell must be run first.
transaction_fsa = truck_transactions.merge(
    truck_df[['truck_name', 'fsa_rating']],
    on='truck_name',
    how='left'
)

# One bar per FSA rating, with each truck's transactions drawn as its own
# colour segment. The ratings are ordered explicitly with sort='descending':
# the previous sort('-y') asked the y axis to sort by its own encoding.
alt.Chart(transaction_fsa).mark_bar().encode(
    x=alt.X('transaction_count:Q', title='Number of Transactions'),
    y=alt.Y('fsa_rating:N', title='FSA Rating', sort='descending'),
    color=alt.Color('truck_name:N', title='Truck Name'),
).properties(
    title='Transactions by FSA Rating'
)