## Import libraries

In [1]:
import pandas as pd
import plotly.express as px

## Read data

In [2]:
data1 = pd.read_csv('index_1.csv')

In [3]:
data1

Unnamed: 0,date,datetime,cash_type,card,money,coffee_name
0,2024-03-01,2024-03-01 10:15:50.520,card,ANON-0000-0000-0001,38.70,Latte
1,2024-03-01,2024-03-01 12:19:22.539,card,ANON-0000-0000-0002,38.70,Hot Chocolate
2,2024-03-01,2024-03-01 12:20:18.089,card,ANON-0000-0000-0002,38.70,Hot Chocolate
3,2024-03-01,2024-03-01 13:46:33.006,card,ANON-0000-0000-0003,28.90,Americano
4,2024-03-01,2024-03-01 13:48:14.626,card,ANON-0000-0000-0004,38.70,Latte
...,...,...,...,...,...,...
3631,2025-03-23,2025-03-23 10:34:54.894,card,ANON-0000-0000-1158,35.76,Cappuccino
3632,2025-03-23,2025-03-23 14:43:37.362,card,ANON-0000-0000-1315,35.76,Cocoa
3633,2025-03-23,2025-03-23 14:44:16.864,card,ANON-0000-0000-1315,35.76,Cocoa
3634,2025-03-23,2025-03-23 15:47:28.723,card,ANON-0000-0000-1316,25.96,Americano


In [4]:
data2 = pd.read_csv('index_2.csv')

In [5]:
data2

Unnamed: 0,date,datetime,cash_type,money,coffee_name
0,2025-02-08,2025-02-08 14:26:04,cash,15.0,Tea
1,2025-02-08,2025-02-08 14:28:26,cash,15.0,Tea
2,2025-02-08,2025-02-08 14:33:04,card,20.0,Espresso
3,2025-02-08,2025-02-08 15:51:04,card,30.0,Chocolate with coffee
4,2025-02-08,2025-02-08 16:35:01,cash,27.0,Chocolate with milk
...,...,...,...,...,...
257,2025-03-23,2025-03-23 14:55:46,cash,30.0,Cappuccino
258,2025-03-23,2025-03-23 15:15:36,card,25.0,Irish whiskey
259,2025-03-23,2025-03-23 17:59:25,card,28.0,Super chocolate
260,2025-03-23,2025-03-23 18:01:33,card,28.0,Vanilla with Irish whiskey


## Data Preprocessing

In [6]:
merged_data = pd.concat([data1, data2], ignore_index=True)
merged_data

Unnamed: 0,date,datetime,cash_type,card,money,coffee_name
0,2024-03-01,2024-03-01 10:15:50.520,card,ANON-0000-0000-0001,38.7,Latte
1,2024-03-01,2024-03-01 12:19:22.539,card,ANON-0000-0000-0002,38.7,Hot Chocolate
2,2024-03-01,2024-03-01 12:20:18.089,card,ANON-0000-0000-0002,38.7,Hot Chocolate
3,2024-03-01,2024-03-01 13:46:33.006,card,ANON-0000-0000-0003,28.9,Americano
4,2024-03-01,2024-03-01 13:48:14.626,card,ANON-0000-0000-0004,38.7,Latte
...,...,...,...,...,...,...
3893,2025-03-23,2025-03-23 14:55:46,cash,,30.0,Cappuccino
3894,2025-03-23,2025-03-23 15:15:36,card,,25.0,Irish whiskey
3895,2025-03-23,2025-03-23 17:59:25,card,,28.0,Super chocolate
3896,2025-03-23,2025-03-23 18:01:33,card,,28.0,Vanilla with Irish whiskey


In [7]:
merged_data.isnull().sum()

Unnamed: 0,0
date,0
datetime,0
cash_type,0
card,351
money,0
coffee_name,0


In [8]:
merged_data.dtypes

Unnamed: 0,0
date,object
datetime,object
cash_type,object
card,object
money,float64
coffee_name,object


In [9]:
merged_data['datetime'] = pd.to_datetime(merged_data['datetime'], format='mixed')

## Data analysis and visualization

In [10]:
coffee_counts = merged_data['coffee_name'].value_counts().reset_index()
coffee_counts.columns = ['coffee_name', 'count']

In [11]:
fig_coffee = px.bar(coffee_counts.head(20), x = 'coffee_name', y = 'count', title = 'Most Popular Coffee Types')

fig_coffee.update_layout(
    xaxis=dict(
        title=dict(
            text="Coffee Types"
        )
    ),
    yaxis=dict(
        title=dict(
            text="Number of Purchases"
        )
    ),
    font=dict(
        family="Roboto",
        size=18,
        color="RebeccaPurple"
    )
)

fig_coffee.show()

In [12]:
sales = merged_data.groupby('coffee_name')['money'].sum().reset_index().sort_values(by='money', ascending=False)
sales.columns = ['coffee_name', 'money']
sales

Unnamed: 0,coffee_name,money
28,Latte,28658.3
1,Americano with Milk,25269.12
3,Cappuccino,18514.14
0,Americano,15437.26
23,Hot Chocolate,10172.46
12,Cocoa,8678.16
15,Cortado,7534.86
22,Espresso,3187.28
2,Americano with milk,1100.0
25,Irish whiskey,525.0


In [13]:
fig_sales = px.bar(sales, x = 'coffee_name', y = 'money', title = 'Total sales in different coffee')

fig_sales.update_layout(
    xaxis=dict(
        title=dict(
            text="Coffee Types"
        )
    ),
    yaxis=dict(
        title=dict(
            text="Total sales(money)"
        )
    ),
    font=dict(
        family="Roboto",
        size=18,
        color="RebeccaPurple"
    )
)

fig_sales.show()


In [14]:
total_money_by_date = merged_data.groupby('date')['money'].sum().reset_index()

fig = px.line(total_money_by_date, x='date', y='money',
              title='Daily coffee sales',
              labels={'date': 'Date', 'money': 'Total Money'})

fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Total sales (money)',
    font=dict(
        family="Roboto",
        size=18,
        color="RebeccaPurple"
    )
)

fig.show()

### Conclusion:

*   The most popular coffee type is Americano with Milk.
*   The highest sales is Latte



