# A manager's analysis of an unspecified coffee chain in the United States

Questions that will be queried in this analysis:
* Where are our most valuable markets?
* Which products should we focus on?
* Are our markets meeting their goals?

# Prerequisits: Importing the necessary libraries, database, and checking for nulls

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.io as pio
from plotly.subplots import make_subplots
import plotly.graph_objects as go
pio.renderers.default = "kaggle"
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/coffee-chain-sales-dataset/Coffee_Chain_Sales .csv


In [2]:
# Import the Database
df=pd.read_csv('/kaggle/input/coffee-chain-sales-dataset/Coffee_Chain_Sales .csv')
df.head(10)

Unnamed: 0,Area Code,Cogs,DifferenceBetweenActualandTargetProfit,Date,Inventory Margin,Margin,Market_size,Market,Marketing,Product_line,...,Product,Profit,Sales,State,Target_cogs,Target_margin,Target_profit,Target_sales,Total_expenses,Type
0,303,51,-35,10/1/2012,503,71,Major Market,Central,46,Leaves,...,Lemon,-5,122,Colorado,30,60,30,90,76,Decaf
1,970,52,-24,10/1/2012,405,71,Major Market,Central,17,Leaves,...,Mint,26,123,Colorado,30,60,50,90,45,Decaf
2,409,43,-22,10/2/2012,419,64,Major Market,South,13,Leaves,...,Lemon,28,107,Texas,30,60,50,90,36,Decaf
3,850,38,-15,10/3/2012,871,56,Major Market,East,10,Leaves,...,Darjeeling,35,94,Florida,40,60,50,100,21,Regular
4,562,72,6,10/4/2012,650,110,Major Market,West,23,Leaves,...,Green Tea,56,182,California,20,60,50,80,54,Regular
5,712,0,-29,10/5/2012,430,43,Small Market,Central,0,Beans,...,Decaf Espresso,31,43,Iowa,0,60,60,60,12,Decaf
6,860,47,-29,10/6/2012,375,64,Small Market,East,15,Beans,...,Decaf Espresso,21,111,Connecticut,30,60,50,90,43,Decaf
7,918,27,-39,10/7/2012,859,39,Small Market,South,7,Beans,...,Decaf Irish Cream,21,66,Oklahoma,30,60,60,90,18,Decaf
8,775,31,-43,10/8/2012,1000,37,Small Market,West,9,Beans,...,Decaf Irish Cream,7,68,Nevada,30,60,50,90,30,Decaf
9,435,40,-23,10/9/2012,881,59,Small Market,West,11,Beans,...,Decaf Espresso,37,99,Utah,20,60,60,80,22,Decaf


In [3]:
df.info

<bound method DataFrame.info of       Area Code  Cogs  DifferenceBetweenActualandTargetProfit       Date  \
0           303    51                                     -35  10/1/2012   
1           970    52                                     -24  10/1/2012   
2           409    43                                     -22  10/2/2012   
3           850    38                                     -15  10/3/2012   
4           562    72                                       6  10/4/2012   
...         ...   ...                                     ...        ...   
1057        775   250                                     133  8/23/2015   
1058        971    88                                      48  8/24/2015   
1059        775   294                                    -285  8/25/2015   
1060        503   134                                      80  8/26/2015   
1061        435    20                                     -22  8/27/2015   

      Inventory Margin  Margin   Market_size   Market  

In [4]:
# Check for nulls
df.isnull().sum()

Area Code                                 0
Cogs                                      0
DifferenceBetweenActualandTargetProfit    0
Date                                      0
Inventory Margin                          0
Margin                                    0
Market_size                               0
Market                                    0
Marketing                                 0
Product_line                              0
Product_type                              0
Product                                   0
Profit                                    0
Sales                                     0
State                                     0
Target_cogs                               0
Target_margin                             0
Target_profit                             0
Target_sales                              0
Total_expenses                            0
Type                                      0
dtype: int64

# Qustion 1: Where are our most valuable markets?

In [5]:
#Check which states have the biggest traffic and profit
salestate = df.groupby('State', as_index=False)['Sales'].sum().sort_values('Sales', ascending=False)
profstate= df.groupby('State', as_index=False)['Profit'].sum().sort_values('Profit', ascending=False)
fig1=make_subplots(rows=1, cols=2, subplot_titles=["Total Sales by State", "Total Profit by State"])
fig1.add_trace(go.Bar(x=salestate['State'], y=salestate['Sales'], name='Sales'),row=1, col=1)
fig1.add_trace(go.Bar(x=profstate['State'], y=profstate['Profit'], name='Profit'),row=1, col=2)
fig1.update_layout(height=500, width=1000, title_text="Sales vs Profit by State")
fig1.show()
print("Top Sales State:", salestate.iloc[0])
print("Top Profit State:", profstate.iloc[0])

Top Sales State: State    California
Sales         23032
Name: 0, dtype: object
Top Profit State: State     Illinois
Profit        7380
Name: 4, dtype: object


# Question 2: Which products should we focus on?

In [16]:
#Check which drink makes the biggest profit
drinkprof = df.groupby('Product', as_index=False).agg({'Profit': 'sum','Product_line':'first'}).sort_values('Profit', ascending=False)
fig2=px.bar(drinkprof,x='Product',y='Profit',title='Total Profit by Product',color='Product_line',color_discrete_map={'Beans': 'saddlebrown', 'Leaves': 'forestgreen'})
fig2.update_traces(text=drinkprof['Profit'],texttemplate='%{text:.2f}', textposition='outside')
fig2.update_layout(xaxis={'categoryorder':'total descending'},yaxis_title='Total Profit ($)')
fig2.show()
#Clarify which products have negative profit
loss_makers = drinkprof[drinkprof['Profit'] < 0]

if loss_makers.empty:
    print("✅ No products have negative profit.")
else:
    print("⚠️ Products with Negative Profit:\n", loss_makers)

✅ No products have negative profit.


# Question 3: Are our markets meeting their goals?

In [25]:
# Group & process data
market_perf = df.groupby('Market', as_index=False).agg({'Profit': 'sum','Target_profit': 'sum','DifferenceBetweenActualandTargetProfit': 'sum'})
state_perf = df.groupby('State', as_index=False).agg({'Profit': 'sum','Target_profit': 'sum','DifferenceBetweenActualandTargetProfit': 'sum'})
market_perf['Met_Quota'] = market_perf['DifferenceBetweenActualandTargetProfit'] > 0
state_perf['Met_Quota'] = state_perf['DifferenceBetweenActualandTargetProfit'] > 0
market_perf = market_perf.sort_values('DifferenceBetweenActualandTargetProfit', ascending=False)
state_perf = state_perf.sort_values('DifferenceBetweenActualandTargetProfit', ascending=False)

# Create subplots
fig3 = make_subplots(rows=1, cols=2,subplot_titles=["Are Our States Meeting Their Profit Goals?","Are Our Markets Meeting Their Profit Goals?"])
fig3.add_trace(
    go.Bar(
        x=state_perf['State'],
        y=state_perf['DifferenceBetweenActualandTargetProfit'],
        name='State Profit vs Target',
        marker_color=state_perf['Met_Quota'].map({True: 'green', False: 'red'}),
        text=state_perf['DifferenceBetweenActualandTargetProfit'].round(2),
        hovertext=[
            f"Profit: ${p:,.0f}<br>Target: ${t:,.0f}<br>Diff: ${d:,.0f}"
            for p, t, d in zip(
                state_perf['Profit'],
                state_perf['Target_profit'],
                state_perf['DifferenceBetweenActualandTargetProfit']
            )
        ],
        hoverinfo="text"
    ),
    row=1, col=1
)
fig3.add_trace(
    go.Bar(
        x=market_perf['Market'],
        y=market_perf['DifferenceBetweenActualandTargetProfit'],
        name='Market Profit vs Target',
        marker_color=market_perf['Met_Quota'].map({True: 'green', False: 'red'}),
        text=market_perf['DifferenceBetweenActualandTargetProfit'].round(2),
        hovertext=[
            f"Profit: ${p:,.0f}<br>Target: ${t:,.0f}<br>Diff: ${d:,.0f}"
            for p, t, d in zip(
                market_perf['Profit'],
                market_perf['Target_profit'],
                market_perf['DifferenceBetweenActualandTargetProfit']
            )
        ],
        hoverinfo="text"
    ),
    row=1, col=2
)

# Layout and formatting
fig3.update_layout(
    height=500,
    width=1000,
    showlegend=False,
    xaxis_title='State',
    yaxis_title='Profit vs Target (Difference)',
    xaxis2_title='Market',
    yaxis2_title='Profit vs Target (Difference)',
    yaxis_tickprefix='$',
    yaxis2_tickprefix='$',
    xaxis={'categoryorder': 'total descending'},
    xaxis2={'categoryorder': 'total descending'}
)

# Dashed line at y=0 for both subplots
fig3.add_shape(
    type='line',
    x0=-0.5,
    x1=len(state_perf) - 0.5,
    y0=0,
    y1=0,
    line=dict(color='black', width=1, dash='dash'),
    xref='x',
    yref='y'
)

fig3.add_shape(
    type='line',
    x0=-0.5,
    x1=len(market_perf) - 0.5,
    y0=0,
    y1=0,
    line=dict(color='black', width=1, dash='dash'),
    xref='x2',
    yref='y2'
)

fig3.show()

# Conclusions
* California is our busiest market, while Illinois is our most profitable one. Consider investing in more locations in Illinois and changing the prices in California by a few cents to increase revenue.
* Colombian coffee appears to be our most profitable product, whilst Green Tea appears to be the least. Regardless, no negative profit from any of our products specifically. Consider pushing special offers to capitalize on Colombian's popularity and attempt to intice more sales of Green Tea.
* It appears that our Southern and Central markets are failing to meet their quotas by wide margins. Consider inquiring with local managers to recieve explanation.
* Intrestingly, whilst Illinois is our most profitable state, it's still failing to meet its quota. Consider re-evaluating quotas in proportion to anual profits.