In [None]:
#Coffee Shop Sales Analysis

In [None]:
#Step 1: Importing the dependencies
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 

In [None]:
# Step 2: Read the sales records from the CSV under /kaggle/input into a DataFrame.
coffee = pd.read_csv(
    filepath_or_buffer="/kaggle/input/coffee-shop-sales-analysis/Project.csv"
)

In [None]:
# Dimensions of the data as a (row count, column count) tuple.
shape = len(coffee.index), len(coffee.columns)
print("Shape of the data is:", shape)

In [None]:
# Preview the first five rows (head() defaults to n=5).
coffee.head()

In [None]:
# Show the column labels of the loaded DataFrame.
coffee.columns

In [None]:
# Print a concise summary of the DataFrame (columns, dtypes, non-null counts).
coffee.info()

In [None]:
# Step 3: Data Cleaning
# Count the missing values in each column.
coffee.isna().sum()

In [None]:
# Step 4: Exploratory Data Analysis
# Summary statistics, shaded with a colour gradient computed down each column.
(
    coffee
    .describe()
    .style
    .background_gradient(cmap='RdPu', axis=0)
)

Q.1 What is the distribution of transaction quantities?

In [None]:
# Histogram of the transaction_qty column in 20 bins, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
coffee['transaction_qty'].hist(ax=ax, bins=20, color='skyblue', edgecolor='black')
ax.set_title('Distribution of Transaction Quantities')
ax.set_xlabel('Transaction Quantity')
ax.set_ylabel('Frequency')
plt.show()

Q.2 How does the total bill vary over time?

In [None]:
# Parse the day-month-year date strings, then sum Total_Bill per calendar month.
coffee['transaction_date'] = pd.to_datetime(coffee['transaction_date'], format='%d-%m-%Y')
month_periods = coffee['transaction_date'].dt.to_period('M')
total_bill_time = coffee['Total_Bill'].groupby(month_periods).sum()

# Line chart of the monthly totals.
fig, ax = plt.subplots(figsize=(10, 6))
total_bill_time.plot(ax=ax, marker='o')
ax.set_title('Total Bill Variation Over Time')
ax.set_xlabel('Month')
ax.set_ylabel('Total Bill')
ax.grid(True)
plt.show()

Q.3 Which store location has the highest sales?

In [None]:
# Sum Total_Bill per store location and order the result from largest to smallest.
sales_per_location = (
    coffee
    .groupby('store_location')['Total_Bill']
    .sum()
    .sort_values(ascending=False)
)

fig, ax = plt.subplots(figsize=(10, 6))
sales_per_location.plot.bar(ax=ax, color='skyblue')
ax.set_title('Total Sales Per Store Location')
ax.set_xlabel('Store Location')
ax.set_ylabel('Total Sales')
# A 360-degree rotation is a full turn, so the tick labels end up horizontal.
plt.xticks(rotation=360)
plt.show()

Q.4 What are the most popular product categories?

In [None]:
# Pie chart of how often each product_category value occurs, with percentage labels.
category_counts = coffee['product_category'].value_counts()

fig, ax = plt.subplots(figsize=(8, 6))
category_counts.plot.pie(ax=ax, autopct='%1.1f%%')
ax.set_title('Popular Product Categories')
ax.set_ylabel('')  # drop the automatic y-label pandas puts on pie charts
ax.legend(bbox_to_anchor=(1, 0.5), loc="lower left")
plt.show()

Q.5 Is there a correlation between unit price and transaction quantity?

In [None]:
# Scatter plot of unit_price (x) against transaction_qty (y); alpha softens overlapping points.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(
    coffee['unit_price'],
    coffee['transaction_qty'],
    color='skyblue',
    alpha=0.5,
)
ax.set_title('Correlation between Unit Price and Transaction Quantity')
ax.set_xlabel('Unit Price')
ax.set_ylabel('Transaction Quantity')
plt.show()

Q.6 How does the distribution of transaction quantities vary across different product types?

In [None]:
# Box plot of transaction_qty for each product_type.
# The plot is drawn on an explicit Axes: when `by=` is given without `ax=`,
# DataFrame.boxplot opens its own figure, so a preceding plt.figure() call
# would be left behind as an extra, empty figure in the cell output.
fig, ax = plt.subplots(figsize=(10, 6))
# rot=45 already rotates the x tick labels, so no separate plt.xticks call is needed.
coffee.boxplot(column='transaction_qty', by='product_type', ax=ax, rot=45)
# pandas adds a "Boxplot grouped by ..." suptitle that collides with the
# custom title below; blank it out.
fig.suptitle('')
ax.set_title('Distribution of Transaction Quantities Across Product Types')
ax.set_ylabel('Transaction Quantity')
ax.set_xlabel('Product Type')
plt.show()

Q.7 What is the average transaction quantity per hour of the day?

In [None]:
# Mean transaction_qty for each value of the Hour column, shown as a line chart.
hourly_avg_qty = coffee['transaction_qty'].groupby(coffee['Hour']).mean()

fig, ax = plt.subplots(figsize=(10, 6))
hourly_avg_qty.plot.line(ax=ax, marker='o')
ax.set_title('Average Transaction Quantity per Hour')
ax.set_xlabel('Hour')
ax.set_ylabel('Average Transaction Quantity')
ax.grid(True)
plt.show()

Q.8 Are there any outliers in the total bill?

In [None]:
# Horizontal box plot of Total_Bill; points beyond the whiskers are drawn as outliers.
fig, ax = plt.subplots(figsize=(8, 6))
coffee['Total_Bill'].plot.box(ax=ax, vert=False, patch_artist=True)
ax.set_title('Distribution of Total Bill and Outliers')
ax.set_xlabel('Total Bill')
plt.show()

Q.9 How do sales vary between weekdays and weekends?

In [None]:
# Total sales per day of the week, with bars ordered Monday -> Sunday.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# An ordered categorical makes the groupby result follow the calendar order
# instead of alphabetical order. Values not listed in day_order become NaN.
coffee['Day Name'] = pd.Categorical(coffee['Day Name'], categories=day_order, ordered=True)

# observed=False is spelled out on purpose: it keeps every weekday on the axis
# (a day with no rows sums to 0), avoids the FutureWarning newer pandas raises
# for categorical keys, and keeps the result stable when the default changes.
sales_per_day = coffee.groupby('Day Name', observed=False)['Total_Bill'].sum()

fig, ax = plt.subplots(figsize=(8, 6))
sales_per_day.plot(kind='bar', color='skyblue', ax=ax, rot=45)
ax.set_title('Sales Variation Between Weekdays and Weekends')
ax.set_xlabel('Day of the Week')
ax.set_ylabel('Total Sales')
plt.show()

Q.10 What is the trend in sales over the months?

In [None]:
# Sum Total_Bill for each value of the Month column and plot the trend.
monthly_sales = coffee['Total_Bill'].groupby(coffee['Month']).sum()

# Tick positions 1..12 are labelled with month abbreviations.
# NOTE(review): assumes Month holds month numbers 1-12 — confirm against the data.
month_positions = range(1, 13)
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

fig, ax = plt.subplots(figsize=(10, 6))
monthly_sales.plot.line(ax=ax, marker='o')
ax.set_title('Trend in Sales Over the Months')
ax.set_xlabel('Month')
ax.set_ylabel('Total Sales')
plt.xticks(month_positions, month_labels)
ax.grid(True)
plt.show()