
Business Problem
Effective inventory and sales management are critical for optimizing profitability in the retail and wholesale industry. Companies need to ensure that they are not incurring losses due to inefficient pricing, poor inventory turnover, or vendor dependency. The goal of this analysis is to:
• Identify underperforming brands that require promotional or pricing adjustments.
• Determine top vendors contributing to sales and gross profit.
• Analyze the impact of bulk purchasing on unit costs.
• Assess inventory turnover to reduce holding costs and improve efficiency.
HD
• Investigate the profitability variance between high-performing and Dw-p6:44/m:55:19encUnderstandin... >



**Exploratory Data Analysis**

Understanding the dataset to explore how the data is present in the database and if there is a need of creating some aggregated tables that can help with:

• Vendor selection for profitability

• Product Pricing Optimization

In [3]:
import pandas as pd
import sqlite3

In [4]:
# createing database connection
conn = sqlite3.connect('inventory.db')

In [5]:
# checking table present in database
tables = pd.read_sql_query(" select name from sqlite_master where type = 'table' ", conn)
tables

Unnamed: 0,name
0,Vendor_sales_summery
1,vendor_sale_summery
2,begin_inventory
3,end_inventory
4,purchases
5,purchase_prices
6,sales
7,vendor_invoice


In [6]:
for table in tables['name']:
    print('-' * 30, f'{table}','-'*30)
    print('Count of records:', pd.read_sql(f"select count(*) as count from {table}", conn)['count'].values[0])
    display(pd.read_sql(f"select * from {table} limit 5", conn))

------------------------------ Vendor_sales_summery ------------------------------
Count of records: 10692


Unnamed: 0,VendorNumber,VendorName,Brand,Description,PurchasePrice,ActualPrice,TotalPurchaseDollars,Volume,TotalPurchasesQuantity,TotalSalesDollars,TotalSalesPrice,TotalExciseTax,FreightCost,TotalSalesQuantity,GrossProfit,ProfitMargin,StockTurnover,SalespurchaseRatio
0,1128,BROWN-FORMAN CORP,1233,Jack Daniels No 7 Black,26.27,36.99,3811251.6,1750.0,145080,3201844.22,441868.14,164589.18,68601.68,89578.0,-609407.38,-19.033012,0.617439,0.840103
1,4425,MARTIGNETTI COMPANIES,3405,Tito's Handmade Vodka,23.19,28.99,3804041.22,1750.0,164038,2958675.28,369117.06,180190.31,144929.24,98068.0,-845365.94,-28.572447,0.597837,0.777772
2,17035,PERNOD RICARD USA,8068,Absolut 80 Proof,18.24,24.99,3418303.68,1750.0,187407,3011126.3,305995.78,228060.2,123780.22,124120.0,-407177.38,-13.522428,0.662302,0.880883
3,3960,DIAGEO NORTH AMERICA INC,4261,Capt Morgan Spiced Rum,16.17,22.99,3261197.94,1750.0,201682,2443144.75,274205.1,197200.85,257032.07,107325.0,-818053.19,-33.483615,0.53215,0.749156
4,3960,DIAGEO NORTH AMERICA INC,3545,Ketel One Vodka,21.89,29.99,3023206.01,1750.0,138109,2668148.63,356089.29,157899.82,257032.07,85937.0,-355057.38,-13.307256,0.62224,0.882556


------------------------------ vendor_sale_summery ------------------------------
Count of records: 10692


Unnamed: 0,VendorNumber,VendorName,Brand,Description,PurchasePrice,ActualPrice,TotalPurchaseDollars,Volume,TotalPurchasesQuantity,TotalSalesDollars,TotalSalesPrice,TotalExciseTax,FreightCost,TotalSalesQuantity,GrossProfit,ProfitMargin,StockTurnover,SalespurchaseRatio
0,1128,BROWN-FORMAN CORP,1233,Jack Daniels No 7 Black,26.27,36.99,3811251.6,1750.0,145080,3201844.22,441868.14,164589.18,68601.68,89578.0,-609407.38,-19.033012,0.617439,0.840103
1,4425,MARTIGNETTI COMPANIES,3405,Tito's Handmade Vodka,23.19,28.99,3804041.22,1750.0,164038,2958675.28,369117.06,180190.31,144929.24,98068.0,-845365.94,-28.572447,0.597837,0.777772
2,17035,PERNOD RICARD USA,8068,Absolut 80 Proof,18.24,24.99,3418303.68,1750.0,187407,3011126.3,305995.78,228060.2,123780.22,124120.0,-407177.38,-13.522428,0.662302,0.880883
3,3960,DIAGEO NORTH AMERICA INC,4261,Capt Morgan Spiced Rum,16.17,22.99,3261197.94,1750.0,201682,2443144.75,274205.1,197200.85,257032.07,107325.0,-818053.19,-33.483615,0.53215,0.749156
4,3960,DIAGEO NORTH AMERICA INC,3545,Ketel One Vodka,21.89,29.99,3023206.01,1750.0,138109,2668148.63,356089.29,157899.82,257032.07,85937.0,-355057.38,-13.307256,0.62224,0.882556


------------------------------ begin_inventory ------------------------------
Count of records: 206529


Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2024-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2024-01-01
2,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,6,36.99,2024-01-01
3,1_HARDERSFIELD_63,1,HARDERSFIELD,63,Herradura Reposado Tequila,750mL,3,38.99,2024-01-01
4,1_HARDERSFIELD_72,1,HARDERSFIELD,72,No. 3 London Dry Gin,750mL,6,34.99,2024-01-01


------------------------------ end_inventory ------------------------------
Count of records: 224489


Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2024-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2024-12-31
2,1_HARDERSFIELD_63,1,HARDERSFIELD,63,Herradura Reposado Tequila,750mL,7,38.99,2024-12-31
3,1_HARDERSFIELD_72,1,HARDERSFIELD,72,No. 3 London Dry Gin,750mL,4,34.99,2024-12-31
4,1_HARDERSFIELD_75,1,HARDERSFIELD,75,Three Olives Tomato Vodka,750mL,7,14.99,2024-12-31


------------------------------ purchases ------------------------------
Count of records: 2372474


Unnamed: 0,InventoryId,Store,Brand,Description,Size,VendorNumber,VendorName,PONumber,PODate,ReceivingDate,InvoiceDate,PayDate,PurchasePrice,Quantity,Dollars,Classification
0,69_MOUNTMEND_8412,69,8412,Tequila Ocho Plata Fresno,750mL,105,ALTAMAR BRANDS LLC,8124,2023-12-21,2024-01-02,2024-01-04,2024-02-16,35.71,6,214.26,1
1,30_CULCHETH_5255,30,5255,TGI Fridays Ultimte Mudslide,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2023-12-22,2024-01-01,2024-01-07,2024-02-21,9.35,4,37.4,1
2,34_PITMERDEN_5215,34,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2023-12-22,2024-01-02,2024-01-07,2024-02-21,9.41,5,47.05,1
3,1_HARDERSFIELD_5255,1,5255,TGI Fridays Ultimte Mudslide,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2023-12-22,2024-01-01,2024-01-07,2024-02-21,9.35,6,56.1,1
4,76_DONCASTER_2034,76,2034,Glendalough Double Barrel,750mL,388,ATLANTIC IMPORTING COMPANY,8169,2023-12-24,2024-01-02,2024-01-09,2024-02-16,21.32,5,106.6,1


------------------------------ purchase_prices ------------------------------
Count of records: 12261


Unnamed: 0,Brand,Description,Price,Size,Volume,Classification,PurchasePrice,VendorNumber,VendorName
0,58,Gekkeikan Black & Gold Sake,12.99,750mL,750,1,9.28,8320,SHAW ROSS INT L IMP LTD
1,62,Herradura Silver Tequila,36.99,750mL,750,1,28.67,1128,BROWN-FORMAN CORP
2,63,Herradura Reposado Tequila,38.99,750mL,750,1,30.46,1128,BROWN-FORMAN CORP
3,72,No. 3 London Dry Gin,34.99,750mL,750,1,26.11,9165,ULTRA BEVERAGE COMPANY LLP
4,75,Three Olives Tomato Vodka,14.99,750mL,750,1,10.94,7245,PROXIMO SPIRITS INC.


------------------------------ sales ------------------------------
Count of records: 8072002


Unnamed: 0,InventoryId,Store,Brand,Description,Size,SalesQuantity,SalesDollars,SalesPrice,SalesDate,Volume,Classification,ExciseTax,VendorNo,VendorName
0,1_HARDERSFIELD_1004,1,1004,Jim Beam w/2 Rocks Glasses,750mL,1.0,16.49,16.49,2024-01-01,750.0,1.0,0.79,12546.0,JIM BEAM BRANDS COMPANY
1,1_HARDERSFIELD_1004,1,1004,Jim Beam w/2 Rocks Glasses,750mL,2.0,32.98,16.49,2024-01-02,750.0,1.0,1.57,12546.0,JIM BEAM BRANDS COMPANY
2,1_HARDERSFIELD_1004,1,1004,Jim Beam w/2 Rocks Glasses,750mL,1.0,16.49,16.49,2024-01-03,750.0,1.0,0.79,12546.0,JIM BEAM BRANDS COMPANY
3,1_HARDERSFIELD_1004,1,1004,Jim Beam w/2 Rocks Glasses,750mL,1.0,14.49,14.49,2024-01-08,750.0,1.0,0.79,12546.0,JIM BEAM BRANDS COMPANY
4,1_HARDERSFIELD_1005,1,1005,Maker's Mark Combo Pack,375mL 2 Pk,2.0,69.98,34.99,2024-01-09,375.0,1.0,0.79,12546.0,JIM BEAM BRANDS COMPANY


------------------------------ vendor_invoice ------------------------------
Count of records: 5543


Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
0,105,ALTAMAR BRANDS LLC,2024-01-04,8124,2023-12-21,2024-02-16,6,214.26,3.47,
1,4466,AMERICAN VINTAGE BEVERAGE,2024-01-07,8137,2023-12-22,2024-02-21,15,140.55,8.57,
2,388,ATLANTIC IMPORTING COMPANY,2024-01-09,8169,2023-12-24,2024-02-16,5,106.6,4.61,
3,480,BACARDI USA INC,2024-01-12,8106,2023-12-20,2024-02-05,10100,137483.78,2935.2,
4,516,BANFI PRODUCTS CORP,2024-01-07,8170,2023-12-24,2024-02-12,1935,15527.25,429.2,


In [7]:
purchases = pd.read_sql_query("select * from purchases where VendorNumber = 4466", conn)
purchases

Unnamed: 0,InventoryId,Store,Brand,Description,Size,VendorNumber,VendorName,PONumber,PODate,ReceivingDate,InvoiceDate,PayDate,PurchasePrice,Quantity,Dollars,Classification
0,30_CULCHETH_5255,30,5255,TGI Fridays Ultimte Mudslide,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2023-12-22,2024-01-01,2024-01-07,2024-02-21,9.35,4,37.40,1
1,34_PITMERDEN_5215,34,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2023-12-22,2024-01-02,2024-01-07,2024-02-21,9.41,5,47.05,1
2,1_HARDERSFIELD_5255,1,5255,TGI Fridays Ultimte Mudslide,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2023-12-22,2024-01-01,2024-01-07,2024-02-21,9.35,6,56.10,1
3,38_GOULCREST_5215,38,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8207,2023-12-27,2024-01-07,2024-01-19,2024-02-26,9.41,6,56.46,1
4,59_CLAETHORPES_5215,59,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8207,2023-12-27,2024-01-05,2024-01-19,2024-02-26,9.41,6,56.46,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2187,81_PEMBROKE_5215,81,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,13595,2024-12-20,2024-12-29,2025-01-04,2025-02-10,9.41,6,56.46,1
2188,62_KILMARNOCK_5255,62,5255,TGI Fridays Ultimte Mudslide,1.75L,4466,AMERICAN VINTAGE BEVERAGE,13595,2024-12-20,2024-12-28,2025-01-04,2025-02-10,9.35,5,46.75,1
2189,34_PITMERDEN_5215,34,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,13595,2024-12-20,2024-12-28,2025-01-04,2025-02-10,9.41,5,47.05,1
2190,6_GOULCREST_5215,6,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,13595,2024-12-20,2024-12-31,2025-01-04,2025-02-10,9.41,6,56.46,1


In [8]:
purchase_prices = pd.read_sql(''' select * from purchase_prices where VendorNumber = 4466''', conn)
purchase_prices

Unnamed: 0,Brand,Description,Price,Size,Volume,Classification,PurchasePrice,VendorNumber,VendorName
0,5215,TGI Fridays Long Island Iced,12.99,1750mL,1750,1,9.41,4466,AMERICAN VINTAGE BEVERAGE
1,5255,TGI Fridays Ultimte Mudslide,12.99,1750mL,1750,1,9.35,4466,AMERICAN VINTAGE BEVERAGE
2,3140,TGI Fridays Orange Dream,14.99,1750mL,1750,1,11.19,4466,AMERICAN VINTAGE BEVERAGE


In [9]:
vendor_invoice = pd.read_sql(''' select * from vendor_invoice where VendorNumber = 4466''', conn)
vendor_invoice

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
0,4466,AMERICAN VINTAGE BEVERAGE,2024-01-07,8137,2023-12-22,2024-02-21,15,140.55,8.57,
1,4466,AMERICAN VINTAGE BEVERAGE,2024-01-19,8207,2023-12-27,2024-02-26,335,3142.33,16.97,
2,4466,AMERICAN VINTAGE BEVERAGE,2024-01-18,8307,2024-01-03,2024-02-18,41,383.35,1.99,
3,4466,AMERICAN VINTAGE BEVERAGE,2024-01-27,8469,2024-01-14,2024-03-11,72,673.2,3.3,
4,4466,AMERICAN VINTAGE BEVERAGE,2024-02-04,8532,2024-01-19,2024-03-15,79,740.21,3.48,
5,4466,AMERICAN VINTAGE BEVERAGE,2024-02-09,8604,2024-01-24,2024-03-15,347,3261.37,17.61,
6,4466,AMERICAN VINTAGE BEVERAGE,2024-02-17,8793,2024-02-05,2024-04-02,72,675.36,3.17,
7,4466,AMERICAN VINTAGE BEVERAGE,2024-03-01,8892,2024-02-12,2024-03-28,117,1096.05,5.15,
8,4466,AMERICAN VINTAGE BEVERAGE,2024-03-07,8995,2024-02-19,2024-04-02,129,1209.27,5.44,
9,4466,AMERICAN VINTAGE BEVERAGE,2024-03-12,9033,2024-02-22,2024-04-16,147,1377.87,6.61,


In [10]:
sales = pd.read_sql(''' select * from sales where VendorNo = 4466''', conn)
sales

Unnamed: 0,InventoryId,Store,Brand,Description,Size,SalesQuantity,SalesDollars,SalesPrice,SalesDate,Volume,Classification,ExciseTax,VendorNo,VendorName
0,1_HARDERSFIELD_5215,1,5215,TGI Fridays Long Island Iced,1.75L,1.0,12.99,12.99,2024-01-09,1750.0,1.0,1.84,4466.0,AMERICAN VINTAGE BEVERAGE
1,1_HARDERSFIELD_5215,1,5215,TGI Fridays Long Island Iced,1.75L,1.0,12.99,12.99,2024-01-12,1750.0,1.0,1.84,4466.0,AMERICAN VINTAGE BEVERAGE
2,1_HARDERSFIELD_5215,1,5215,TGI Fridays Long Island Iced,1.75L,1.0,12.99,12.99,2024-01-15,1750.0,1.0,1.84,4466.0,AMERICAN VINTAGE BEVERAGE
3,1_HARDERSFIELD_5215,1,5215,TGI Fridays Long Island Iced,1.75L,1.0,12.99,12.99,2024-01-21,1750.0,1.0,1.84,4466.0,AMERICAN VINTAGE BEVERAGE
4,1_HARDERSFIELD_5215,1,5215,TGI Fridays Long Island Iced,1.75L,1.0,12.99,12.99,2024-01-23,1750.0,1.0,1.84,4466.0,AMERICAN VINTAGE BEVERAGE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5788,74_PAENTMARWY_5255,74,5255,TGI Fridays Ultimte Mudslide,1.75L,1.0,12.99,12.99,2024-08-23,1750.0,1.0,1.84,4466.0,AMERICAN VINTAGE BEVERAGE
5789,74_PAENTMARWY_5255,74,5255,TGI Fridays Ultimte Mudslide,1.75L,1.0,12.99,12.99,2024-08-25,1750.0,1.0,1.84,4466.0,AMERICAN VINTAGE BEVERAGE
5790,74_PAENTMARWY_5255,74,5255,TGI Fridays Ultimte Mudslide,1.75L,3.0,38.97,12.99,2024-08-26,1750.0,1.0,5.51,4466.0,AMERICAN VINTAGE BEVERAGE
5791,74_PAENTMARWY_5255,74,5255,TGI Fridays Ultimte Mudslide,1.75L,2.0,25.98,12.99,2024-08-27,1750.0,1.0,3.67,4466.0,AMERICAN VINTAGE BEVERAGE


In [11]:
purchases.groupby(['Brand','PurchasePrice'])[['Quantity', 'Dollars']].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Quantity,Dollars
Brand,PurchasePrice,Unnamed: 2_level_1,Unnamed: 3_level_1
3140,11.19,4640,51921.6
5215,9.41,4923,46325.43
5255,9.35,6215,58110.25


In [12]:
sales.groupby('Brand')[['SalesDollars','SalesPrice', 'SalesQuantity']].sum()

Unnamed: 0_level_0,SalesDollars,SalesPrice,SalesQuantity
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3140,32942.64,17341.65,2536.0
5215,40074.15,26434.65,3085.0
5255,49517.88,31474.77,3812.0



• The purchases table contains actual purchase data, including the date of purchase, products (brands) purchased by vendors, the amount paid (in dollars), and the quantity purchased.

• The purchase price column is derived from the purchase_prices table, which provides product-wise actual and purchase prices. The combination of vendor and brand is unique in this table.

• The vendor_invoice table aggregates data from the purchases table, summarizing quantity and dollar amounts, along with an additional column for freight. This table maintains uniqueness based on vendor and PO number.

• The sales table captures actual sales transactions, detailing the brands purchased by vendors, the quantity sold, the selling price, and the revenue earned.

--> As the data that we need for analysis is distributed in different tables, we need to create a summary table containing:

• purchase transactions made by vendors

• sales transaction data

• freight costs for each vendor

• actual product prices from vendors

In [14]:
vendor_invoice.columns

Index(['VendorNumber', 'VendorName', 'InvoiceDate', 'PONumber', 'PODate',
       'PayDate', 'Quantity', 'Dollars', 'Freight', 'Approval'],
      dtype='object')

In [15]:
freight_summery = pd.read_sql(''' select VendorNumber, sum(Freight) as FreightCost
from vendor_invoice
group by VendorNumber''', conn)

freight_summery

Unnamed: 0,VendorNumber,FreightCost
0,2,27.08
1,54,0.48
2,60,367.52
3,105,62.39
4,200,6.19
...,...,...
121,98450,856.02
122,99166,130.09
123,172662,178.34
124,173357,202.50


In [16]:
print(purchases.columns)
print(purchase_prices.columns)

Index(['InventoryId', 'Store', 'Brand', 'Description', 'Size', 'VendorNumber',
       'VendorName', 'PONumber', 'PODate', 'ReceivingDate', 'InvoiceDate',
       'PayDate', 'PurchasePrice', 'Quantity', 'Dollars', 'Classification'],
      dtype='object')
Index(['Brand', 'Description', 'Price', 'Size', 'Volume', 'Classification',
       'PurchasePrice', 'VendorNumber', 'VendorName'],
      dtype='object')


In [17]:
pd.read_sql_query('''select 
    p.VendorNumber, 
    p.VendorName, 
    p.Brand, 
    p.PurchasePrice,
    pp.Volume, 
    pp.Price AS ActualPrice,
    SUM(p.Quantity) AS TotalPurchasesQuantity,
    SUM(p.Dollars) AS TotalPurchaseDollars
from purchases p
join purchase_prices pp on p.Brand = pp.Brand
where p.PurchasePrice > 0
group by p.VendorNumber, p.VendorName, p.Brand
order by TotalPurchaseDollars''', conn)  


Unnamed: 0,VendorNumber,VendorName,Brand,PurchasePrice,Volume,ActualPrice,TotalPurchasesQuantity,TotalPurchaseDollars
0,7245,PROXIMO SPIRITS INC.,3065,0.71,50,0.99,1,0.71
1,3960,DIAGEO NORTH AMERICA INC,6127,1.47,200,1.99,1,1.47
2,3924,HEAVEN HILL DISTILLERIES,9123,0.74,50,0.99,2,1.48
3,8004,SAZERAC CO INC,5683,0.39,50,0.49,6,2.34
4,9815,WINE GROUP INC,8527,1.32,750,4.99,2,2.64
...,...,...,...,...,...,...,...,...
10687,3960,DIAGEO NORTH AMERICA INC,3545,21.89,1750,29.99,138109,3023206.01
10688,3960,DIAGEO NORTH AMERICA INC,4261,16.17,1750,22.99,201682,3261197.94
10689,17035,PERNOD RICARD USA,8068,18.24,1750,24.99,187407,3418303.68
10690,4425,MARTIGNETTI COMPANIES,3405,23.19,1750,28.99,164038,3804041.22


In [18]:
sales.columns

Index(['InventoryId', 'Store', 'Brand', 'Description', 'Size', 'SalesQuantity',
       'SalesDollars', 'SalesPrice', 'SalesDate', 'Volume', 'Classification',
       'ExciseTax', 'VendorNo', 'VendorName'],
      dtype='object')

In [19]:
pd.read_sql(''' select 
    VendorNo,
    Brand,
    sum(SalesDollars) as TotalSalesDollars,
    sum(SalesPrice) as TotalSalesPrice,
    sum(SalesQuantity) as TotalSalesQuantity,
    sum(ExciseTax) as TotalExciseTax
    from sales
    group by VendorNo, Brand
    order by TotalSalesDollars''', conn)

Unnamed: 0,VendorNo,Brand,TotalSalesDollars,TotalSalesPrice,TotalSalesQuantity,TotalExciseTax
0,,5103,,,,
1,8004.0,5287,0.98,0.98,2.0,0.10
2,9206.0,2773,0.99,0.99,1.0,0.05
3,3252.0,3933,1.98,0.99,2.0,0.10
4,10050.0,3623,1.98,1.98,2.0,0.10
...,...,...,...,...,...,...
10176,3960.0,4261,2443144.75,274205.10,107325.0,197200.85
10177,3960.0,3545,2668148.63,356089.29,85937.0,157899.82
10178,4425.0,3405,2958675.28,369117.06,98068.0,180190.31
10179,17035.0,8068,3011126.30,305995.78,124120.0,228060.20


In [20]:
"""import time
start = time.time()
final_table = pd.read_sql(''' select
    pp.VendorNumber, 
    pp.Price as ActualPrice,
    pp.Brand, 
    pp.PurchasePrice,
    sum(s.SalesDollars) as TotalSalesDollars,
    sum(s.SalesPrice) as TotalSalesPrice,
    sum(s.SalesQuantity) as TotalSalesQuantity,
    sum(s.ExciseTax) as TotalExciseTax,
    SUM(vi.Quantity) AS TotalPurchasesQuantity,
    SUM(vi.Dollars) AS TotalPurchaseDollars
from purchase_prices pp
join sales s on pp.VendorNumber = s.VendorNo AND pp.Brand = s.Brand
join vendor_invoice vi on pp.VendorNumber = vi.VendorNumber
group by pp.VendorNumber, pp.Price, pp.Brand, pp.PurchasePrice
''',conn)
end = time.time()
"""

"import time\nstart = time.time()\nfinal_table = pd.read_sql(''' select\n    pp.VendorNumber, \n    pp.Price as ActualPrice,\n    pp.Brand, \n    pp.PurchasePrice,\n    sum(s.SalesDollars) as TotalSalesDollars,\n    sum(s.SalesPrice) as TotalSalesPrice,\n    sum(s.SalesQuantity) as TotalSalesQuantity,\n    sum(s.ExciseTax) as TotalExciseTax,\n    SUM(vi.Quantity) AS TotalPurchasesQuantity,\n    SUM(vi.Dollars) AS TotalPurchaseDollars\nfrom purchase_prices pp\njoin sales s on pp.VendorNumber = s.VendorNo AND pp.Brand = s.Brand\njoin vendor_invoice vi on pp.VendorNumber = vi.VendorNumber\ngroup by pp.VendorNumber, pp.Price, pp.Brand, pp.PurchasePrice\n''',conn)\nend = time.time()\n"

In [21]:
vendor_sale_summery = pd.read_sql(''' WITH FreightSummery as ( 
    select
        VendorNumber,
        sum(Freight) as FreightCost
    from vendor_invoice
    group by VendorNumber
),

PurchaseSummery as(
    select
        p.VendorNumber, 
        p.VendorName, 
        p.Brand, 
        p.PurchasePrice,
        p.Description,
        pp.Volume, 
        pp.Price AS ActualPrice,
        SUM(p.Quantity) AS TotalPurchasesQuantity,
        SUM(p.Dollars) AS TotalPurchaseDollars
    from purchases p 
    join purchase_prices pp on p.Brand = pp.Brand
    where p.PurchasePrice  > 0
    group by p.VendorNumber, p.VendorName, p.Brand, p.Description, p.PurchasePrice, pp.Price, pp.Volume
),

SalesSummery as (
    select 
        VendorNo,
        Brand,
        sum(SalesDollars) as TotalSalesDollars,
        sum(SalesPrice) as TotalSalesPrice,
        sum(SalesQuantity) as TotalSalesQuantity,
        sum(ExciseTax) as TotalExciseTax
    from sales
    group by VendorNo, Brand
)

select 
    ps.vendorNumber, ps.VendorName, ps.Brand, ps.Description, ps.PurchasePrice, ps.ActualPrice, ps.TotalPurchaseDollars,
    ps.Volume, ps.TotalPurchasesQuantity, ss.TotalSalesDollars, ss.TotalSalesPrice, ss.TotalExciseTax, fs.FreightCost, ss.TotalSalesQuantity
from PurchaseSummery ps 
left join SalesSummery ss on ps.VendorNumber = ss.VendorNo AND ps.Brand = ss.Brand
left join FreightSummery fs on ps.VendorNumber = fs.VendorNumber
order by ps.totalPurchaseDollars DESC''', conn)



In [22]:
vendor_sale_summery

Unnamed: 0,VendorNumber,VendorName,Brand,Description,PurchasePrice,ActualPrice,TotalPurchaseDollars,Volume,TotalPurchasesQuantity,TotalSalesDollars,TotalSalesPrice,TotalExciseTax,FreightCost,TotalSalesQuantity
0,1128,BROWN-FORMAN CORP,1233,Jack Daniels No 7 Black,26.27,36.99,3811251.60,1750,145080,3201844.22,441868.14,164589.18,68601.68,89578.0
1,4425,MARTIGNETTI COMPANIES,3405,Tito's Handmade Vodka,23.19,28.99,3804041.22,1750,164038,2958675.28,369117.06,180190.31,144929.24,98068.0
2,17035,PERNOD RICARD USA,8068,Absolut 80 Proof,18.24,24.99,3418303.68,1750,187407,3011126.30,305995.78,228060.20,123780.22,124120.0
3,3960,DIAGEO NORTH AMERICA INC,4261,Capt Morgan Spiced Rum,16.17,22.99,3261197.94,1750,201682,2443144.75,274205.10,197200.85,257032.07,107325.0
4,3960,DIAGEO NORTH AMERICA INC,3545,Ketel One Vodka,21.89,29.99,3023206.01,1750,138109,2668148.63,356089.29,157899.82,257032.07,85937.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10687,9815,WINE GROUP INC,8527,Concannon Glen Ellen Wh Zin,1.32,4.99,2.64,750,2,15.95,10.96,0.55,27100.41,5.0
10688,8004,SAZERAC CO INC,5683,Dr McGillicuddy's Apple Pie,0.39,0.49,2.34,50,6,62.72,0.98,6.72,50293.62,128.0
10689,3924,HEAVEN HILL DISTILLERIES,9123,Deep Eddy Vodka,0.74,0.99,1.48,50,2,,,,14069.87,
10690,3960,DIAGEO NORTH AMERICA INC,6127,The Club Strawbry Margarita,1.47,1.99,1.47,200,1,83.58,41.79,8.82,257032.07,42.0



This query generates a vendor-wise sales and purchase summary, which is valuable for:
Performance Optimization:

• The query involves heavy joins and aggregations on large datasets like sales and purchases.

• Storing the pre-aggregated results avoids repeated expensive computations.

• Helps in analyzing sales, purchases, and pricing for different vendors and brands.

• Future Benefits of Storing this data for faster Dashboarding & Reporting.

• Instead of running expensive queries each time, dashboards can fetch data quickly from vendor_sales_summary.

In [24]:
vendor_sale_summery.dtypes

VendorNumber                int64
VendorName                 object
Brand                       int64
Description                object
PurchasePrice             float64
ActualPrice               float64
TotalPurchaseDollars      float64
Volume                     object
TotalPurchasesQuantity      int64
TotalSalesDollars         float64
TotalSalesPrice           float64
TotalExciseTax            float64
FreightCost               float64
TotalSalesQuantity        float64
dtype: object

In [25]:
vendor_sale_summery.isnull().sum()

VendorNumber                 0
VendorName                   0
Brand                        0
Description                  0
PurchasePrice                0
ActualPrice                  0
TotalPurchaseDollars         0
Volume                       0
TotalPurchasesQuantity       0
TotalSalesDollars         1252
TotalSalesPrice           1252
TotalExciseTax            1252
FreightCost                  0
TotalSalesQuantity        1252
dtype: int64

In [26]:
vendor_sale_summery['VendorName'].unique()

array(['BROWN-FORMAN CORP          ', 'MARTIGNETTI COMPANIES',
       'PERNOD RICARD USA          ', 'DIAGEO NORTH AMERICA INC   ',
       'BACARDI USA INC            ', 'JIM BEAM BRANDS COMPANY    ',
       'MAJESTIC FINE WINES        ', 'ULTRA BEVERAGE COMPANY LLP ',
       'STOLI GROUP,(USA) LLC      ', 'PROXIMO SPIRITS INC.       ',
       'MOET HENNESSY USA INC      ', 'CAMPARI AMERICA            ',
       'SAZERAC CO INC             ', 'CONSTELLATION BRANDS INC   ',
       'M S WALKER INC             ', 'SAZERAC NORTH AMERICA INC. ',
       'PALM BAY INTERNATIONAL INC ', 'REMY COINTREAU USA INC     ',
       'SIDNEY FRANK IMPORTING CO  ', 'E & J GALLO WINERY         ',
       'WILLIAM GRANT & SONS INC   ', 'HEAVEN HILL DISTILLERIES   ',
       'DISARONNO INTERNATIONAL LLC', 'EDRINGTON AMERICAS         ',
       'CASTLE BRANDS CORP.        ', 'SOUTHERN WINE & SPIRITS NE ',
       'STE MICHELLE WINE ESTATES  ', 'TRINCHERO FAMILY ESTATES   ',
       'MHW LTD                    ', 'W

In [27]:
vendor_sale_summery['Description'].unique()

array(['Jack Daniels No 7 Black', "Tito's Handmade Vodka",
       'Absolut 80 Proof', ..., 'Crown Royal Apple',
       'Concannon Glen Ellen Wh Zin', 'The Club Strawbry Margarita'],
      dtype=object)

In [28]:
vendor_sale_summery['Volume']= vendor_sale_summery['Volume'].astype('float64')

In [29]:
vendor_sale_summery.fillna(0, inplace = True)

In [30]:
vendor_sale_summery['VendorName'] = vendor_sale_summery['VendorName'].str.strip()

In [31]:
vendor_sale_summery['GrossProfit'] = vendor_sale_summery['TotalSalesDollars'] - vendor_sale_summery['TotalPurchaseDollars']

In [32]:
vendor_sale_summery['ProfitMargin'] = (vendor_sale_summery['GrossProfit']/vendor_sale_summery['TotalSalesDollars'])*100

In [33]:
vendor_sale_summery['StockTurnover'] = vendor_sale_summery['TotalSalesQuantity']/ vendor_sale_summery['TotalPurchasesQuantity']

In [34]:
vendor_sale_summery['SalespurchaseRatio'] = vendor_sale_summery['TotalSalesDollars']/ vendor_sale_summery['TotalPurchaseDollars']

In [35]:
cursor = conn.cursor()

In [36]:
cursor.execute("DROP TABLE IF EXISTS vendor_sale_summery;")
cursor.execute('''
CREATE TABLE vendor_sale_summery (
    VendorNumber INT,
    VendorName VARCHAR(100),
    Brand VARCHAR(50),
    Description VARCHAR(255),
    ActualPrice DECIMAL(10,2),
    Volume INT,
    TotalPurchaseDollars DECIMAL(15,2),
    TotalSalesQuantity INT,
    TotalSalesDollars DECIMAL(15,2),
    TotalSalesPrice DECIMAL(15,2),
    TotalExciseTax DECIMAL(15,2),
    FreightCost DECIMAL(15,2),
    GrossProfit DECIMAL(15,2),
    ProfitMargin DECIMAL(15,2),
    StockTurnover DECIMAL(15,2),
    SalespurchaseRatio DECIMAL(15,2),
    PRIMARY KEY (VendorNumber, Brand)
);
''')


<sqlite3.Cursor at 0x299a0cd3840>

In [37]:
pd.read_sql_query("select * from vendor_sale_summery", conn)

Unnamed: 0,VendorNumber,VendorName,Brand,Description,ActualPrice,Volume,TotalPurchaseDollars,TotalSalesQuantity,TotalSalesDollars,TotalSalesPrice,TotalExciseTax,FreightCost,GrossProfit,ProfitMargin,StockTurnover,SalespurchaseRatio


In [38]:
vendor_sale_summery.to_sql('vendor_sale_summery', conn, if_exists = 'replace', index= False)

10692

In [39]:
import pandas as pd
import os
import sqlite3
import time
import logging
from sqlalchemy import create_engine
from ingestion_db import ingest_db

# Ensure logs directory exists
if not os.path.exists('logs'):
    os.makedirs('logs')

# Reset previous logging handlers
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
    
logging.basicConfig(
    filename='logs/get_vendor_summery.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filemode='a'
)

def create_vendor_summery(conn):
    '''Merge different tables to create vendor summary and return the dataframe.'''
    vendor_sale_summery = pd.read_sql_query(""" 
    WITH FreightSummery AS (
        SELECT VendorNumber, SUM(Freight) AS FreightCost
        FROM vendor_invoice
        GROUP BY VendorNumber
    ),
    
    PurchaseSummery AS (
        SELECT
            p.VendorNumber, 
            p.VendorName, 
            p.Brand, 
            p.PurchasePrice,
            p.Description,
            pp.Volume, 
            pp.Price AS ActualPrice,
            SUM(p.Quantity) AS TotalPurchasesQuantity,
            SUM(p.Dollars) AS TotalPurchaseDollars
        FROM purchases p 
        JOIN purchase_prices pp ON p.Brand = pp.Brand
        WHERE p.PurchasePrice > 0
        GROUP BY p.VendorNumber, p.VendorName, p.Brand, p.Description, p.PurchasePrice, pp.Price, pp.Volume
    ),
    
    SalesSummery AS (
        SELECT 
            VendorNo,
            Brand,
            SUM(SalesDollars) AS TotalSalesDollars,
            SUM(SalesPrice) AS TotalSalesPrice,
            SUM(SalesQuantity) AS TotalSalesQuantity,
            SUM(ExciseTax) AS TotalExciseTax
        FROM sales
        GROUP BY VendorNo, Brand
    )
    
    SELECT 
        ps.VendorNumber, 
        ps.VendorName, 
        ps.Brand, 
        ps.Description, 
        ps.PurchasePrice, 
        ps.ActualPrice, 
        ps.TotalPurchaseDollars,
        ps.Volume, 
        ps.TotalPurchasesQuantity, 
        ss.TotalSalesDollars, 
        ss.TotalSalesPrice, 
        ss.TotalExciseTax, 
        fs.FreightCost, 
        ss.TotalSalesQuantity
    FROM PurchaseSummery ps 
    LEFT JOIN SalesSummery ss ON ps.VendorNumber = ss.VendorNo AND ps.Brand = ss.Brand
    LEFT JOIN FreightSummery fs ON ps.VendorNumber = fs.VendorNumber
    ORDER BY ps.TotalPurchaseDollars DESC
    """, conn)
    return vendor_sale_summery

def clean_data(df):
    '''Clean the data and create new analytical columns.'''
    df['Volume'] = df['Volume'].astype('float')
    df.fillna(0, inplace=True)
    df['VendorName'] = df['VendorName'].str.strip()
    df['Description'] = df['Description'].str.strip()

    # Creating new columns
    df['GrossProfit'] = df['TotalSalesDollars'] - df['TotalPurchaseDollars']
    df['ProfitMargin'] = (df['GrossProfit'] / df['TotalSalesDollars']) * 100
    df['StockTurnover'] = df['TotalSalesQuantity'] / df['TotalPurchasesQuantity']
    df['SalespurchaseRatio'] = df['TotalSalesDollars'] / df['TotalPurchaseDollars']
    
    return df

if __name__ == '__main__':
    start_time = time.time()
    logging.info("Process started.")

    # DB connection
    conn = sqlite3.connect('inventory.db')

    # Step 1: Create summary
    logging.info('Creating Vendor Summary Table...')
    summary_df = create_vendor_summery(conn)
    logging.info(summary_df.head())

    # Step 2: Clean data
    logging.info('Cleaning Data...')
    clean_df = clean_data(summary_df)
    logging.info(clean_df.head())

    # Step 3: Ingest data
    logging.info('Ingesting Data...')
    ingest_db(clean_df, 'vendor_sale_summery', conn)
    logging.info('Data ingestion completed.')

    end_time = time.time()
    total_time = end_time - start_time
    logging.info(f"Start Time: {time.ctime(start_time)}")
    logging.info(f"End Time: {time.ctime(end_time)}")
    logging.info(f"Total Time Taken: {total_time:.2f} seconds")
    print("Process completed successfully.")


Process completed successfully.


In [76]:
#Save DataFrame as CSV in Your Python Script
clean_df.to_csv('vendor_sale_summery.csv', index=False)
