In [2]:
import pyodbc
import pandas as pd

import warnings
warnings.filterwarnings('ignore') 

# defining the server and the database
server = '(localdb)\mssqllocaldb' 
database = 'AdventureWorksDW2012'  

# Define the connection string
conn = pyodbc.connect(
    'DRIVER={ODBC Driver 17 for SQL Server}; \
    SERVER='+ server +'; \
    DATABASE ='+ database +';\
    Trusted_Connection=yes;'
)


### Using a TOP 100 filter to limit the amount of data pulled from the exploratory query on 
###  [AdventureWorksDW2012].[dbo].[DimProduct] table
### [AdventureWorksDW2012].[dbo].[DimProductCategory] table
###  [AdventureWorksDW2012].[dbo].[DimProductSubcategory] table for questions  1 - 3.

In [10]:
# [AdventureWorksDW2012].[dbo].[DimProduct] table
eda_query = pd.read_sql_query("""SELECT TOP 100 * FROM [AdventureWorksDW2012].[dbo].[DimProduct]""",conn)
eda_query

Unnamed: 0,ProductKey,ProductAlternateKey,ProductSubcategoryKey,WeightUnitMeasureCode,SizeUnitMeasureCode,EnglishProductName,SpanishProductName,FrenchProductName,StandardCost,FinishedGoodsFlag,...,ChineseDescription,ArabicDescription,HebrewDescription,ThaiDescription,GermanDescription,JapaneseDescription,TurkishDescription,StartDate,EndDate,Status
0,1,AR-5381,,,,Adjustable Race,,,,False,...,,,,,,,,1998-06-01,,Current
1,2,BA-8327,,,,Bearing Ball,,,,False,...,,,,,,,,1998-06-01,,Current
2,3,BE-2349,,,,BB Ball Bearing,,,,False,...,,,,,,,,1998-06-01,,Current
3,4,BE-2908,,,,Headset Ball Bearings,,,,False,...,,,,,,,,1998-06-01,,Current
4,5,BL-2036,,,,Blade,,,,False,...,,,,,,,,1998-06-01,,Current
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,LI-5160,,,,Internal Lock Washer 6,,,,False,...,,,,,,,,1998-06-01,,Current
96,97,LI-5800,,,,Internal Lock Washer 10,,,,False,...,,,,,,,,1998-06-01,,Current
97,98,LI-6000,,,,Internal Lock Washer 1,,,,False,...,,,,,,,,1998-06-01,,Current
98,99,LI-7160,,,,Internal Lock Washer 8,,,,False,...,,,,,,,,1998-06-01,,Current


In [4]:
# [AdventureWorksDW2012].[dbo].[DimProductCategory] table
eda_query = pd.read_sql_query("""SELECT TOP 100 * FROM [AdventureWorksDW2012].[dbo].[DimProductCategory]""",conn)
eda_query

Unnamed: 0,ProductCategoryKey,ProductCategoryAlternateKey,EnglishProductCategoryName,SpanishProductCategoryName,FrenchProductCategoryName
0,1,1,Bikes,Bicicleta,Vélo
1,2,2,Components,Componente,Composant
2,3,3,Clothing,Prenda,Vêtements
3,4,4,Accessories,Accesorio,Accessoire


In [5]:
# [AdventureWorksDW2012].[dbo].[DimProductSubcategory] table
eda_query = pd.read_sql_query("""SELECT TOP 100 * FROM [AdventureWorksDW2012].[dbo].[DimProductSubcategory]""",conn)
eda_query

Unnamed: 0,ProductSubcategoryKey,ProductSubcategoryAlternateKey,EnglishProductSubcategoryName,SpanishProductSubcategoryName,FrenchProductSubcategoryName,ProductCategoryKey
0,1,1,Mountain Bikes,Bicicleta de montaña,VTT,1
1,2,2,Road Bikes,Bicicleta de carretera,Vélo de route,1
2,3,3,Touring Bikes,Bicicleta de paseo,Vélo de randonnée,1
3,4,4,Handlebars,Barra,Barre d'appui,2
4,5,5,Bottom Brackets,Eje de pedalier,Axe de pédalier,2
5,6,6,Brakes,Frenos,Freins,2
6,7,7,Chains,Cadena,Chaîne,2
7,8,8,Cranksets,Bielas,Pédalier,2
8,9,9,Derailleurs,Desviador,Dérailleur,2
9,10,10,Forks,Horquilla,Fourche,2


### 1. Produce a list of product category and their corresponding subcategory

In [12]:
### Thought Process:
### Using a FULL JOIN DimProductCategory table and DimProductSubcategory
### In the SELECT statement output columns product category with its corresponding subcategory

sql_query = pd.read_sql_query("""SELECT Cate.EnglishProductCategoryName, SubCate.EnglishProductSubcategoryName
                                 FROM [AdventureWorksDW2012].[dbo].[DimProductCategory] AS Cate
                                 INNER JOIN [AdventureWorksDW2012].[dbo].[DimProductSubcategory] AS SubCate
                                 ON SubCate.ProductCategoryKey = Cate.ProductCategoryKey
                                 ORDER BY Cate.ProductCategoryKey""", conn)
sql_query

Unnamed: 0,EnglishProductCategoryName,EnglishProductSubcategoryName
0,Bikes,Mountain Bikes
1,Bikes,Road Bikes
2,Bikes,Touring Bikes
3,Components,Handlebars
4,Components,Bottom Brackets
5,Components,Brakes
6,Components,Chains
7,Components,Cranksets
8,Components,Derailleurs
9,Components,Forks


### 2. List the product names of all Mountain Bikes (subcategory)

In [24]:
### Though Process: 
### Using a INNER JOIN to combine DimProductSubcategory table and DimProduct table
### ON Product SubcategoryKey
### In the SELECT statement output columns subcategory with its corresponding product name
### Filter with WHERE statement for only products with subcategory = 'Mountain Bikes'
sql_query = pd.read_sql_query("""SELECT SubCate.EnglishProductSubcategoryName, 
                                 Prod.EnglishProductName 
                                 FROM [AdventureWorksDW2012].[dbo].[DimProductSubcategory] AS SubCate
                                 INNER JOIN [AdventureWorksDW2012].[dbo].[DimProduct] AS Prod
                                 ON Prod.ProductSubcategoryKey = SubCate.ProductSubcategoryKey
                                 WHERE SubCate.EnglishProductSubcategoryName = 'Mountain Bikes'""", conn)
sql_query


Unnamed: 0,EnglishProductSubcategoryName,EnglishProductName
0,Mountain Bikes,"Mountain-100 Silver, 38"
1,Mountain Bikes,"Mountain-100 Silver, 42"
2,Mountain Bikes,"Mountain-100 Silver, 44"
3,Mountain Bikes,"Mountain-100 Silver, 48"
4,Mountain Bikes,"Mountain-100 Black, 38"
5,Mountain Bikes,"Mountain-100 Black, 42"
6,Mountain Bikes,"Mountain-100 Black, 44"
7,Mountain Bikes,"Mountain-100 Black, 48"
8,Mountain Bikes,"Mountain-200 Silver, 38"
9,Mountain Bikes,"Mountain-200 Silver, 38"


### 3. How many unique product names belong to the category “clothing”

In [12]:
### Though Process: 
### Using a INNER JOIN DimProductCategory table and DimProduct table
### ON ProductAlternateKey 
### Filtering with a WHERE statement EnglishProductCategoryName = 'Clothing'
### Using SELECT statment with COUNT(1)to get total number of unique products names 

sql_query = pd.read_sql_query("""SELECT COUNT(DISTINCT Sub.EnglishProductSubcategoryName) AS [Clothing Products]
                                 FROM [AdventureWorksDW2012].[dbo].[DimProduct] AS Prod
                                 INNER JOIN [AdventureWorksDW2012].[dbo].[DimProductSubcategory] AS Sub
                                 ON Prod.ProductSubcategoryKey = Sub.ProductSubcategoryKey
                                 INNER JOIN [AdventureWorksDW2012].[dbo].[DimProductCategory] AS Cat
                                 ON Cat.ProductCategoryKey = Sub.ProductCategoryKey
                                 WHERE Cat.EnglishProductCategoryName = 'Clothing'""", conn)
sql_query


Unnamed: 0,Clothing Products
0,8


### Using a TOP 100 filter to limit the amount of data pulled from the exploratory query on
### [AdventureWorksDW2012].[dbo].[FactInternetSales] table 
### [AdventureWorksDW2012].[dbo].[DimSalesTerritory] table for questions 4 - 6.

In [3]:
# [AdventureWorksDW2012].[dbo].[FactInternetSales]
eda_query = pd.read_sql_query("""SELECT TOP 100 * FROM [AdventureWorksDW2012].[dbo].[FactInternetSales]""",conn)
eda_query

Unnamed: 0,ProductKey,OrderDateKey,DueDateKey,ShipDateKey,CustomerKey,PromotionKey,CurrencyKey,SalesTerritoryKey,SalesOrderNumber,SalesOrderLineNumber,...,ProductStandardCost,TotalProductCost,SalesAmount,TaxAmt,Freight,CarrierTrackingNumber,CustomerPONumber,OrderDate,DueDate,ShipDate
0,310,20050701,20050713,20050708,21768,1,19,6,SO43697,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,2005-07-01,2005-07-13,2005-07-08
1,346,20050701,20050713,20050708,28389,1,39,7,SO43698,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-01,2005-07-13,2005-07-08
2,346,20050701,20050713,20050708,25863,1,100,1,SO43699,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-01,2005-07-13,2005-07-08
3,336,20050701,20050713,20050708,14501,1,100,4,SO43700,1,...,413.1463,413.1463,699.0982,55.9279,17.4775,,,2005-07-01,2005-07-13,2005-07-08
4,346,20050701,20050713,20050708,11003,1,6,9,SO43701,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-01,2005-07-13,2005-07-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,312,20050722,20050803,20050729,16623,1,6,9,SO43792,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,2005-07-22,2005-08-03,2005-07-29
96,344,20050722,20050803,20050729,11000,1,6,9,SO43793,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-22,2005-08-03,2005-07-29
97,347,20050722,20050803,20050729,11029,1,6,9,SO43794,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-22,2005-08-03,2005-07-29
98,312,20050723,20050804,20050730,27615,1,100,4,SO43795,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,2005-07-23,2005-08-04,2005-07-30


In [2]:
# [AdventureWorksDW2012].[dbo].[DimSalesTerritory]
eda_query = pd.read_sql_query("""SELECT TOP 100 * FROM [AdventureWorksDW2012].[dbo].[DimSalesTerritory]""",conn)
eda_query

Unnamed: 0,SalesTerritoryKey,SalesTerritoryAlternateKey,SalesTerritoryRegion,SalesTerritoryCountry,SalesTerritoryGroup,SalesTerritoryImage
0,1,1,Northwest,United States,North America,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...
1,2,2,Northeast,United States,North America,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...
2,3,3,Central,United States,North America,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...
3,4,4,Southwest,United States,North America,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...
4,5,5,Southeast,United States,North America,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...
5,6,6,Canada,Canada,North America,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...
6,7,7,France,France,Europe,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...
7,8,8,Germany,Germany,Europe,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...
8,9,9,Australia,Australia,Pacific,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...
9,10,10,United Kingdom,United Kingdom,Europe,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...


### 4. What is the avg sales amount for each sales territory group (DimSalesTerritory)


In [21]:
# Thought process: 
# Use AVG() funtion on SalesAmount column in FactInternetSales table to get average sales
# Implement a INNER JOIN to combine FactInternetSales and DimSalesTerritory
# ON SalesTerritoryKey unique key
# Use GROUP BY function to group SalesAmount by Group
# Use ORDER BY AVG() sales amount in DESC order

sql_query = pd.read_sql_query("""SELECT Terra.SalesTerritoryGroup AS [Group], 
                                 FORMAT(AVG(Fis.SalesAmount),'C') AS [Average Sales Amount]
                                 FROM [AdventureWorksDW2012].[dbo].[FactInternetSales] AS Fis
                                 INNER JOIN [AdventureWorksDW2012].[dbo].[DimSalesTerritory] AS Terra
                                 ON Terra.SalesTerritoryKey = Fis.SalesTerritoryKey
                                 GROUP BY Terra.SalesTerritoryGroup
                                 ORDER BY AVG(Fis.SalesAmount) DESC""", conn)
sql_query

Unnamed: 0,Group,Average Sales Amount
0,Pacific,$678.98
1,Europe,$493.67
2,North America,$392.47


### 5. What is the avg sales amount for each sales territory country (DimSalesTerritory)

In [22]:
# Thought process: 
# Use AVG() funtion on SalesAmount column in FactInternetSales table to get average sales
# Implement a INNER JOIN to combine FactInternetSales and DimSalesTerritory
# ON SalesTerritoryKey unique key
# Use GROUP BY function to group SalesAmount by Country
# Use ORDER BY AVG() sales amount in DESC order

sql_query = pd.read_sql_query("""SELECT Terra.SalesTerritoryCountry AS [Country], 
                                 FORMAT(AVG(Fis.SalesAmount),'C') AS [Average Sales Amount]
                                 FROM [AdventureWorksDW2012].[dbo].[FactInternetSales] AS Fis
                                 INNER JOIN [AdventureWorksDW2012].[dbo].[DimSalesTerritory] AS Terra
                                 ON Terra.SalesTerritoryKey = Fis.SalesTerritoryKey
                                 GROUP BY Terra.SalesTerritoryCountry
                                 ORDER BY AVG(Fis.SalesAmount) DESC""", conn)
sql_query

Unnamed: 0,Country,Average Sales Amount
0,Australia,$678.98
1,Germany,$514.54
2,United Kingdom,$491.13
3,France,$475.71
4,United States,$439.93
5,Canada,$259.56


### 6. Try 5) but exclude countries that have a total sales amount less then 2 millions

In [27]:
# Thought process: 
# Use AVG() funtion on SalesAmount column in FactInternetSales table to get average sales
# Implement a INNER JOIN to combine FactInternetSales and DimSalesTerritory
# ON SalesTerritoryKey unique key
# Use GROUP BY statement to group SalesAmount by Country
# Use HAVING statement for SUM() sales amount large than 2 million
# Use ORDER BY AVG() sales amount in DESC order

sql_query = pd.read_sql_query("""SELECT Terra.SalesTerritoryCountry AS [Country], 
                                 FORMAT(AVG(Fis.SalesAmount),'C') AS [Average Sales Amount]
                                 FROM [AdventureWorksDW2012].[dbo].[FactInternetSales] AS Fis
                                 INNER JOIN [AdventureWorksDW2012].[dbo].[DimSalesTerritory] AS Terra
                                 ON Terra.SalesTerritoryKey = Fis.SalesTerritoryKey
                                 GROUP BY Terra.SalesTerritoryCountry
                                 HAVING SUM(Fis.SalesAmount) > 2000000
                                 ORDER BY AVG(Fis.SalesAmount) DESC""", conn)
sql_query

Unnamed: 0,Country,Average Sales Amount
0,Australia,$678.98
1,Germany,$514.54
2,United Kingdom,$491.13
3,France,$475.71
4,United States,$439.93


### Using a TOP 100 filter to limit the amount of data pulled from the exploratory query on
### [AdventureWorksDW2012].[dbo].[FactInternetSales] table
### [AdventureWorksDW2012].[dbo].[DimProduct] table for questions 7 - 9.

In [28]:
# [AdventureWorksDW2012].[dbo].[FactInternetSales] table
eda_query = pd.read_sql_query("""SELECT TOP 100 * FROM [AdventureWorksDW2012].[dbo].[FactInternetSales]""",conn)
eda_query

Unnamed: 0,ProductKey,OrderDateKey,DueDateKey,ShipDateKey,CustomerKey,PromotionKey,CurrencyKey,SalesTerritoryKey,SalesOrderNumber,SalesOrderLineNumber,...,ProductStandardCost,TotalProductCost,SalesAmount,TaxAmt,Freight,CarrierTrackingNumber,CustomerPONumber,OrderDate,DueDate,ShipDate
0,310,20050701,20050713,20050708,21768,1,19,6,SO43697,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,2005-07-01,2005-07-13,2005-07-08
1,346,20050701,20050713,20050708,28389,1,39,7,SO43698,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-01,2005-07-13,2005-07-08
2,346,20050701,20050713,20050708,25863,1,100,1,SO43699,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-01,2005-07-13,2005-07-08
3,336,20050701,20050713,20050708,14501,1,100,4,SO43700,1,...,413.1463,413.1463,699.0982,55.9279,17.4775,,,2005-07-01,2005-07-13,2005-07-08
4,346,20050701,20050713,20050708,11003,1,6,9,SO43701,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-01,2005-07-13,2005-07-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,312,20050722,20050803,20050729,16623,1,6,9,SO43792,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,2005-07-22,2005-08-03,2005-07-29
96,344,20050722,20050803,20050729,11000,1,6,9,SO43793,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-22,2005-08-03,2005-07-29
97,347,20050722,20050803,20050729,11029,1,6,9,SO43794,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-22,2005-08-03,2005-07-29
98,312,20050723,20050804,20050730,27615,1,100,4,SO43795,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,2005-07-23,2005-08-04,2005-07-30


In [29]:
# [AdventureWorksDW2012].[dbo].[DimProduct] table
eda_query = pd.read_sql_query("""SELECT TOP 100 * FROM [AdventureWorksDW2012].[dbo].[DimProduct]""",conn)
eda_query

Unnamed: 0,ProductKey,ProductAlternateKey,ProductSubcategoryKey,WeightUnitMeasureCode,SizeUnitMeasureCode,EnglishProductName,SpanishProductName,FrenchProductName,StandardCost,FinishedGoodsFlag,...,ChineseDescription,ArabicDescription,HebrewDescription,ThaiDescription,GermanDescription,JapaneseDescription,TurkishDescription,StartDate,EndDate,Status
0,1,AR-5381,,,,Adjustable Race,,,,False,...,,,,,,,,1998-06-01,,Current
1,2,BA-8327,,,,Bearing Ball,,,,False,...,,,,,,,,1998-06-01,,Current
2,3,BE-2349,,,,BB Ball Bearing,,,,False,...,,,,,,,,1998-06-01,,Current
3,4,BE-2908,,,,Headset Ball Bearings,,,,False,...,,,,,,,,1998-06-01,,Current
4,5,BL-2036,,,,Blade,,,,False,...,,,,,,,,1998-06-01,,Current
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,LI-5160,,,,Internal Lock Washer 6,,,,False,...,,,,,,,,1998-06-01,,Current
96,97,LI-5800,,,,Internal Lock Washer 10,,,,False,...,,,,,,,,1998-06-01,,Current
97,98,LI-6000,,,,Internal Lock Washer 1,,,,False,...,,,,,,,,1998-06-01,,Current
98,99,LI-7160,,,,Internal Lock Washer 8,,,,False,...,,,,,,,,1998-06-01,,Current


### 7. Get the list of products which had an internet sale

In [70]:
# Thought Process:
# JOIN table FactInternetSales and DimProduct with LEFT JOIN 
# ON ProductKey
# Use SELECT Statement to get values from EnglishProductName, SUM(SalesAmount) to get sale per product 
# Add value to data lets pull sales total for each product with SUM() function
# Sort the values in Product Name 
sql_query = pd.read_sql_query("""SELECT DISTINCT Prod.EnglishProductName AS [Product Name],
                                 FORMAT(COALESCE(SUM(Fis.SalesAmount),0),'C') AS [Total Sales]
                                 FROM [AdventureWorksDW2012].[dbo].[FactInternetSales] AS Fis
                                 LEFT JOIN [AdventureWorksDW2012].[dbo].[DimProduct] AS Prod
                                 ON Prod.ProductKey = Fis.ProductKey
                                 GROUP BY Prod.EnglishProductName
                                 ORDER BY Prod.EnglishProductName""", conn)
sql_query

Unnamed: 0,Product Name,Total Sales
0,All-Purpose Bike Stand,"$39,591.00"
1,AWC Logo Cap,"$19,688.10"
2,Bike Wash - Dissolver,"$7,218.60"
3,"Classic Vest, L","$12,382.50"
4,"Classic Vest, M","$12,636.50"
...,...,...
125,"Touring-3000 Yellow, 62","$37,117.50"
126,Water Bottle - 30 oz.,"$21,177.56"
127,"Women's Mountain Shorts, L","$25,406.37"
128,"Women's Mountain Shorts, M","$24,636.48"


### 8. Get the internet sales amount for product ‘Adjustable Race’
### (make sure there is a record even if there has never been a sale of this product)

In [72]:
# Thought Process:
# JOIN table FactInternetSales and DimProduct with LEFT JOIN 
# ON ProductKey
# Use SELECT Statement to get values from EnglishProductName, SUM(SalesAmount) to get sale per product 
# Filter results using WHERE clause Product Name = 'Adjustable Race%'
# Using COALESCE nested in FORMAT to change null value to 0 if no sales
sql_query = pd.read_sql_query("""SELECT Prod.EnglishProductName AS [Product Name],
                                 FORMAT(COALESCE(Fis.SalesAmount,0),'C') AS [Total Sales]
                                 FROM [AdventureWorksDW2012].[dbo].[DimProduct] AS Prod
                                 LEFT OUTER JOIN [AdventureWorksDW2012].[dbo].[FactInternetSales] AS Fis
                                 ON Prod.ProductKey = Fis.ProductKey
                                 WHERE Prod.EnglishProductName = 'Adjustable Race'
                                 ORDER BY Prod.EnglishProductName""", conn)
sql_query

Unnamed: 0,Product Name,Total Sales
0,Adjustable Race,$0.00


### 9. Rewrite Previous question (if you used Left outer Join, use Right outer join or vise versa) 

In [74]:
sql_query = pd.read_sql_query("""SELECT Prod.EnglishProductName AS [Product Name],
                                 FORMAT(COALESCE(Fis.SalesAmount,0),'C') AS [Total Sales]
                                 FROM [AdventureWorksDW2012].[dbo].[FactInternetSales] AS Fis
                                 RIGHT OUTER JOIN [AdventureWorksDW2012].[dbo].[DimProduct] AS Prod
                                 ON Prod.ProductKey = Fis.ProductKey
                                 WHERE Prod.EnglishProductName = 'Adjustable Race'
                                 ORDER BY Prod.EnglishProductName""", conn)
sql_query

Unnamed: 0,Product Name,Total Sales
0,Adjustable Race,$0.00


### Using a TOP 100 filter to limit the amount of data pulled from the exploratory query on
### [AdventureWorksDW2012].[dbo].[DimPromotion] table¶
### [AdventureWorksDW2012].[dbo].[FactInternetSales] table for questions 10 - 13.

In [75]:
# [AdventureWorksDW2012].[dbo].[DimProduct] table
eda_query = pd.read_sql_query("""SELECT TOP 100 * FROM [AdventureWorksDW2012].[dbo].[DimPromotion]""",conn)
eda_query

Unnamed: 0,PromotionKey,PromotionAlternateKey,EnglishPromotionName,SpanishPromotionName,FrenchPromotionName,DiscountPct,EnglishPromotionType,SpanishPromotionType,FrenchPromotionType,EnglishPromotionCategory,SpanishPromotionCategory,FrenchPromotionCategory,StartDate,EndDate,MinQty,MaxQty
0,1,1,No Discount,Sin descuento,Aucune remise,0.0,No Discount,Sin descuento,Aucune remise,No Discount,Sin descuento,Aucune remise,2005-06-01,2008-12-31,0,
1,2,2,Volume Discount 11 to 14,Descuento por volumen (entre 11 y 14),Remise sur quantité (de 11 à 14),0.02,Volume Discount,Descuento por volumen,Remise sur quantité,Reseller,Distribuidor,Revendeur,2005-07-01,2008-06-30,11,14.0
2,3,3,Volume Discount 15 to 24,Descuento por volumen (entre 15 y 24),Remise sur quantité (de 15 à 24),0.05,Volume Discount,Descuento por volumen,Remise sur quantité,Reseller,Distribuidor,Revendeur,2005-07-01,2008-06-30,15,24.0
3,4,4,Volume Discount 25 to 40,Descuento por volumen (entre 25 y 40),Remise sur quantité (de 25 à 40),0.1,Volume Discount,Descuento por volumen,Remise sur quantité,Reseller,Distribuidor,Revendeur,2005-07-01,2008-06-30,25,40.0
4,5,5,Volume Discount 41 to 60,Descuento por volumen (entre 41 y 60),Remise sur quantité (de 41 à 60),0.15,Volume Discount,Descuento por volumen,Remise sur quantité,Reseller,Distribuidor,Revendeur,2005-07-01,2008-06-30,41,60.0
5,6,6,Volume Discount over 60,Descuento por volumen (más de 60),Remise sur quantité (au-delà de 60),0.2,Volume Discount,Descuento por volumen,Remise sur quantité,Reseller,Distribuidor,Revendeur,2005-07-01,2008-06-30,61,
6,7,7,Mountain-100 Clearance Sale,"Liquidación de bicicleta de montaña, 100",Liquidation VTT 100,0.35,Discontinued Product,Descatalogado,Ce produit n'est plus commercialisé,Reseller,Distribuidor,Revendeur,2006-05-15,2006-06-30,0,
7,8,8,Sport Helmet Discount-2002,"Casco deportivo, descuento: 2002",Remise sur les casques sport - 2002,0.1,Seasonal Discount,Descuento de temporada,Remise saisonnière,Reseller,Distribuidor,Revendeur,2006-07-01,2006-07-31,0,
8,9,9,Road-650 Overstock,"Bicicleta de carretera: 650, oferta especial",Déstockage Vélo de route 650,0.3,Excess Inventory,Inventario excedente,Déstockage,Reseller,Distribuidor,Revendeur,2006-07-01,2006-08-31,0,
9,10,10,Mountain Tire Sale,Oferta de cubierta de montaña,Vente de pneus de VTT,0.5,Excess Inventory,Inventario excedente,Déstockage,Customer,Cliente,Client,2007-06-15,2007-08-30,0,


In [76]:
# [AdventureWorksDW2012].[dbo].[DimProduct] table
eda_query = pd.read_sql_query("""SELECT TOP 100 * FROM [AdventureWorksDW2012].[dbo].[FactInternetSales]""",conn)
eda_query

Unnamed: 0,ProductKey,OrderDateKey,DueDateKey,ShipDateKey,CustomerKey,PromotionKey,CurrencyKey,SalesTerritoryKey,SalesOrderNumber,SalesOrderLineNumber,...,ProductStandardCost,TotalProductCost,SalesAmount,TaxAmt,Freight,CarrierTrackingNumber,CustomerPONumber,OrderDate,DueDate,ShipDate
0,310,20050701,20050713,20050708,21768,1,19,6,SO43697,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,2005-07-01,2005-07-13,2005-07-08
1,346,20050701,20050713,20050708,28389,1,39,7,SO43698,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-01,2005-07-13,2005-07-08
2,346,20050701,20050713,20050708,25863,1,100,1,SO43699,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-01,2005-07-13,2005-07-08
3,336,20050701,20050713,20050708,14501,1,100,4,SO43700,1,...,413.1463,413.1463,699.0982,55.9279,17.4775,,,2005-07-01,2005-07-13,2005-07-08
4,346,20050701,20050713,20050708,11003,1,6,9,SO43701,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-01,2005-07-13,2005-07-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,312,20050722,20050803,20050729,16623,1,6,9,SO43792,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,2005-07-22,2005-08-03,2005-07-29
96,344,20050722,20050803,20050729,11000,1,6,9,SO43793,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-22,2005-08-03,2005-07-29
97,347,20050722,20050803,20050729,11029,1,6,9,SO43794,1,...,1912.1544,1912.1544,3399.9900,271.9992,84.9998,,,2005-07-22,2005-08-03,2005-07-29
98,312,20050723,20050804,20050730,27615,1,100,4,SO43795,1,...,2171.2942,2171.2942,3578.2700,286.2616,89.4568,,,2005-07-23,2005-08-04,2005-07-30


### 10. Get the total internet sales resulted from the promotion 'Half-Price Pedal Sale'

In [19]:
# Thought Process:
# Use RIGHT JOIN to combine FactInternetSales and DimPromotion table 
# ON PromotionKey
# Filter to show only data using WHERE clause for Half-Price Pedal Sale promo was used
# In the SELECT statement use the SUM() function to get total for internet Sales
# Adding COALESCE(column_name,0) incase no sale is recorded for promo code
sql_query = pd.read_sql_query("""SELECT Promo.EnglishPromotionName,
                                 FORMAT(COALESCE(SUM(Fis.SalesAmount),0),'C') AS [Total Sales],
                                 COUNT(1) AS [Promo Usage Frequency]
                                 FROM [AdventureWorksDW2012].[dbo].[FactInternetSales] AS Fis
                                 RIGHT JOIN [AdventureWorksDW2012].[dbo].[DimPromotion] AS Promo
                                 ON Promo.PromotionKey = Fis.PromotionKey
                                 WHERE Promo.EnglishPromotionName = 'Half-Price Pedal Sale'
                                 GROUP BY Promo.EnglishPromotionName""", conn)
sql_query

Unnamed: 0,EnglishPromotionName,Total Sales,Promo Usage Frequency
0,Half-Price Pedal Sale,$0.00,1


### 11. Rewrite Previous question (if you used Left outer Join, use Right outer join or vise versa) 

In [20]:
# Thought Process:
# Use LEFT JOIN to combine FactInternetSales and DimPromotion table 
# ON PromotionKey
# Filter to show only data using WHERE clause for Half-Price Pedal Sale promo was used
# In the SELECT statement use the SUM() function to get total for internet Sales
# Adding COALESCE(column_name,0) incase no sale is recorded for promo code
sql_query = pd.read_sql_query("""SELECT Promo.EnglishPromotionName,
                                 FORMAT(COALESCE(SUM(Fis.SalesAmount),0),'C') AS [Total Sales],
                                 COUNT(1) AS [Promo Usage Frequency]
                                 FROM[AdventureWorksDW2012].[dbo].[DimPromotion] AS Promo
                                 LEFT JOIN [AdventureWorksDW2012].[dbo].[FactInternetSales] AS Fis
                                 ON Promo.PromotionKey = Fis.PromotionKey
                                 WHERE Promo.EnglishPromotionName = 'Half-Price Pedal Sale'
                                 GROUP BY Promo.EnglishPromotionName""", conn)
sql_query

Unnamed: 0,EnglishPromotionName,Total Sales,Promo Usage Frequency
0,Half-Price Pedal Sale,$0.00,1


### 12. Get the promotion names which never resulted in an internet sale

In [30]:
# Thought Process:
# Use LEFT JOIN to combine FactInternetSales and DimPromotion table 
# ON PromotionKey
# Filter data using HAAVING clause for promo codes with no sales
# Using COALESCE to turn NULL value to 0 then format as currency with FORMAT(column_name,'C')
# In the SELECT statement use the SUM() function to get total for internet Sales
# Also include Promo Category and Promo Name
sql_query = pd.read_sql_query("""SELECT Promo.EnglishPromotionCategory AS [Promo Category],
                                 Promo.EnglishPromotionName AS [Promo Code Name],
                                 FORMAT(COALESCE(SUM(Fis.SalesAmount),0),'C') AS [Total Sales],
                                 COUNT(1) AS [Promo Usage Frequency]
                                 FROM[AdventureWorksDW2012].[dbo].[DimPromotion] AS Promo
                                 LEFT JOIN [AdventureWorksDW2012].[dbo].[FactInternetSales] AS Fis
                                 ON Promo.PromotionKey = Fis.PromotionKey
                                 GROUP BY Promo.EnglishPromotionCategory, Promo.EnglishPromotionName
                                 HAVING COALESCE(SUM(Fis.SalesAmount),0) = 0
                                 ORDER BY Promo.EnglishPromotionName""", conn)
sql_query

Unnamed: 0,Promo Category,Promo Code Name,Total Sales,Promo Usage Frequency
0,Customer,Half-Price Pedal Sale,$0.00,1
1,Reseller,LL Road Frame Sale,$0.00,1
2,Customer,Mountain Tire Sale,$0.00,1
3,Reseller,Mountain-100 Clearance Sale,$0.00,1
4,Reseller,Mountain-500 Silver Clearance Sale,$0.00,1
5,Reseller,Road-650 Overstock,$0.00,1
6,Reseller,Sport Helmet Discount-2002,$0.00,1
7,Reseller,Sport Helmet Discount-2003,$0.00,1
8,Reseller,Volume Discount 15 to 24,$0.00,1
9,Reseller,Volume Discount 25 to 40,$0.00,1


### 13. Rewrite Previous question (if you used Left outer Join, use Right outer 
### join or vise versa) 

In [33]:
# Thought Process:
# Use RIGHT OUTER JOIN to combine FactInternetSales and DimPromotion table 
# ON PromotionKey
# Filter data using HAAVING clause for promo codes with no sales
# Using COALESCE to turn NULL value to 0 then format as currency with FORMAT(column_name,'C')
# In the SELECT statement use the SUM() function to get total for internet Sales
# Also include Promo Category and Promo Name
sql_query = pd.read_sql_query("""SELECT Promo.EnglishPromotionCategory AS [Promo Category],
                                 Promo.EnglishPromotionName AS [Promo Code Name],
                                 FORMAT(COALESCE(SUM(Fis.SalesAmount),0),'C') AS [Total Sales],
                                 COUNT(1) AS [Promo Usage Frequency]
                                 FROM [AdventureWorksDW2012].[dbo].[FactInternetSales] AS Fis
                                 RIGHT OUTER JOIN [AdventureWorksDW2012].[dbo].[DimPromotion] AS Promo
                                 ON Promo.PromotionKey = Fis.PromotionKey
                                 GROUP BY Promo.EnglishPromotionCategory, Promo.EnglishPromotionName
                                 HAVING COALESCE(SUM(Fis.SalesAmount),0) = 0
                                 ORDER BY Promo.EnglishPromotionName""", conn)
sql_query

Unnamed: 0,Promo Category,Promo Code Name,Total Sales,Promo Usage Frequency
0,Customer,Half-Price Pedal Sale,$0.00,1
1,Reseller,LL Road Frame Sale,$0.00,1
2,Customer,Mountain Tire Sale,$0.00,1
3,Reseller,Mountain-100 Clearance Sale,$0.00,1
4,Reseller,Mountain-500 Silver Clearance Sale,$0.00,1
5,Reseller,Road-650 Overstock,$0.00,1
6,Reseller,Sport Helmet Discount-2002,$0.00,1
7,Reseller,Sport Helmet Discount-2003,$0.00,1
8,Reseller,Volume Discount 15 to 24,$0.00,1
9,Reseller,Volume Discount 25 to 40,$0.00,1
