In [91]:
# Importing Libraries


import pandas as pd
import numpy as np
import pyodbc
import matplotlib.pyplot as plt
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from itertools import combinations, groupby
from collections import Counter




In [96]:
# Connecting to SQL Server using Windows authentication (Trusted_Connection).

# ODBC connection string: driver, host, database and authentication mode.
cnxn_str = ("Driver={SQL Server Native Client 11.0};"
            "Server=DESKTOP-TI5OC9C;"
            "Database=AdventureWorks2012;"
            "Trusted_Connection=yes;")

# Open the connection; `cnxn` is passed to pd.read_sql in the import cell.
cnxn = pyodbc.connect(cnxn_str)


In [97]:
# Importing data: product name, ordered quantity, sales order id and
# territory name for rows where OnlineOrderFlag = 1.
# The SQL text is split over adjacent string literals for readability only;
# Python joins them into exactly the same single query string.

dataset = pd.read_sql(
    "select [A].[Name] as [Item],[D].[OrderQty] as [Quantity],"
    "[E].[SalesOrderID] as [TransactionID],[F].[Name] as [Region] "
    "from [Production].[Product] as [A]"
    "LEFT JOIN [Production].[ProductSubcategory] as [B] on [A].[ProductSubcategoryID] = [B].[ProductSubcategoryID]"
    "LEFT JOIN [Production].[ProductCategory] as [C] on [B].[ProductCategoryID] = [C].[ProductCategoryID]"
    "LEFT JOIN [Sales].[SalesOrderDetail] as [D] on [A].[ProductID] = [D].[ProductID]"
    "LEFT JOIN [Sales].[SalesOrderHeader] as [E] on [D].[SalesOrderID] = [E].[SalesOrderID]"
    "LEFT JOIN [Sales].[SalesTerritory] as [F] on [E].[TerritoryID] = [F].[TerritoryID] "
    "where OnlineOrderFlag = 1 ",
    cnxn,
)
dataset.head(10)

Unnamed: 0,Item,Quantity,TransactionID,Region
0,"Road-150 Red, 62",1,43697,Canada
1,"Mountain-100 Silver, 44",1,43698,France
2,"Mountain-100 Silver, 44",1,43699,Northwest
3,"Road-650 Black, 62",1,43700,Southwest
4,"Mountain-100 Silver, 44",1,43701,Australia
5,"Road-150 Red, 44",1,43702,Southwest
6,"Road-150 Red, 62",1,43703,Australia
7,"Mountain-100 Black, 48",1,43704,Australia
8,"Mountain-100 Silver, 38",1,43705,Australia
9,"Road-150 Red, 48",1,43706,Southwest


### Data Preparations

In [98]:
# (rows, columns) of the freshly loaded query result.
dataset.shape

(60398, 4)

In [99]:
# Strip leading/trailing whitespace from the item names (inner spaces are kept).
dataset['Item'] = dataset['Item'].str.strip()
dataset.head()

Unnamed: 0,Item,Quantity,TransactionID,Region
0,"Road-150 Red, 62",1,43697,Canada
1,"Mountain-100 Silver, 44",1,43698,France
2,"Mountain-100 Silver, 44",1,43699,Northwest
3,"Road-650 Black, 62",1,43700,Southwest
4,"Mountain-100 Silver, 44",1,43701,Australia


In [100]:
# Drop rows whose TransactionID is missing. Note: this removes nulls, not
# duplicate rows. The frame is modified in place.
dataset.dropna(axis=0, subset=['TransactionID'], inplace=True)
dataset.head()

Unnamed: 0,Item,Quantity,TransactionID,Region
0,"Road-150 Red, 62",1,43697,Canada
1,"Mountain-100 Silver, 44",1,43698,France
2,"Mountain-100 Silver, 44",1,43699,Northwest
3,"Road-650 Black, 62",1,43700,Southwest
4,"Mountain-100 Silver, 44",1,43701,Australia


In [101]:
# Cast TransactionID to string (presumably so order ids are treated as
# labels rather than numbers - confirm intent).
dataset['TransactionID'] = dataset['TransactionID'].astype('str')
dataset.head()

Unnamed: 0,Item,Quantity,TransactionID,Region
0,"Road-150 Red, 62",1,43697,Canada
1,"Mountain-100 Silver, 44",1,43698,France
2,"Mountain-100 Silver, 44",1,43699,Northwest
3,"Road-650 Black, 62",1,43700,Southwest
4,"Mountain-100 Silver, 44",1,43701,Australia


In [102]:
# Re-check (rows, columns) after the cleaning steps; dropna may have removed rows.
dataset.shape

(60398, 4)

In [103]:
# Number of rows per Region, most frequent first.
dataset['Region'].value_counts()

Australia         13345
Southwest         12265
Northwest          8993
Canada             7620
United Kingdom     6906
Germany            5625
France             5558
Southeast            39
Northeast            27
Central              20
Name: Region, dtype: int64

In [104]:
# True if at least one cell anywhere in the frame is null.
print(dataset.isna().to_numpy().any())

False


In [105]:
# Per-column count of null cells.
missing_data = dataset.isna().sum()
missing_data

Item             0
Quantity         0
TransactionID    0
Region           0
dtype: int64

In [106]:
# Count the distinct values in every column and print them under a heading.
unique_values = dataset.nunique()
print("The unique values for each Column are: ", unique_values, sep="\n")

The unique values for each Column are: 
Item               130
Quantity             1
TransactionID    27659
Region              10
dtype: int64


In [107]:
# Column dtypes, non-null counts and memory footprint of the cleaned frame.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 60398 entries, 0 to 60397
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Item           60398 non-null  object
 1   Quantity       60398 non-null  int64 
 2   TransactionID  60398 non-null  object
 3   Region         60398 non-null  object
dtypes: int64(1), object(3)
memory usage: 2.3+ MB


In [108]:
# Keep only rows with a non-negative Quantity (zero-quantity rows are kept),
# then summarise the filtered frame.
data_plus = dataset[dataset['Quantity'].ge(0)]
data_plus.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 60398 entries, 0 to 60397
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Item           60398 non-null  object
 1   Quantity       60398 non-null  int64 
 2   TransactionID  60398 non-null  object
 3   Region         60398 non-null  object
dtypes: int64(1), object(3)
memory usage: 2.3+ MB


In [110]:
## All regions: group by ['TransactionID', 'Item'] and sum Quantity.
# Result: one row per TransactionID, one column per Item, each cell holding
# the summed Quantity (0 where the item is absent from the transaction).
# unstack() already leaves TransactionID as the index, so the previous
# reset_index() / set_index('TransactionID') round trip was redundant.

market_basket = (data_plus
                .groupby(['TransactionID', 'Item'])['Quantity']
                .sum()
                .unstack()
                .fillna(0))


In [112]:
# Encoding for association-rule mining: all positive values become 1, the rest 0.

def encode_data(datapoint):
    """Binarise a single basket cell.

    Parameters
    ----------
    datapoint : number
        Quantity of an item in a transaction.

    Returns
    -------
    int
        1 if ``datapoint`` is strictly positive, otherwise 0.

    Notes
    -----
    The earlier two-branch version fell through and returned ``None`` for
    values strictly between 0 and 1 and for NaN (both comparisons were
    False). Every input now maps to 0 or 1; NaN maps to 0 because
    ``NaN > 0`` is False.
    """
    return 1 if datapoint > 0 else 0

In [120]:
# Apply encode_data to every cell so the basket matrix becomes a 0/1 table.
# NOTE(review): DataFrame.applymap is deprecated in newer pandas releases in
# favour of DataFrame.map - confirm the target pandas version before switching.
market_basket = market_basket.applymap(encode_data)
market_basket

Item,AWC Logo Cap,All-Purpose Bike Stand,Bike Wash - Dissolver,"Classic Vest, L","Classic Vest, M","Classic Vest, S",Fender Set - Mountain,HL Mountain Tire,HL Road Tire,"Half-Finger Gloves, L",...,"Touring-3000 Blue, 62","Touring-3000 Yellow, 44","Touring-3000 Yellow, 50","Touring-3000 Yellow, 54","Touring-3000 Yellow, 58","Touring-3000 Yellow, 62",Water Bottle - 30 oz.,"Women's Mountain Shorts, L","Women's Mountain Shorts, M","Women's Mountain Shorts, S"
TransactionID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
43697,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
43698,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
43699,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
43700,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
43701,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75119,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
75120,1,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
75121,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
75122,1,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Training Model

In [135]:
# Generating Frequent Itemsets
# min_support=0.02 keeps itemsets that occur in at least 2% of the rows of
# market_basket; use_colnames=True reports itemsets by column (item) name
# instead of by column index.

MyFrequentItemsets = apriori(market_basket, min_support=0.02, use_colnames=True)

In [136]:
# Creating Association Rules
# Keep only rules whose lift is at least 2.2, then preview up to the first 20.

rules = association_rules(MyFrequentItemsets, metric="lift", min_threshold= 2.2)
rules.head(20)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(HL Mountain Tire),(Mountain Tire Tube),0.050472,0.111898,0.0346,0.68553,6.126358,0.028952,2.824122
1,(Mountain Tire Tube),(HL Mountain Tire),0.111898,0.050472,0.0346,0.309208,6.126358,0.028952,1.374551
2,(Road Tire Tube),(HL Road Tire),0.085903,0.031021,0.021548,0.250842,8.086284,0.018883,1.293424
3,(HL Road Tire),(Road Tire Tube),0.031021,0.085903,0.021548,0.694639,8.086284,0.018883,2.993492
4,(Road Tire Tube),(LL Road Tire),0.085903,0.037745,0.020066,0.233586,6.188459,0.016823,1.255528
5,(LL Road Tire),(Road Tire Tube),0.037745,0.085903,0.020066,0.531609,6.188459,0.016823,1.951568
6,(Mountain Tire Tube),(ML Mountain Tire),0.111898,0.041975,0.028056,0.250727,5.973176,0.023359,1.278605
7,(ML Mountain Tire),(Mountain Tire Tube),0.041975,0.111898,0.028056,0.668389,5.973176,0.023359,2.678145
8,(Road Tire Tube),(ML Road Tire),0.085903,0.033479,0.02191,0.255051,7.618188,0.019034,1.297431
9,(ML Road Tire),(Road Tire Tube),0.033479,0.085903,0.02191,0.654428,7.618188,0.019034,2.645167


### Making Recommendations

In [137]:
# Column total for 'Touring Tire Tube'; with 0/1 cells this is the number of
# transactions that contain the item.
market_basket['Touring Tire Tube'].sum()

1488

In [138]:
# Column total for 'Touring Tire'. This is an independent per-item total, not
# the number of baskets that also contain another item.
market_basket['Touring Tire'].sum()

935

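#### 'Touring Tire Tube' appears in 1488 transactions and 'Touring Tire' appears in 935. These are separate per-item totals, not a count of baskets containing both, so on their own they only suggest that 'Touring Tire' is a good item to recommend to customers buying 'Touring Tire Tube'; the pair's joint support and lift should be checked to confirm the combination.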

In [139]:
# Column total for 'Mountain Bottle Cage'; with 0/1 cells this is the number
# of transactions that contain the item.
market_basket['Mountain Bottle Cage'].sum()

2025

In [141]:
# Column total for 'LL Road Tire'; with 0/1 cells this is the number of
# transactions that contain the item.
market_basket['LL Road Tire'].sum()

1044