<a href="https://colab.research.google.com/github/DABallentine/knowledge_discovery_charlotte/blob/main/Jupiter%20Notebooks/Association_Rule_Mining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Association Rule Mining

This notebook loads the merged 311 service-request data and mines association rules from it.

In [1]:
# Import necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
!pip install apyori
from apyori import apriori
%matplotlib inline
plt.style.use('ggplot')

# Load a CSV from a URL into a DataFrame
def read_data_from_URL(url):
    """Parse the CSV found at ``url`` and return it as a pandas DataFrame.

    ``url`` is handed to ``pd.read_csv`` unchanged, so anything that function
    accepts as its first argument works here as well.
    """
    return pd.read_csv(url)



In [139]:
#import the association rule packages
from mlxtend.frequent_patterns import apriori, fpmax, fpgrowth, association_rules

In [2]:
# Read in the merged, preprocessed data
# The URL points at a .zip file and is passed straight through to pd.read_csv by
# read_data_from_URL (presumably relying on pandas decompressing it based on the
# file extension - confirm the archive holds a single CSV).
serv_req_url='https://bitbucket.org/nthammad-uncc/knowledge_discovery_charlotte/raw/main/data/merged_requests.zip'
requests = read_data_from_URL(serv_req_url)
# Preview the first rows to check that the load worked
requests.head()

Unnamed: 0.1,Unnamed: 0,OBJECTID,DEPARTMENT,REQUEST_NO,REQUEST_CAT,REQUEST_TYPE,RECEIVED_DATE,RECEIVED_MONTH,RECEIVED_YEAR,SEASON,...,PERCENT HOUSEHOLD INCOME_Higher Income Households,PERCENT HOUSEHOLD INCOME_Retired Householders,INCOME AND BENEFITS_Median households income (dollars),INCOME AND BENEFITS_Mean households income (dollars),INCOME AND BENEFITS_Per capita Income,INCOME AND BENEFITS_Median earnings for workers (dollars),PERCENT INSURED_Population with health insurance,PERCENT INSURED_Population without health insurance,PERCENTAGE BELOW POVERTY LEVEL_All,HISTORIC_REDLINING
0,0,1,Solid Waste Services,6402056,NON_RECYCLABLE ITEMS,NON_RECYCLABLE ITEMS,2016-11-07 14:00:00,11,2016,4,...,0.210336,0.157483,79300.428571,111885.642857,47529.928571,43332.642857,0.917971,0.082029,7.135714,
1,1,2,Housing and NBHD Serv,6402064,UTILITY VERIFICATION LTR,UTILITY VERIFICATION LTR,2016-11-07 14:00:00,11,2016,4,...,0.084375,0.129942,63391.25,74852.45,29914.9,37226.3,0.869184,0.130816,8.05,
2,2,3,Solid Waste Services,6402082,NON_RECYCLABLE ITEMS,NON_RECYCLABLE ITEMS,2016-11-07 14:00:00,11,2016,4,...,0.03753,0.113167,41973.411765,51723.764706,19338.294118,23576.882353,0.814082,0.185918,25.811765,
3,3,4,Solid Waste Services,6402103,MISSED SERVICE,MISSED RECYCLING,2016-11-07 14:00:00,11,2016,4,...,0.053345,0.128969,46401.066667,59119.666667,22976.8,28399.6,0.834642,0.165358,16.96,
4,4,6,Finance/City,6402112,CWP,CWP REQUEST,2016-11-07 14:00:00,11,2016,4,...,0.050732,0.097692,42745.529412,53659.352941,23808.882353,29559.117647,0.825033,0.174967,20.682353,Y


In [3]:
#list all column names
requests.columns.values

array(['Unnamed: 0', 'OBJECTID', 'DEPARTMENT', 'REQUEST_NO',
       'REQUEST_CAT', 'REQUEST_TYPE', 'RECEIVED_DATE', 'RECEIVED_MONTH',
       'RECEIVED_YEAR', 'SEASON', 'INTERNAL_FIELD_OBSERVATION',
       'NEIGHBORHOOD_PROFILE_AREA', 'BLOCK_NO', 'ZIP_CODE', 'X_COORD',
       'Y_COORD', 'LATITUDE', 'LONGITUDE', 'RECEIVED_DAY', 'TOTAL_CALLS',
       'ADDRESS_ID', 'CRIME_SCORE', 'EMPLOYED_All',
       'PERCENT EMPLOYED_In labor force',
       'PERCENT EMPLOYED_Not in labor force',
       'PERCENT EMPLOYED_Female Only', 'COMMUTING TO WORK_All',
       'PERCENT COMMUTING TO WORK_By Car',
       'PERCENT COMMUTING TO WORK_Public transportation',
       'PERCENT COMMUTING TO WORK_Walk',
       'PERCENT COMMUTING TO WORK_Other',
       'PERCENT COMMUTING TO WORK_Worked at home',
       'INCOME AND BENEFITS_Total households',
       'PERCENT HOUSEHOLD INCOME_Lower Income Households',
       'PERCENT HOUSEHOLD INCOME_Mid Income Households',
       'PERCENT HOUSEHOLD INCOME_Higher Income Househol

In [4]:
# Derive the weekday name (e.g. 'Monday') of every request from its received timestamp,
# and store the month as a string so it can be used as a label
# (it is concatenated into MERGE_INDEX further down).
requests['RECEIVED_DAY_OF_WEEK'] = pd.to_datetime(requests['RECEIVED_DATE']).dt.day_name()
requests['RECEIVED_MONTH'] = requests['RECEIVED_MONTH'].astype(str)

In [5]:
# Decide whether an area is predominantly low, mid or high income
def calculate_income_bracket(low,mid,high):
    """Return 'Low', 'Mid' or 'High' for the given household-income shares.

    'Low' is returned only when ``low`` is strictly greater than both other
    shares. Otherwise 'Mid' is returned when ``mid`` is strictly greater than
    ``high``; every remaining case (including ties and comparisons that are
    False because a value is NaN) yields 'High'.
    """
    low_is_largest = low > mid and low > high
    if low_is_largest:
        return 'Low'
    return 'Mid' if mid > high else 'High'

In [6]:
# Label every request with the predominant income bracket of its area by passing the
# lower / mid / higher household-income share columns to calculate_income_bracket, row by row.
requests['PREDOMINANT_INCOME_BRACKET'] = [
    calculate_income_bracket(low_share, mid_share, high_share)
    for low_share, mid_share, high_share in zip(
        requests['PERCENT HOUSEHOLD INCOME_Lower Income Households'],
        requests['PERCENT HOUSEHOLD INCOME_Mid Income Households'],
        requests['PERCENT HOUSEHOLD INCOME_Higher Income Households'],
    )
]


In [7]:
requests['PREDOMINANT_INCOME_BRACKET'].value_counts()

Mid     1491811
Low       29346
High       2904
Name: PREDOMINANT_INCOME_BRACKET, dtype: int64

In [8]:
# Round crime scores to three decimal places
requests['CRIME_SCORE'] = requests['CRIME_SCORE'].map(lambda score: round(score, 3))
# Store the neighborhood profile area as a string label; int() drops any fractional part first
requests['NEIGHBORHOOD_PROFILE_AREA'] = requests['NEIGHBORHOOD_PROFILE_AREA'].map(lambda npa: str(int(npa)))

In [9]:
# Bucket every crime score by its fraction of the largest observed score:
# at most 0.33 -> 'Low', at most 0.66 -> 'Mid', everything else -> 'High'.
max_crime_score = requests['CRIME_SCORE'].max()
requests['CRIME_INDEX'] = np.select(
    [
        requests['CRIME_SCORE'] / max_crime_score <= 0.33,
        requests['CRIME_SCORE'] / max_crime_score <= 0.66,
    ],
    ['Low', 'Mid'],
    default='High',
)

In [10]:
requests.head()

Unnamed: 0.1,Unnamed: 0,OBJECTID,DEPARTMENT,REQUEST_NO,REQUEST_CAT,REQUEST_TYPE,RECEIVED_DATE,RECEIVED_MONTH,RECEIVED_YEAR,SEASON,...,INCOME AND BENEFITS_Mean households income (dollars),INCOME AND BENEFITS_Per capita Income,INCOME AND BENEFITS_Median earnings for workers (dollars),PERCENT INSURED_Population with health insurance,PERCENT INSURED_Population without health insurance,PERCENTAGE BELOW POVERTY LEVEL_All,HISTORIC_REDLINING,RECEIVED_DAY_OF_WEEK,PREDOMINANT_INCOME_BRACKET,CRIME_INDEX
0,0,1,Solid Waste Services,6402056,NON_RECYCLABLE ITEMS,NON_RECYCLABLE ITEMS,2016-11-07 14:00:00,11,2016,4,...,111885.642857,47529.928571,43332.642857,0.917971,0.082029,7.135714,,Monday,Mid,Low
1,1,2,Housing and NBHD Serv,6402064,UTILITY VERIFICATION LTR,UTILITY VERIFICATION LTR,2016-11-07 14:00:00,11,2016,4,...,74852.45,29914.9,37226.3,0.869184,0.130816,8.05,,Monday,Mid,Low
2,2,3,Solid Waste Services,6402082,NON_RECYCLABLE ITEMS,NON_RECYCLABLE ITEMS,2016-11-07 14:00:00,11,2016,4,...,51723.764706,19338.294118,23576.882353,0.814082,0.185918,25.811765,,Monday,Mid,Low
3,3,4,Solid Waste Services,6402103,MISSED SERVICE,MISSED RECYCLING,2016-11-07 14:00:00,11,2016,4,...,59119.666667,22976.8,28399.6,0.834642,0.165358,16.96,,Monday,Mid,Low
4,4,6,Finance/City,6402112,CWP,CWP REQUEST,2016-11-07 14:00:00,11,2016,4,...,53659.352941,23808.882353,29559.117647,0.825033,0.174967,20.682353,Y,Monday,Mid,Low


## Association rules between Request Categories and Neighborhood Profile Areas

In [32]:
requests['REQUEST_CAT'].value_counts()

NON_RECYCLABLE ITEMS         820199
RECYCLABLE ITEMS             196945
CART                         107771
HNS HEALTH AND SANITATION     77723
TRANSPORTATION                52467
MISSED SERVICE                43400
GARBAGE                       30604
311 DOCUMENT                  22379
RECYCLING                     22200
VIOLATIONS                    15821
YARD WASTE                    15796
TIRES                         13440
WEATHER AND ENVIRONMENTAL     11641
ZONING                        11410
DEAD ANIMAL COLLECTION        10701
ZONING/COMPLAINT/ INSPECT      9994
COMPLAINT                      8486
HNS HOUSING                    8161
BULKY ITEM                     8045
ZON GENERAL INFO  INQUIRY      7894
CUSTOMER FEEDBACK              6835
BLOCKAGE                       4462
CLEANLINESS                    3447
CWP                            2645
DISABILITY ACT                 2525
ADMINISTRATIVE ACTIONS         2326
ESCALATION REQUEST             1791
UTILITY VERIFICATION LTR    

In [114]:
# Build a basket key of the form "<month>_<year>_<neighborhood profile area>" for every
# request, and add a constant 1 per row so that requests can be counted by summing.
requests['MERGE_INDEX'] = requests['RECEIVED_MONTH'].astype('str').str.cat(
    [
        requests['RECEIVED_YEAR'].astype('str'),
        requests['NEIGHBORHOOD_PROFILE_AREA'].astype('str'),
    ],
    sep='_',
)
requests['REQ_COUNT'] = 1

In [115]:
requests.head()

Unnamed: 0.1,Unnamed: 0,OBJECTID,DEPARTMENT,REQUEST_NO,REQUEST_CAT,REQUEST_TYPE,RECEIVED_DATE,RECEIVED_MONTH,RECEIVED_YEAR,SEASON,...,INCOME AND BENEFITS_Median earnings for workers (dollars),PERCENT INSURED_Population with health insurance,PERCENT INSURED_Population without health insurance,PERCENTAGE BELOW POVERTY LEVEL_All,HISTORIC_REDLINING,RECEIVED_DAY_OF_WEEK,PREDOMINANT_INCOME_BRACKET,CRIME_INDEX,MERGE_INDEX,REQ_COUNT
0,0,1,Solid Waste Services,6402056,NON_RECYCLABLE ITEMS,NON_RECYCLABLE ITEMS,2016-11-07 14:00:00,11,2016,4,...,43332.642857,0.917971,0.082029,7.135714,,Monday,Mid,Low,11_2016_215,1
1,1,2,Housing and NBHD Serv,6402064,UTILITY VERIFICATION LTR,UTILITY VERIFICATION LTR,2016-11-07 14:00:00,11,2016,4,...,37226.3,0.869184,0.130816,8.05,,Monday,Mid,Low,11_2016_265,1
2,2,3,Solid Waste Services,6402082,NON_RECYCLABLE ITEMS,NON_RECYCLABLE ITEMS,2016-11-07 14:00:00,11,2016,4,...,23576.882353,0.814082,0.185918,25.811765,,Monday,Mid,Low,11_2016_103,1
3,3,4,Solid Waste Services,6402103,MISSED SERVICE,MISSED RECYCLING,2016-11-07 14:00:00,11,2016,4,...,28399.6,0.834642,0.165358,16.96,,Monday,Mid,Low,11_2016_158,1
4,4,6,Finance/City,6402112,CWP,CWP REQUEST,2016-11-07 14:00:00,11,2016,4,...,29559.117647,0.825033,0.174967,20.682353,Y,Monday,Mid,Low,11_2016_88,1


In [175]:
# Build the basket matrix: one row per MERGE_INDEX (month, year and neighborhood profile
# area), one column per request category, and each cell holding the number of requests
# of that category raised in that area during that month.
request_basket = (
    requests
    .groupby(['MERGE_INDEX', 'REQUEST_CAT'])['REQ_COUNT']
    .sum()
    .unstack()   # request categories become the columns
    .fillna(0)   # month/area/category combinations with no requests count as 0
)
request_basket.head(20)

REQUEST_CAT,311 DOCUMENT,ADMINISTRATIVE ACTIONS,ADOPT-A PICKUP SWS,ANIMAL FOUND REPORT,BLOCKAGE,BOARDED UP STRUCTURE,BULKY ITEM,CART,CLEANLINESS,COMPLAINT,...,SW ONLY-DOOR HANGER LEFT,TIRES,TRANSPORTATION,UTILITY VERIFICATION LTR,VIOLATIONS,WEATHER AND ENVIRONMENTAL,YARD WASTE,ZON GENERAL INFO INQUIRY,ZONING,ZONING/COMPLAINT/ INSPECT
MERGE_INDEX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10_2016_10,3.0,2.0,0.0,0.0,0.0,0.0,2.0,18.0,0.0,0.0,...,0.0,1.0,2.0,0.0,1.0,1.0,1.0,1.0,4.0,1.0
10_2016_100,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0
10_2016_101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
10_2016_102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,2.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0
10_2016_103,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0
10_2016_105,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
10_2016_106,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
10_2016_107,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10_2016_108,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10_2016_109,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0


In [117]:
#check the min and max using describe
request_basket.describe()

REQUEST_CAT,311 DOCUMENT,ADMINISTRATIVE ACTIONS,ADOPT-A PICKUP SWS,ANIMAL FOUND REPORT,BLOCKAGE,BOARDED UP STRUCTURE,BULKY ITEM,CART,CLEANLINESS,COMPLAINT,...,SW ONLY-DOOR HANGER LEFT,TIRES,TRANSPORTATION,UTILITY VERIFICATION LTR,VIOLATIONS,WEATHER AND ENVIRONMENTAL,YARD WASTE,ZON GENERAL INFO INQUIRY,ZONING,ZONING/COMPLAINT/ INSPECT
count,26271.0,26271.0,26271.0,26271.0,26271.0,26271.0,26271.0,26271.0,26271.0,26271.0,...,26271.0,26271.0,26271.0,26271.0,26271.0,26271.0,26271.0,26271.0,26271.0,26271.0
mean,0.851852,0.088539,0.003883,0.045487,0.169845,0.008565,0.306231,4.10228,0.131209,0.323018,...,0.014084,0.511591,1.997145,0.054851,0.602223,0.443112,0.601271,0.300483,0.434319,0.380419
std,1.191478,0.366885,0.071847,0.218714,0.464095,0.09969,0.670135,6.281683,0.409941,0.629465,...,0.205444,0.978805,2.66461,0.269497,1.459714,0.939404,1.241221,0.649909,0.842676,0.987129
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,1.0,...,0.0,1.0,3.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0
max,14.0,8.0,4.0,4.0,9.0,3.0,9.0,114.0,6.0,7.0,...,15.0,15.0,49.0,6.0,29.0,30.0,30.0,6.0,15.0,55.0


In [118]:
request_basket.columns

Index(['311 DOCUMENT', 'ADMINISTRATIVE ACTIONS', 'ADOPT-A PICKUP SWS',
       'ANIMAL FOUND REPORT', 'BLOCKAGE', 'BOARDED UP STRUCTURE', 'BULKY ITEM',
       'CART', 'CLEANLINESS', 'COMPLAINT', 'CONTENT IMPROVEMENT PROG',
       'CUSTOMER FEEDBACK', 'CWP', 'DEAD ANIMAL COLLECTION', 'DISABILITY ACT',
       'DISABLED SERVICE', 'ESCALATION REQUEST', 'FIELD OBSERVED PROBLEM',
       'GARBAGE', 'HNS HEALTH AND SANITATION', 'HNS HOUSING', 'MISSED SERVICE',
       'NON_RECYCLABLE ITEMS', 'PARK & REC SVC REQUEST', 'RECYCLABLE ITEMS',
       'RECYCLING', 'REQUEST FOR SPEAKER', 'STW CNTY WATER QUALITY',
       'STW OTHER', 'SW ONLY-DOOR HANGER LEFT', 'TIRES', 'TRANSPORTATION',
       'UTILITY VERIFICATION LTR', 'VIOLATIONS', 'WEATHER AND ENVIRONMENTAL',
       'YARD WASTE', 'ZON GENERAL INFO  INQUIRY', 'ZONING',
       'ZONING/COMPLAINT/ INSPECT'],
      dtype='object', name='REQUEST_CAT')

In [119]:
# Encode a cell as 1 when it holds at least one request and as 0 otherwise
def encode_units(x):
    """Return 1 if ``x`` is at least 1, else 0.

    Every input maps to 0 or 1. Values strictly between 0 and 1, and NaN,
    previously matched neither branch and silently produced ``None``; they now
    map to 0, like any other value below 1.
    """
    return 1 if x >= 1 else 0

    
# One-hot encode the basket matrix: 1 where at least one request of the category was
# raised, 0 otherwise. The comparison is vectorized instead of going through
# DataFrame.applymap (deprecated since pandas 2.1); for the whole-number counts held in
# request_basket it yields the same 0/1 values as applying encode_units cell by cell.
request_basket_sets = (request_basket >= 1).astype(int)

In [120]:
request_basket_sets.head(20)

REQUEST_CAT,311 DOCUMENT,ADMINISTRATIVE ACTIONS,ADOPT-A PICKUP SWS,ANIMAL FOUND REPORT,BLOCKAGE,BOARDED UP STRUCTURE,BULKY ITEM,CART,CLEANLINESS,COMPLAINT,...,SW ONLY-DOOR HANGER LEFT,TIRES,TRANSPORTATION,UTILITY VERIFICATION LTR,VIOLATIONS,WEATHER AND ENVIRONMENTAL,YARD WASTE,ZON GENERAL INFO INQUIRY,ZONING,ZONING/COMPLAINT/ INSPECT
MERGE_INDEX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10_2016_10,1,1,0,0,0,0,1,1,0,0,...,0,1,1,0,1,1,1,1,1,1
10_2016_100,1,0,0,0,0,0,0,1,0,1,...,0,0,1,0,0,0,1,0,1,0
10_2016_101,0,0,0,0,0,0,0,1,1,0,...,0,0,1,0,0,0,1,0,0,0
10_2016_102,0,0,0,0,0,0,0,1,0,0,...,0,0,1,0,0,0,1,1,0,0
10_2016_103,1,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
10_2016_105,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,1
10_2016_106,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
10_2016_107,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10_2016_108,0,0,0,0,0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
10_2016_109,0,0,0,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,1,1


### Association Rules concepts:
"Support" is the relative frequency that the rules show up. In many instances, you may want to look for high support in order to make sure it is a useful relationship. However, there may be instances where a low support is useful if you are trying to find “hidden” relationships.

"Confidence" is a measure of the reliability of the rule. A confidence of 0.5 in an association rule X --> Y would mean that in 50% of the cases where X occurs, Y also occurs. The confidence threshold to use differs from industry to industry and from case to case. For product recommendation, a 50% confidence may be perfectly acceptable, but in a medical situation this level may not be high enough.

"Lift" is the ratio of the observed support of a rule to the support that would be expected if its antecedent and consequent were independent: $\text{lift}(X \rightarrow Y) = \dfrac{\text{support}(X \cup Y)}{\text{support}(X)\,\text{support}(Y)}$. The basic rule of thumb is that a lift value close to 1 means the antecedent and the consequent are independent of each other. Lift values > 1 are generally more “interesting” and could be indicative of a useful rule pattern.

<img src="https://wiki.smu.edu.sg/1718t3isss608/img_auth.php/thumb/b/b3/Lift_Confidence_Support.png/600px-Lift_Confidence_Support.png"/>

In [183]:
# Mine the frequent itemsets with FP-Growth, keeping those with a support of at least 0.4.
# The 0/1 matrix is cast to bool first: mlxtend accepts both encodings, but boolean
# one-hot frames are its preferred input (recent releases warn about non-bool frames).
# The resulting itemsets are the same.
frequent_itemsets = fpgrowth(request_basket_sets.astype(bool), min_support=0.4, use_colnames=True)
# Lift the display limits so every itemset is printed in full, highest support first
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.max_colwidth', None):
    display(frequent_itemsets.sort_values(['support'],ascending=False))


Unnamed: 0,support,itemsets
0,0.871988,(NON_RECYCLABLE ITEMS)
1,0.811427,(RECYCLABLE ITEMS)
9,0.811046,"(NON_RECYCLABLE ITEMS, RECYCLABLE ITEMS)"
2,0.714019,(CART)
10,0.712801,"(CART, NON_RECYCLABLE ITEMS)"
11,0.692398,"(CART, RECYCLABLE ITEMS)"
12,0.692284,"(CART, NON_RECYCLABLE ITEMS, RECYCLABLE ITEMS)"
3,0.650413,(TRANSPORTATION)
13,0.64299,"(TRANSPORTATION, NON_RECYCLABLE ITEMS)"
4,0.625366,(HNS HEALTH AND SANITATION)


### Inference:

We can see that NON_RECYCLABLE ITEMS and RECYCLABLE ITEMS have the highest support among the frequent itemsets, which aligns with our data because these two are the most frequently reported types of 311 service requests.

The next highest are CART, TRANSPORTATION, HNS HEALTH AND SANITATION, MISSED SERVICE, GARBAGE and RECYCLING. All these are likely to occur in the same neighborhood profile area along with NON_RECYCLABLE ITEMS and RECYCLABLE ITEMS within a time-span of a month. 

We also see a few instances of 311 DOCUMENT which is likely to occur with NON_RECYCLABLE ITEMS and RECYCLABLE ITEMS

In [184]:
# Create the frequent request sets. A minimum support of 0.1 is used so that
# more frequent itemsets are produced than with the 0.4 threshold used above.
# NOTE: `apriori` is imported both from apyori and from mlxtend.frequent_patterns in this
# notebook; this call relies on the mlxtend import being the one that ran last.
# The 0/1 matrix is cast to bool because boolean one-hot frames are mlxtend's preferred
# input (recent releases warn about non-bool frames); the itemsets found are the same.
frequent_reqsets = apriori(request_basket_sets.astype(bool), min_support=0.1, use_colnames=True)

In [185]:
# Generate association rules from the frequent request sets, using lift as the
# selection metric with a minimum threshold of 1
rules = association_rules(frequent_reqsets, metric="lift", min_threshold=1)
# Preview the first 30 rules
rules.head(30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(311 DOCUMENT),(BULKY ITEM),0.487458,0.222717,0.128583,0.263783,1.184384,0.020018,1.055779
1,(BULKY ITEM),(311 DOCUMENT),0.222717,0.487458,0.128583,0.577337,1.184384,0.020018,1.212651
2,(311 DOCUMENT),(CART),0.487458,0.714019,0.372692,0.764563,1.070788,0.024638,1.214683
3,(CART),(311 DOCUMENT),0.714019,0.487458,0.372692,0.521964,1.070788,0.024638,1.072183
4,(311 DOCUMENT),(COMPLAINT),0.487458,0.254235,0.146169,0.299859,1.179459,0.02224,1.065165
5,(COMPLAINT),(311 DOCUMENT),0.254235,0.487458,0.146169,0.574936,1.179459,0.02224,1.205801
6,(CUSTOMER FEEDBACK),(311 DOCUMENT),0.207339,0.487458,0.114575,0.552598,1.133632,0.013506,1.145596
7,(311 DOCUMENT),(CUSTOMER FEEDBACK),0.487458,0.207339,0.114575,0.235046,1.133632,0.013506,1.036221
8,(311 DOCUMENT),(DEAD ANIMAL COLLECTION),0.487458,0.267557,0.152221,0.312275,1.167135,0.021798,1.065023
9,(DEAD ANIMAL COLLECTION),(311 DOCUMENT),0.267557,0.487458,0.152221,0.568929,1.167135,0.021798,1.188997


In [186]:
rules.tail(30)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
116970,"(GARBAGE, TRANSPORTATION)","(311 DOCUMENT, RECYCLING, MISSED SERVICE, CART...",0.407522,0.139127,0.104336,0.256025,1.840225,0.047638,1.157126
116971,"(TRANSPORTATION, MISSED SERVICE)","(311 DOCUMENT, RECYCLING, GARBAGE, CART, HNS H...",0.411861,0.139013,0.104336,0.253327,1.822332,0.047082,1.153099
116972,"(TRANSPORTATION, CART)","(311 DOCUMENT, RECYCLING, GARBAGE, MISSED SERV...",0.565376,0.11526,0.104336,0.184542,1.601089,0.03917,1.08496
116973,"(TRANSPORTATION, HNS HEALTH AND SANITATION)","(311 DOCUMENT, RECYCLING, GARBAGE, MISSED SERV...",0.503293,0.128887,0.104336,0.207306,1.608428,0.039468,1.098927
116974,"(TRANSPORTATION, NON_RECYCLABLE ITEMS)","(311 DOCUMENT, RECYCLING, GARBAGE, MISSED SERV...",0.64299,0.113167,0.104336,0.162266,1.43387,0.031571,1.05861
116975,"(TRANSPORTATION, RECYCLABLE ITEMS)","(311 DOCUMENT, RECYCLING, GARBAGE, MISSED SERV...",0.619695,0.113509,0.104336,0.168366,1.483282,0.033995,1.065963
116976,"(GARBAGE, MISSED SERVICE)","(TRANSPORTATION, 311 DOCUMENT, RECYCLING, CART...",0.341593,0.158007,0.104336,0.305438,1.933067,0.050361,1.212265
116977,"(GARBAGE, CART)","(TRANSPORTATION, 311 DOCUMENT, RECYCLING, MISS...",0.456587,0.129496,0.104336,0.228512,1.76462,0.045209,1.128343
116978,"(GARBAGE, HNS HEALTH AND SANITATION)","(TRANSPORTATION, 311 DOCUMENT, RECYCLING, MISS...",0.400137,0.144875,0.104336,0.26075,1.79983,0.046366,1.156747
116979,"(GARBAGE, NON_RECYCLABLE ITEMS)","(TRANSPORTATION, 311 DOCUMENT, RECYCLING, MISS...",0.500095,0.125994,0.104336,0.208631,1.655878,0.041326,1.104423


In [187]:
rules.describe()

Unnamed: 0,antecedent support,consequent support,support,confidence,lift,leverage,conviction
count,117000.0,117000.0,117000.0,117000.0,117000.0,117000.0,117000.0
mean,0.337491,0.337491,0.134623,0.508276,1.543043,0.044619,inf
std,0.188173,0.188173,0.043766,0.247092,0.189798,0.012621,
min,0.100148,0.100148,0.100148,0.114851,1.027318,0.00763,1.0103
25%,0.187736,0.187736,0.107761,0.301984,1.403026,0.037624,1.155712
50%,0.280347,0.280347,0.11979,0.461246,1.549216,0.044262,1.330334
75%,0.455826,0.455826,0.144304,0.697055,1.675661,0.051133,1.845136
max,0.871988,0.871988,0.811046,1.0,2.121184,0.113898,inf


### Inference:
We see that the highest lift is 2.12 and we also have some perfect confidence scores of 1.

To select the strongest association rules, we keep those with a high lift (≥ 2) and a high confidence (≥ 0.65, which is close to the 75th-percentile confidence of about 0.70 shown above).

In [192]:
# Show the most reliable rules: lift of at least 2 and confidence of at least 0.65,
# ordered from highest to lowest lift. Display limits are lifted so the complete
# antecedent and consequent sets are printed.
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.max_colwidth', None):
    display(
        rules
        .query('lift >= 2 and confidence >= 0.65')
        .sort_values(by='lift', ascending=False)
    )

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
98862,"(HNS HEALTH AND SANITATION, RECYCLING, YARD WASTE)","(GARBAGE, MISSED SERVICE, CART, RECYCLABLE ITEMS)",0.156941,0.314111,0.102014,0.650012,2.069373,0.052717,1.959752
113607,"(HNS HEALTH AND SANITATION, RECYCLING, YARD WASTE)","(GARBAGE, MISSED SERVICE, CART, NON_RECYCLABLE ITEMS, RECYCLABLE ITEMS)",0.156941,0.314111,0.102014,0.650012,2.069373,0.052717,1.959752
113546,"(NON_RECYCLABLE ITEMS, HNS HEALTH AND SANITATION, RECYCLING, YARD WASTE)","(GARBAGE, MISSED SERVICE, CART, RECYCLABLE ITEMS)",0.156941,0.314111,0.102014,0.650012,2.069373,0.052717,1.959752
113547,"(HNS HEALTH AND SANITATION, RECYCLING, YARD WASTE, RECYCLABLE ITEMS)","(GARBAGE, MISSED SERVICE, NON_RECYCLABLE ITEMS, CART)",0.156294,0.317841,0.102014,0.652703,2.053553,0.052337,1.964197
113497,"(RECYCLING, YARD WASTE, HNS HEALTH AND SANITATION, NON_RECYCLABLE ITEMS, RECYCLABLE ITEMS)","(GARBAGE, MISSED SERVICE, CART)",0.156294,0.317993,0.102014,0.652703,2.05257,0.052313,1.963759
98833,"(HNS HEALTH AND SANITATION, RECYCLING, YARD WASTE, RECYCLABLE ITEMS)","(GARBAGE, MISSED SERVICE, CART)",0.156294,0.317993,0.102014,0.652703,2.05257,0.052313,1.963759
97980,"(HNS HEALTH AND SANITATION, RECYCLING, YARD WASTE)","(GARBAGE, MISSED SERVICE, NON_RECYCLABLE ITEMS, CART)",0.156941,0.317841,0.102356,0.652195,2.051954,0.052474,1.961326
66192,"(HNS HEALTH AND SANITATION, RECYCLING, YARD WASTE)","(GARBAGE, MISSED SERVICE, CART)",0.156941,0.317993,0.102356,0.652195,2.050971,0.05245,1.960888
97951,"(NON_RECYCLABLE ITEMS, HNS HEALTH AND SANITATION, RECYCLING, YARD WASTE)","(GARBAGE, MISSED SERVICE, CART)",0.156941,0.317993,0.102356,0.652195,2.050971,0.05245,1.960888
115406,"(VIOLATIONS, RECYCLING, NON_RECYCLABLE ITEMS)","(TRANSPORTATION, GARBAGE, CART, HNS HEALTH AND SANITATION, RECYCLABLE ITEMS)",0.154771,0.319135,0.101252,0.654206,2.049933,0.051859,1.968987


### Conclusion for running Association Rules on the entire dataset:
As the top rules above show, when HNS HEALTH AND SANITATION, RECYCLING and YARD WASTE requests are all raised in a particular neighborhood profile area during a month, GARBAGE, MISSED SERVICE, CART and RECYCLABLE ITEMS requests are also likely to be raised in that area in the same month (confidence ≈ 0.65, lift ≈ 2.07).