In [3]:
import numpy as np
import pandas as pd
import math
from apyori import apriori

In [21]:
df=pd.read_csv('Laundry_Data.csv')

# Data Cleaning

In [5]:
df.info()
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 807 entries, 0 to 806
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   No             807 non-null    int64  
 1   Date           807 non-null    object 
 2   Time           807 non-null    object 
 3   Race           752 non-null    object 
 4   Gender         716 non-null    object 
 5   Body_Size      709 non-null    object 
 6   Age_Range      756 non-null    float64
 7   With_Kids      725 non-null    object 
 8   Kids_Category  777 non-null    object 
 9   Basket_Size    752 non-null    object 
 10  Basket_colour  717 non-null    object 
 11  Attire         776 non-null    object 
 12  Shirt_Colour   720 non-null    object 
 13  shirt_type     770 non-null    object 
 14  Pants_Colour   802 non-null    object 
 15  pants_type     712 non-null    object 
 16  Wash_Item      718 non-null    object 
 17  Washer_No      807 non-null    int64  
 18  Dryer_No  

Unnamed: 0,No,Date,Time,Race,Gender,Body_Size,Age_Range,With_Kids,Kids_Category,Basket_Size,Basket_colour,Attire,Shirt_Colour,shirt_type,Pants_Colour,pants_type,Wash_Item,Washer_No,Dryer_No,Spectacles
0,1,19/10/2015,20:17:50,malay,,moderate,,yes,young,big,red,casual,blue,short_sleeve,black,short,clothes,3,10,no
1,2,19/10/2015,20:28:42,indian,male,thin,32.0,no,no_kids,big,green,casual,,short_sleeve,blue_jeans,long,,6,9,no
2,3,19/10/2015,20:58:31,malay,female,,30.0,no,no_kids,big,blue,casual,red,short_sleeve,black,long,,4,10,no
3,4,19/10/2015,21:31:28,indian,male,thin,51.0,no,no_kids,,black,casual,,short_sleeve,yellow,short,clothes,5,9,no
4,5,19/10/2015,21:40:28,indian,male,moderate,34.0,no,no_kids,big,blue,casual,blue,short_sleeve,white,long,clothes,3,9,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
802,803,09/12/2015,20:05:46,malay,female,moderate,45.0,,no_kids,small,white,casual,red,long sleeve,black,long,clothes,3,10,no
803,804,09/12/2015,20:33:01,malay,male,fat,34.0,no,no_kids,big,grey,casual,white,short_sleeve,black,long,blankets,3,7,no
804,805,09/12/2015,20:37:01,malay,female,moderate,53.0,no,no_kids,big,purple,traditional,pink,long sleeve,pink,long,clothes,3,7,yes
805,806,09/12/2015,20:42:57,indian,female,moderate,37.0,no,no_kids,big,green,traditional,brown,short_sleeve,black,long,clothes,6,10,no


In [None]:
array(['malay','moderate','yes']
     ['ind']
     []
     [])

In [40]:
def fill_null_val(df):
    return df.fillna('Unknown')

def change_to_date(df):
    date=pd.to_datetime(df['Date'], infer_datetime_format=True)
    return df.assign(Date=date)

def change_to_time(df):
    time=pd.to_datetime(df['Time'], infer_datetime_format=True)
    time=pd.DatetimeIndex(time).time
    return df.assign(Time=time)

def get_day_col(df):
    dayCol=pd.DatetimeIndex(df['Date']).day
    return df.assign(Day=dayCol)

def get_month_col(df):
    monthCol=pd.DatetimeIndex(df['Date']).month
    return df.assign(Month=monthCol)

def fill_age(df):
    age=df['Age_Range'].fillna(round(df['Age_Range'].mean()))
    age=age.astype('int64') 
    return df.assign(Age_Range=age)

def fill_withKids_yes(df):
    with_kids = np.where(df["Kids_Category"]=='no_kids',df['With_Kids'],'yes')
    return df.assign(With_Kids=with_kids)

def drop_no(df):
    return df.drop('No',axis=1)

def fill_withKids_no(df):
    with_kids = np.where(df["Kids_Category"]!='no_kids',df['With_Kids'],'no')
    return df.assign(With_Kids=with_kids)

def mark_washer(df):
    washer = df['Washer_No'].apply(lambda x: "{}{}".format('W_', x))
    return df.assign(Washer_No=washer)

def mark_dryer(df):
    dryer = df['Dryer_No'].apply(lambda x: "{}{}".format('D_', x))
    return df.assign(Dryer_No=dryer)

def mark_shirt(df):
    shirt = df['shirt_type'].apply(lambda x: "{}{}".format('S_', x))
    return df.assign(shirt_type=shirt)

def mark_pants(df):
    pants = df['pants_type'].apply(lambda x: "{}{}".format('P_', x))
    return df.assign(pants_type=pants)

arm_drop=['Date','Time','Age_Range','Race','Gender','Body_Size','With_Kids','Kids_Category','Spectacles']
arm_select=['Basket_Size','Basket_colour','Washer_No','Dryer_No']

def drop_arm(df):
    return df.drop(columns=arm_drop)

def select_arm(df):
    return df[arm_select]

In [18]:
df=(df.pipe(change_to_date)
        .pipe(get_day_col)
        .pipe(get_month_col)
        .pipe(fill_age)
        .pipe(fill_withKids_yes)
        .pipe(fill_withKids_no)
        .pipe(drop_no)
        .pipe(fill_null_val)
    )



# Applying ARM

In [41]:
arm=(df.pipe(change_to_date)
        .pipe(mark_washer)
        .pipe(mark_dryer)
        .pipe(drop_no)
        .pipe(select_arm)
        .pipe(fill_null_val)
    )

In [43]:
arm['Dryer_No'].unique()
# arm.shape
# arm.info()
# arm

array(['D_10', 'D_9', 'D_8', 'D_7'], dtype=object)

In [129]:
len(arm.columns)

16

In [48]:
records = []
# for i in range(0, len(arm)):
#     records.append([str(arm[i,[j]]) for j in arm.columns])

# Iterate over each row 
for index, rows in arm.iterrows(): 
    columns=arm.columns
    my_list =[] 
    for att in columns:
        my_list.append(rows[att])
      
    # append the list to the final list 
    records.append(my_list)
    
records

[['big', 'red', 'W_3', 'D_10'],
 ['big', 'green', 'W_6', 'D_9'],
 ['big', 'blue', 'W_4', 'D_10'],
 ['Unknown', 'black', 'W_5', 'D_9'],
 ['big', 'blue', 'W_3', 'D_9'],
 ['small', 'white', 'W_4', 'D_8'],
 ['big', 'pink', 'W_5', 'D_10'],
 ['big', 'blue', 'W_3', 'D_9'],
 ['small', 'purple', 'W_6', 'D_10'],
 ['small', 'pink', 'W_5', 'D_7'],
 ['big', 'red', 'W_4', 'D_8'],
 ['Unknown', 'blue', 'W_4', 'D_8'],
 ['big', 'blue', 'W_3', 'D_7'],
 ['big', 'yellow', 'W_3', 'D_9'],
 ['big', 'red', 'W_3', 'D_8'],
 ['big', 'brown', 'W_5', 'D_9'],
 ['Unknown', 'Unknown', 'W_4', 'D_7'],
 ['big', 'blue', 'W_5', 'D_9'],
 ['Unknown', 'blue', 'W_6', 'D_7'],
 ['big', 'pink', 'W_5', 'D_10'],
 ['big', 'black', 'W_3', 'D_10'],
 ['big', 'Unknown', 'W_5', 'D_9'],
 ['big', 'white', 'W_3', 'D_7'],
 ['small', 'white', 'W_3', 'D_7'],
 ['big', 'white', 'W_5', 'D_7'],
 ['small', 'orange', 'W_3', 'D_8'],
 ['big', 'white', 'W_6', 'D_9'],
 ['big', 'Unknown', 'W_5', 'D_9'],
 ['big', 'blue', 'W_4', 'D_10'],
 ['big', 'Unknown'

In [74]:
association_rules = apriori(records, min_support=0.01, min_confidence=0.2, min_lift=2, min_length=2)
association_results = list(association_rules)
association_results

[RelationRecord(items=frozenset({'pink', 'W_5'}), support=0.022304832713754646, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pink'}), items_add=frozenset({'W_5'}), confidence=0.5142857142857142, lift=2.184360902255639)]),
 RelationRecord(items=frozenset({'white', 'small'}), support=0.03841387856257745, ordered_statistics=[OrderedStatistic(items_base=frozenset({'small'}), items_add=frozenset({'white'}), confidence=0.543859649122807, lift=2.0899749373433583)]),
 RelationRecord(items=frozenset({'D_10', 'small', 'W_5'}), support=0.012391573729863693, ordered_statistics=[OrderedStatistic(items_base=frozenset({'D_10', 'W_5'}), items_add=frozenset({'small'}), confidence=0.2272727272727273, lift=3.2177033492822966), OrderedStatistic(items_base=frozenset({'D_10', 'small'}), items_add=frozenset({'W_5'}), confidence=0.5263157894736843, lift=2.235457063711912)]),
 RelationRecord(items=frozenset({'blue', 'D_10', 'W_6'}), support=0.023543990086741014, ordered_statistics=[OrderedStatis

In [76]:
len(association_results)

16

In [75]:
print(association_results[0])

RelationRecord(items=frozenset({'pink', 'W_5'}), support=0.022304832713754646, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pink'}), items_add=frozenset({'W_5'}), confidence=0.5142857142857142, lift=2.184360902255639)])


In [77]:
cnt =0

for item in association_results:
    cnt += 1
    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    print("(Rule " + str(cnt) + ") " + items[0] + " -> " + items[1])

    #second index of the inner list
    print("Support: " + str(round(item[1],3)))

    #third index of the list located at 0th
    #of the third index of the inner list

    print("Confidence: " + str(round(item[2][0][2],4)))
    print("Lift: " + str(round(item[2][0][3],4)))
    print("=====================================")

(Rule 1) pink -> W_5
Support: 0.022
Confidence: 0.5143
Lift: 2.1844
(Rule 2) white -> small
Support: 0.038
Confidence: 0.5439
Lift: 2.09
(Rule 3) D_10 -> small
Support: 0.012
Confidence: 0.2273
Lift: 3.2177
(Rule 4) blue -> D_10
Support: 0.024
Confidence: 0.5588
Lift: 2.3127
(Rule 5) white -> D_10
Support: 0.014
Confidence: 0.5789
Lift: 2.2248
(Rule 6) W_3 -> blue
Support: 0.031
Confidence: 0.5814
Lift: 2.0578
(Rule 7) W_4 -> D_8
Support: 0.019
Confidence: 0.5172
Lift: 2.2935
(Rule 8) W_4 -> D_8
Support: 0.024
Confidence: 0.5
Lift: 2.0379
(Rule 9) D_9 -> blue
Support: 0.015
Confidence: 0.5
Lift: 2.2293
(Rule 10) big -> pink
Support: 0.02
Confidence: 0.4571
Lift: 2.4112
(Rule 11) white -> small
Support: 0.016
Confidence: 0.2281
Lift: 4.0901
(Rule 12) big -> blue
Support: 0.024
Confidence: 0.5588
Lift: 2.7004
(Rule 13) W_3 -> big
Support: 0.03
Confidence: 0.5581
Lift: 2.2521
(Rule 14) W_4 -> D_8
Support: 0.016
Confidence: 0.4483
Lift: 2.2896
(Rule 15) D_9 -> big
Support: 0.014
Confidence

In [70]:
len(df[df.isnull().any(axis=1)])/len(df)*100


0.0

In [71]:
df.isnull().sum()


No               0
Date             0
Time             0
Race             0
Gender           0
Body_Size        0
Age_Range        0
With_Kids        0
Kids_Category    0
Basket_Size      0
Basket_colour    0
Attire           0
Shirt_Colour     0
shirt_type       0
Pants_Colour     0
pants_type       0
Wash_Item        0
Washer_No        0
Dryer_No         0
Spectacles       0
Day              0
Month            0
dtype: int64

In [19]:
print('Race\n',df['Race'].value_counts())
print('Gender\n',df['Gender'].value_counts())
print('Body_Size\n',df['Body_Size'].value_counts())
print('With_Kids\n',df['With_Kids'].value_counts())
print('Kids_Category\n',df['Kids_Category'].value_counts())



Race
 indian        227
malay         222
chinese       221
foreigner      82
Unknown        55
Name: Race, dtype: int64
Gender
 female     361
male       355
Unknown     91
Name: Gender, dtype: int64
Body_Size
 fat         247
moderate    237
thin        225
Unknown      98
Name: Body_Size, dtype: int64
With_Kids
 no     603
yes    204
Name: With_Kids, dtype: int64
Kids_Category
 no_kids     603
young        90
toddler      47
baby         36
Unknown      30
toddler       1
Name: Kids_Category, dtype: int64
