In [1]:
import numpy as np
import pandas as pd
import math
from apyori import apriori

In [2]:
df=pd.read_csv('Laundry_Data.csv')

# Data Cleaning

In [3]:
df.info()
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 807 entries, 0 to 806
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   No             807 non-null    int64  
 1   Date           807 non-null    object 
 2   Time           807 non-null    object 
 3   Race           752 non-null    object 
 4   Gender         716 non-null    object 
 5   Body_Size      709 non-null    object 
 6   Age_Range      756 non-null    float64
 7   With_Kids      725 non-null    object 
 8   Kids_Category  777 non-null    object 
 9   Basket_Size    752 non-null    object 
 10  Basket_colour  717 non-null    object 
 11  Attire         776 non-null    object 
 12  Shirt_Colour   720 non-null    object 
 13  shirt_type     770 non-null    object 
 14  Pants_Colour   802 non-null    object 
 15  pants_type     712 non-null    object 
 16  Wash_Item      718 non-null    object 
 17  Washer_No      807 non-null    int64  
 18  Dryer_No  

Unnamed: 0,No,Date,Time,Race,Gender,Body_Size,Age_Range,With_Kids,Kids_Category,Basket_Size,Basket_colour,Attire,Shirt_Colour,shirt_type,Pants_Colour,pants_type,Wash_Item,Washer_No,Dryer_No,Spectacles
0,1,19/10/2015,20:17:50,malay,,moderate,,yes,young,big,red,casual,blue,short_sleeve,black,short,clothes,3,10,no
1,2,19/10/2015,20:28:42,indian,male,thin,32.0,no,no_kids,big,green,casual,,short_sleeve,blue_jeans,long,,6,9,no
2,3,19/10/2015,20:58:31,malay,female,,30.0,no,no_kids,big,blue,casual,red,short_sleeve,black,long,,4,10,no
3,4,19/10/2015,21:31:28,indian,male,thin,51.0,no,no_kids,,black,casual,,short_sleeve,yellow,short,clothes,5,9,no
4,5,19/10/2015,21:40:28,indian,male,moderate,34.0,no,no_kids,big,blue,casual,blue,short_sleeve,white,long,clothes,3,9,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
802,803,09/12/2015,20:05:46,malay,female,moderate,45.0,,no_kids,small,white,casual,red,long sleeve,black,long,clothes,3,10,no
803,804,09/12/2015,20:33:01,malay,male,fat,34.0,no,no_kids,big,grey,casual,white,short_sleeve,black,long,blankets,3,7,no
804,805,09/12/2015,20:37:01,malay,female,moderate,53.0,no,no_kids,big,purple,traditional,pink,long sleeve,pink,long,clothes,3,7,yes
805,806,09/12/2015,20:42:57,indian,female,moderate,37.0,no,no_kids,big,green,traditional,brown,short_sleeve,black,long,clothes,6,10,no


In [None]:
array(['malay','moderate','yes']
     ['ind']
     []
     [])

In [4]:
def fill_null_val(df):
    return df.fillna('Unknown')

def change_to_date(df):
    date=pd.to_datetime(df['Date'], infer_datetime_format=True)
    return df.assign(Date=date)

def change_to_time(df):
    time=pd.to_datetime(df['Time'], infer_datetime_format=True)
    time=pd.DatetimeIndex(time).time
    return df.assign(Time=time)

def get_day_col(df):
    dayCol=pd.DatetimeIndex(df['Date']).day
    return df.assign(Day=dayCol)

def get_month_col(df):
    monthCol=pd.DatetimeIndex(df['Date']).month
    return df.assign(Month=monthCol)

def fill_age(df):
    age=df['Age_Range'].fillna(round(df['Age_Range'].mean()))
    age=age.astype('int64') 
    return df.assign(Age_Range=age)

def fill_withKids_yes(df):
    with_kids = np.where(df["Kids_Category"]=='no_kids',df['With_Kids'],'yes')
    return df.assign(With_Kids=with_kids)

def drop_no(df):
    return df.drop('No',axis=1)





def fill_withKids_no(df):
    with_kids = np.where(df["Kids_Category"]!='no_kids',df['With_Kids'],'no')
    return df.assign(With_Kids=with_kids)

def mark_washer(df):
    washer = df['Washer_No'].apply(lambda x: "{}{}".format('W_', x))
    return df.assign(Washer_No=washer)

def mark_dryer(df):
    dryer = df['Dryer_No'].apply(lambda x: "{}{}".format('D_', x))
    return df.assign(Dryer_No=dryer)

def mark_shirt(df):
    shirt = df['shirt_type'].apply(lambda x: "{}{}".format('S_', x))
    return df.assign(shirt_type=shirt)

def mark_pants(df):
    pants = df['pants_type'].apply(lambda x: "{}{}".format('P_', x))
    return df.assign(pants_type=pants)

def drop_arm(df):
    return df.drop(columns=['Date','Time','Age_Range','Race','Gender','Body_Size','With_Kids','Kids_Category','Spectacles'])

In [47]:
df=(df.pipe(change_to_date)
        .pipe(get_day_col)
        .pipe(get_month_col)
        .pipe(fill_age)
        .pipe(fill_withKids_yes)
        .pipe(fill_withKids_no)
        .pipe(drop_no)
        .pipe(fill_null_val)
    )



# Applying ARM

In [5]:
arm=(df.pipe(change_to_date)
        .pipe(fill_age)
        .pipe(fill_withKids_yes)
        .pipe(fill_withKids_no)
        .pipe(mark_washer)
        .pipe(mark_dryer)
        .pipe(mark_shirt)
        .pipe(mark_pants)
        .pipe(drop_no)
        .pipe(drop_arm)
        .pipe(fill_null_val)
    )

In [180]:
arm['Dryer_No'].unique()
arm.shape
arm.info()
arm

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 807 entries, 0 to 806
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Basket_Size    807 non-null    object
 1   Basket_colour  807 non-null    object
 2   Attire         807 non-null    object
 3   Shirt_Colour   807 non-null    object
 4   shirt_type     807 non-null    object
 5   Pants_Colour   807 non-null    object
 6   pants_type     807 non-null    object
 7   Wash_Item      807 non-null    object
 8   Washer_No      807 non-null    object
 9   Dryer_No       807 non-null    object
dtypes: object(10)
memory usage: 63.2+ KB


Unnamed: 0,Basket_Size,Basket_colour,Attire,Shirt_Colour,shirt_type,Pants_Colour,pants_type,Wash_Item,Washer_No,Dryer_No
0,big,red,casual,blue,S_short_sleeve,black,P_short,clothes,W_3,D_10
1,big,green,casual,Unknown,S_short_sleeve,blue_jeans,P_long,Unknown,W_6,D_9
2,big,blue,casual,red,S_short_sleeve,black,P_long,Unknown,W_4,D_10
3,Unknown,black,casual,Unknown,S_short_sleeve,yellow,P_short,clothes,W_5,D_9
4,big,blue,casual,blue,S_short_sleeve,white,P_long,clothes,W_3,D_9
...,...,...,...,...,...,...,...,...,...,...
802,small,white,casual,red,S_long sleeve,black,P_long,clothes,W_3,D_10
803,big,grey,casual,white,S_short_sleeve,black,P_long,blankets,W_3,D_7
804,big,purple,traditional,pink,S_long sleeve,pink,P_long,clothes,W_3,D_7
805,big,green,traditional,brown,S_short_sleeve,black,P_long,clothes,W_6,D_10


In [129]:
len(arm.columns)

16

In [6]:
records = []
# for i in range(0, len(arm)):
#     records.append([str(arm[i,[j]]) for j in arm.columns])

# Iterate over each row 
for index, rows in arm.iterrows(): 
    columns=arm.columns
    my_list =[] 
    for att in columns:
        my_list.append(rows[att])
      
    # append the list to the final list 
    records.append(my_list)
    
records

[['big',
  'red',
  'casual',
  'blue',
  'S_short_sleeve',
  'black',
  'P_short',
  'clothes',
  'W_3',
  'D_10'],
 ['big',
  'green',
  'casual',
  'Unknown',
  'S_short_sleeve',
  'blue_jeans',
  'P_long',
  'Unknown',
  'W_6',
  'D_9'],
 ['big',
  'blue',
  'casual',
  'red',
  'S_short_sleeve',
  'black',
  'P_long',
  'Unknown',
  'W_4',
  'D_10'],
 ['Unknown',
  'black',
  'casual',
  'Unknown',
  'S_short_sleeve',
  'yellow',
  'P_short',
  'clothes',
  'W_5',
  'D_9'],
 ['big',
  'blue',
  'casual',
  'blue',
  'S_short_sleeve',
  'white',
  'P_long',
  'clothes',
  'W_3',
  'D_9'],
 ['small',
  'white',
  'casual',
  'brown',
  'S_nan',
  'blue_jeans',
  'P_long',
  'clothes',
  'W_4',
  'D_8'],
 ['big',
  'pink',
  'traditional',
  'blue',
  'S_nan',
  'black',
  'P_long',
  'clothes',
  'W_5',
  'D_10'],
 ['big',
  'blue',
  'casual',
  'Unknown',
  'S_short_sleeve',
  'brown',
  'P_long',
  'clothes',
  'W_3',
  'D_9'],
 ['small',
  'purple',
  'casual',
  'white',
  'S_s

In [7]:
association_rules = apriori(records, min_support=0.02, min_confidence=0.2, min_lift=2, min_length=2)
association_results = list(association_rules)
association_results

[RelationRecord(items=frozenset({'traditional', 'S_long sleeve'}), support=0.04584882280049566, ordered_statistics=[OrderedStatistic(items_base=frozenset({'S_long sleeve'}), items_add=frozenset({'traditional'}), confidence=0.22839506172839505, lift=3.021554341226472), OrderedStatistic(items_base=frozenset({'traditional'}), items_add=frozenset({'S_long sleeve'}), confidence=0.6065573770491802, lift=3.0215543412264716)]),
 RelationRecord(items=frozenset({'brown', 'small'}), support=0.023543990086741014, ordered_statistics=[OrderedStatistic(items_base=frozenset({'small'}), items_add=frozenset({'brown'}), confidence=0.33333333333333326, lift=2.3189655172413786)]),
 RelationRecord(items=frozenset({'D_7', 'blankets', 'S_long sleeve'}), support=0.022304832713754646, ordered_statistics=[OrderedStatistic(items_base=frozenset({'blankets', 'S_long sleeve'}), items_add=frozenset({'D_7'}), confidence=0.5806451612903225, lift=2.0110757303059668)]),
 RelationRecord(items=frozenset({'D_7', 'blankets',

In [8]:
len(association_results)

218

In [184]:
print(association_results[0])

RelationRecord(items=frozenset({'S_long sleeve', 'traditional'}), support=0.04584882280049566, ordered_statistics=[OrderedStatistic(items_base=frozenset({'S_long sleeve'}), items_add=frozenset({'traditional'}), confidence=0.22839506172839505, lift=3.021554341226472), OrderedStatistic(items_base=frozenset({'traditional'}), items_add=frozenset({'S_long sleeve'}), confidence=0.6065573770491802, lift=3.0215543412264716)])


In [9]:
cnt =0

for item in association_results:
    cnt += 1
    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    print("(Rule " + str(cnt) + ") " + items[0] + " -> " + items[1])

    #second index of the inner list
    print("Support: " + str(round(item[1],3)))

    #third index of the list located at 0th
    #of the third index of the inner list

    print("Confidence: " + str(round(item[2][0][2],4)))
    print("Lift: " + str(round(item[2][0][3],4)))
    print("=====================================")

(Rule 1) traditional -> S_long sleeve
Support: 0.046
Confidence: 0.2284
Lift: 3.0216
(Rule 2) brown -> small
Support: 0.024
Confidence: 0.3333
Lift: 2.319
(Rule 3) D_7 -> blankets
Support: 0.022
Confidence: 0.5806
Lift: 2.0111
(Rule 4) D_7 -> blankets
Support: 0.037
Confidence: 0.6522
Lift: 2.2588
(Rule 5) P_long -> S_long sleeve
Support: 0.032
Confidence: 0.5
Lift: 2.4907
(Rule 6) P_long -> purple
Support: 0.022
Confidence: 0.6923
Lift: 3.4487
(Rule 7) P_long -> traditional
Support: 0.045
Confidence: 0.2222
Lift: 3.0395
(Rule 8) P_long -> small
Support: 0.022
Confidence: 0.3158
Lift: 2.2355
(Rule 9) P_long -> traditional
Support: 0.03
Confidence: 0.3934
Lift: 2.7852
(Rule 10) P_short -> purple
Support: 0.026
Confidence: 0.6364
Lift: 2.3666
(Rule 11) P_short -> W_6
Support: 0.048
Confidence: 0.6724
Lift: 2.0554
(Rule 12) P_short -> red
Support: 0.027
Confidence: 0.6875
Lift: 2.1016
(Rule 13) big -> traditional
Support: 0.038
Confidence: 0.5082
Lift: 2.9718
(Rule 14) formal -> S_long sl

In [70]:
len(df[df.isnull().any(axis=1)])/len(df)*100


0.0

In [71]:
df.isnull().sum()


No               0
Date             0
Time             0
Race             0
Gender           0
Body_Size        0
Age_Range        0
With_Kids        0
Kids_Category    0
Basket_Size      0
Basket_colour    0
Attire           0
Shirt_Colour     0
shirt_type       0
Pants_Colour     0
pants_type       0
Wash_Item        0
Washer_No        0
Dryer_No         0
Spectacles       0
Day              0
Month            0
dtype: int64

In [19]:
print('Race\n',df['Race'].value_counts())
print('Gender\n',df['Gender'].value_counts())
print('Body_Size\n',df['Body_Size'].value_counts())
print('With_Kids\n',df['With_Kids'].value_counts())
print('Kids_Category\n',df['Kids_Category'].value_counts())



Race
 indian        227
malay         222
chinese       221
foreigner      82
Unknown        55
Name: Race, dtype: int64
Gender
 female     361
male       355
Unknown     91
Name: Gender, dtype: int64
Body_Size
 fat         247
moderate    237
thin        225
Unknown      98
Name: Body_Size, dtype: int64
With_Kids
 no     603
yes    204
Name: With_Kids, dtype: int64
Kids_Category
 no_kids     603
young        90
toddler      47
baby         36
Unknown      30
toddler       1
Name: Kids_Category, dtype: int64
