In this notebook, I analyze spider–prey relation data.
The dataset covers 3 spiders and 76 prey.

In [1]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth

In [2]:
# Load the spider-prey relation table from a CSV file in the working directory.
df = pd.read_csv('spider_prey_relation_table.csv')
# Preview the first rows to check the columns.
df.head()

Unnamed: 0,Prey-Id,SA Date-Time,Signal Time,Spider-Id,Distance From Spider,Trip No
0,v_bv,2019-01-01 06:19:00,2019-01-01 06:19:00,1.0,7.19,1.0
1,b_tt_1,2019-01-01 06:38:00,2019-01-01 06:38:00,0.0,3.18,1.0
2,rd_end_north,2019-01-01 06:42:00,2019-01-01 06:42:00,0.0,7.48,1.0
3,y_rt,2019-01-01 06:55:00,2019-01-01 06:55:00,2.0,5.34,1.0
4,b_tt_3,2019-01-01 07:01:00,2019-01-01 07:01:00,2.0,7.72,1.0


Check for Null values

In [3]:
# Count missing values in each column.
df.isna().sum()

Prey-Id                 0
SA Date-Time            0
Signal Time             0
Spider-Id               0
Distance From Spider    0
Trip No                 0
dtype: int64

In [4]:
# (rows, columns) of the loaded table.
df.shape

(27019, 6)

In [5]:
# Number of distinct prey identifiers.
df['Prey-Id'].nunique()

76

In [6]:
# List the distinct prey identifiers.
df['Prey-Id'].unique()

array(['v_bv', 'b_tt_1', 'rd_end_north', 'y_rt', 'b_tt_3', 'v_end_east',
       'o_w_1', 'b_bv', 'rh_end_tt_3', 'y_rh', 'v_end_west',
       'nature_end_west', 'w_bw', 'rh_end_north', 'b_rt', 'rc_end_north',
       'rh_end_tt_1', 'b_y', 'rh_end_tt_2', 'w_rc', 'y_rs', 'g_w',
       'rd_end_south', 'y_gy1', 'w_rs', 'v_rt', 'bstub_w', 'o_gy2',
       'rs_end_north', 'v_rc', 'v_rs', 'rd_end_south_dupe', 'w_rt',
       'rh_end_south', 'rc_end_south', 'b_rc', 'g_rt', 'park_east',
       'o_rh', 'w_gy2', 'o_gy1', 'rt_end_north', 'b_tt_2', 'w_gy1',
       'y_gy2', 'b_rs', 'v_rd', 'b_tt_2_dupe', 'v_rh', 'rs_end_south',
       'g_rs', 'o_y_tt_end_west', 'b_end_west', 'g_gy1', 'o_tt', 'o_w_2',
       'b_tt_3_dupe', 'b_g', 'b_rh', 'rh_end_tt_4', 'b_rd_dupe', 'b_bw',
       'b_o', 'y_rc', 'tt_rt', 'g_rc', 'o_rc', 'b_gy2', 'w_tt', 'b_rd',
       'rt_end_south', 'o_rs', 'g_gy2', 'b_w', 'o_rt', 'b_v'],
      dtype=object)

`mlxtend.frequent_patterns.apriori` doesn't work with strings, so I map every prey ID to a unique integer.

In [7]:
# Assign each prey identifier a consecutive integer code (0, 1, 2, ...),
# numbering the identifiers in sorted order so the mapping is deterministic.
prey_ids_sorted = sorted(df['Prey-Id'].unique())
d = dict(zip(prey_ids_sorted, range(len(prey_ids_sorted))))

In [8]:
# Replace each prey identifier with its integer code from `d`.
# The result is assigned back rather than calling replace(..., inplace=True)
# on the selected column: the in-place form acts on an intermediate Series,
# which pandas warns about and which leaves `df` unchanged under Copy-on-Write.
df['Prey-Id'] = df['Prey-Id'].replace(d)

In [9]:
# Preview the table after mapping Prey-Id to integer codes.
df.head()

Unnamed: 0,Prey-Id,SA Date-Time,Signal Time,Spider-Id,Distance From Spider,Trip No
0,55,2019-01-01 06:19:00,2019-01-01 06:19:00,1.0,7.19,1.0
1,12,2019-01-01 06:38:00,2019-01-01 06:38:00,0.0,3.18,1.0
2,41,2019-01-01 06:42:00,2019-01-01 06:42:00,0.0,7.48,1.0
3,75,2019-01-01 06:55:00,2019-01-01 06:55:00,2.0,5.34,1.0
4,15,2019-01-01 07:01:00,2019-01-01 07:01:00,2.0,7.72,1.0


In [10]:
# Character length of the first 'SA Date-Time' value (row label 0).
len(df['SA Date-Time'][0])

19

In [11]:
# Keep only rows whose 'SA Date-Time' string is exactly 19 characters long
# (the length of the first row's timestamp, e.g. '2019-01-01 06:19:00').
# .copy() makes the result an independent frame, so later column assignments
# on `df` do not raise SettingWithCopyWarning.
df = df[df['SA Date-Time'].str.len() == 19].copy()

In [13]:
# Reduce 'SA Date-Time' to its calendar date and drop the 'Signal Time' column.
# A new frame is built with assign()/drop() instead of assigning into `df`
# and dropping in place; this avoids SettingWithCopyWarning when `df` is a
# filtered slice and keeps the step free of in-place mutation.
df = (
    df
    .assign(**{'SA Date-Time': pd.to_datetime(df['SA Date-Time']).dt.date})
    .drop(columns=['Signal Time'])
)
df.head()

Unnamed: 0,Prey-Id,SA Date-Time,Spider-Id,Distance From Spider,Trip No
0,55,2019-01-01,1.0,7.19,1.0
1,12,2019-01-01,0.0,3.18,1.0
2,41,2019-01-01,0.0,7.48,1.0
3,75,2019-01-01,2.0,5.34,1.0
4,15,2019-01-01,2.0,7.72,1.0


Group the prey IDs by date and spider, so that each group becomes one transaction.

In [14]:
# Build one transaction per (date, spider) pair: the list of prey codes
# recorded for that spider on that date.
df = (
    df.groupby(['SA Date-Time', 'Spider-Id'])['Prey-Id']
    .apply(list)
    .reset_index()
)

# Keep only transactions with fewer than 13 prey entries.
MAX_PREY_PER_TRANSACTION = 13  # exclusive upper bound on transaction length
# .copy() gives an independent frame rather than a view-like slice.
df = df[df['Prey-Id'].str.len() < MAX_PREY_PER_TRANSACTION].copy()

# The grouped values are already Python lists, so no further conversion is
# needed before exporting them as the list of transactions.
records = df['Prey-Id'].tolist()

In [15]:
# Show the first two transactions (lists of prey codes).
records[:2]

[[61, 38, 5, 30, 4, 38, 4, 8, 41, 47, 14, 72],
 [15, 44, 26, 39, 23, 55, 21, 41, 73, 47, 2, 25]]

In [16]:
# One-hot encode the transactions: one row per transaction and one column per
# distinct prey code.
te = TransactionEncoder()
te.fit(records)
te_ary = te.transform(records)
dff = pd.DataFrame(data=te_ary, columns=te.columns_)

# Mine frequent itemsets with Apriori, labelling items by column name.
# NOTE(review): the displayed result contains 12-item itemsets with support
# 0.013699 (about 1/73), which looks like itemsets occurring in a single
# transaction. min_support=0.01 may be too low to be informative - confirm.
frequent_itemsets = apriori(dff, use_colnames=True, min_support=0.01)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.205479,(0)
1,0.027397,(1)
2,0.136986,(2)
3,0.136986,(3)
4,0.315068,(4)
...,...,...
113740,0.013699,"(67, 4, 8, 42, 21, 55, 24, 25, 27, 29, 30, 31)"
113741,0.013699,"(4, 70, 39, 10, 45, 15, 49, 50, 51, 52, 61, 30)"
113742,0.013699,"(64, 68, 7, 11, 12, 45, 75, 16, 48, 18, 52, 21)"
113743,0.013699,"(32, 34, 66, 39, 7, 11, 16, 19, 21, 23, 25, 60)"
