In [10]:
import pandas as pd
import random

# Sample items
items = ["milk", "bread", "butter", "eggs", "jam", "juice", "apple", "banana", "cheese", "coffee"]

# Generate 30 random sequences
data = {
    "sequence": [
        ",".join(random.sample(items, random.randint(3, 6)))  # 3â€“6 items per sequence
        for _ in range(30)
    ]
}

df = pd.DataFrame(data)
df.to_csv("transactions.csv", index=True)
print(df.head())


                     sequence
0  cheese,banana,bread,butter
1            apple,coffee,jam
2   bread,apple,cheese,banana
3      eggs,juice,milk,butter
4         butter,banana,juice


In [11]:
import pandas as pd
from pymining import seqmining

df = pd.read_csv("transactions.csv")
sequences = df['sequence'].apply(lambda x: x.strip().split(",")).tolist()
min_support = 2

freq_seqs_spade = seqmining.freq_seq_enum(sequences, min_support)
print("Frequent Sequential Patterns (SPADE):")
for seq, freq in freq_seqs_spade:
    print(seq, "-> support:", freq)


Frequent Sequential Patterns (SPADE):
('jam',) -> support: 10
('milk', 'butter') -> support: 4
('bread', 'milk', 'banana') -> support: 2
('eggs', 'banana') -> support: 4
('cheese',) -> support: 12
('jam', 'cheese', 'bread') -> support: 2
('cheese', 'apple') -> support: 2
('juice', 'banana') -> support: 5
('milk', 'eggs') -> support: 3
('milk', 'juice') -> support: 3
('bread', 'coffee') -> support: 8
('apple', 'bread') -> support: 4
('apple', 'juice') -> support: 2
('eggs', 'bread', 'banana') -> support: 2
('juice', 'milk', 'butter') -> support: 3
('juice', 'milk', 'cheese') -> support: 2
('bread', 'milk') -> support: 3
('butter', 'banana') -> support: 4
('juice', 'bread', 'coffee') -> support: 2
('juice', 'jam') -> support: 3
('eggs', 'bread') -> support: 3
('cheese', 'banana') -> support: 4
('cheese', 'juice') -> support: 3
('apple', 'cheese') -> support: 3
('juice', 'banana', 'cheese') -> support: 2
('bread', 'eggs') -> support: 4
('juice', 'apple') -> support: 2
('jam', 'butter') ->

In [21]:
from gsppy.gsp import GSP
min_support_count = 2
min_support = min_support_count / len(sequences)
gsp_instance = GSP(sequences)
frequent_sequences = gsp_instance.search(min_support)

for freq_dict in frequent_sequences:
    for pattern, support in freq_dict.items():
        print(pattern,"-> support",support)



(('cheese',),) -> support 12
(('banana',),) -> support 17
(('bread',),) -> support 19
(('butter',),) -> support 17
(('apple',),) -> support 9
(('coffee',),) -> support 10
(('jam',),) -> support 10
(('eggs',),) -> support 10
(('juice',),) -> support 17
(('milk',),) -> support 10
(('cheese',), ('banana',)) -> support 3
(('cheese',), ('bread',)) -> support 2
(('cheese',), ('juice',)) -> support 2
(('banana',), ('butter',)) -> support 2
(('banana',), ('jam',)) -> support 3
(('banana',), ('juice',)) -> support 3
(('bread',), ('banana',)) -> support 2
(('bread',), ('butter',)) -> support 2
(('bread',), ('coffee',)) -> support 5
(('bread',), ('eggs',)) -> support 2
(('bread',), ('milk',)) -> support 2
(('butter',), ('banana',)) -> support 2
(('butter',), ('apple',)) -> support 2
(('butter',), ('jam',)) -> support 2
(('butter',), ('juice',)) -> support 3
(('apple',), ('cheese',)) -> support 2
(('apple',), ('bread',)) -> support 3
(('coffee',), ('butter',)) -> support 3
(('jam',), ('cheese',)) 

In [None]:
import pandas as pd
from itertools import combinations
df = pd.read_csv("transactions.csv")
sequence = df['sequence'].apply(lambda x: x.strip().split(",")).tolist()

min_support = 2  # minimum support threshold
def is_subseq(subseq,seq):
  it=iter(seq)
  return all(item in it for item in subseq)
def gsp(sequences,ms=2):
  pattern=[]

  unique=set(item for seq in sequence for item in seq)
  freq_p=[]
  for item in unique:
    count=sum(is_subseq([item],seq) for seq in sequence)
    if count>=ms:
      freq_p.append(([item],count))
  while freq_p:
    pattern.extend(freq_p)
    new_c_pat=[]

    seq=[fp[0] for fp in freq_p]
    for a , b in combinations(seq,2):
      if a[:-1]==b[:-1]:
        new_c_pat.append(a+[b[-1]])
    new_freq_p=[]
    for cond in new_c_pat:
      count=sum(is_subseq(cond,seq) for seq in sequence)
      if count>=ms:
        new_freq_p.append((cond,count))
    freq_p=new_freq_p
  return pattern

pattern=gsp(sequences,2)
for seq,sup in pattern:
  print(f"{seq}->{sup}")



['juice']->9
['jam']->11
['bread']->11
['coffee']->11
['cheese']->14
['apple']->14
['banana']->15
['butter']->11
['milk']->14
['eggs']->10
['juice', 'coffee']->2
['juice', 'banana']->4
['juice', 'butter']->4
['juice', 'milk']->3
['juice', 'eggs']->2
['jam', 'bread']->2
['jam', 'apple']->2
['jam', 'butter']->3
['jam', 'milk']->4
['jam', 'eggs']->2
['bread', 'coffee']->2
['bread', 'apple']->2
['bread', 'banana']->4
['bread', 'butter']->3
['bread', 'milk']->4
['coffee', 'cheese']->2
['coffee', 'apple']->2
['coffee', 'banana']->2
['coffee', 'butter']->3
['coffee', 'milk']->3
['cheese', 'apple']->2
['cheese', 'banana']->3
['cheese', 'butter']->3
['cheese', 'milk']->4
['cheese', 'eggs']->2
['apple', 'banana']->6
['apple', 'butter']->3
['apple', 'milk']->4
['banana', 'milk']->3
['banana', 'eggs']->2
['butter', 'milk']->3
['juice', 'butter', 'milk']->2
['bread', 'apple', 'butter']->2


In [None]:
import pandas as pd
from prefixspan import PrefixSpan

df = pd.read_csv("transactions.csv")
sequences = df['sequence'].apply(lambda x: x.strip().split(",")).tolist()
min_support = 2

ps = PrefixSpan(sequences)
prefixspan_results = ps.frequent(min_support)
print("Frequent Sequential Patterns (PrefixSpan):")
for freq, seq in prefixspan_results:
    print(seq, "-> support:", freq)


Frequent Sequential Patterns (PrefixSpan):
['cheese'] -> support: 15
['cheese', 'banana'] -> support: 3
['cheese', 'banana', 'butter'] -> support: 2
['cheese', 'banana', 'butter', 'milk'] -> support: 2
['cheese', 'banana', 'milk'] -> support: 2
['cheese', 'butter'] -> support: 3
['cheese', 'butter', 'milk'] -> support: 2
['cheese', 'milk'] -> support: 3
['cheese', 'apple'] -> support: 4
['cheese', 'jam'] -> support: 3
['cheese', 'coffee'] -> support: 3
['cheese', 'eggs'] -> support: 3
['cheese', 'eggs', 'milk'] -> support: 2
['cheese', 'juice'] -> support: 2
['banana'] -> support: 14
['banana', 'butter'] -> support: 3
['banana', 'butter', 'milk'] -> support: 2
['banana', 'milk'] -> support: 5
['banana', 'milk', 'cheese'] -> support: 2
['banana', 'cheese'] -> support: 6
['banana', 'eggs'] -> support: 4
['banana', 'eggs', 'milk'] -> support: 2
['banana', 'jam'] -> support: 2
['banana', 'bread'] -> support: 3
['banana', 'bread', 'cheese'] -> support: 2
['banana', 'coffee'] -> support: 2
[