In [60]:
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder
from tabulate import tabulate


### Load Clusters

In [33]:
# Cluster export keyed by ne_id + loc_name.
clusters_ne_id_loc_name = pd.read_parquet(
    "20240601_20240828_clusters_rdg_all_ne_id_loc_name_aggregation_column_first_occurrence_5min_5min_5min_delta.parquet"
)
# Cluster export keyed by ne_id + the first three octets of ne_address.
clusters_ne_id_ne_address_first_three_octets = pd.read_parquet(
    "20240601_20240828_clusters_rdg_all_ne_id_ne_address_first_three_octets_aggregation_column_first_occurrence_5min_5min_5min_delta.parquet"
)

### Filter only important columns

In [34]:
# Both exports are reduced to the same five columns: the three cluster-level
# ids plus the two fields later combined into the mined item.
selected_columns = ["cluster_id", "cluster_id2", "cluster_id3", "ne_type", "std_probable_cause_no"]

clusters_ne_id_loc_name_filtered = clusters_ne_id_loc_name.loc[:, selected_columns]
clusters_ne_id_ne_address_first_three_octets_filtered = (
    clusters_ne_id_ne_address_first_three_octets.loc[:, selected_columns]
)

### Create the new column "slogan_netype" (slogan + "_" + ne_type)

In [50]:
# Rename the probable-cause column to "slogan", then append the combined
# "<slogan>_<ne_type>" item column. A missing operand propagates as NaN.
clusters_ne_id_loc_name_filtered = (
    clusters_ne_id_loc_name_filtered
    .rename(columns={"std_probable_cause_no": "slogan"})
    .assign(slogan_netype=lambda frame: frame["slogan"] + "_" + frame["ne_type"])
)

clusters_ne_id_ne_address_first_three_octets_filtered = (
    clusters_ne_id_ne_address_first_three_octets_filtered
    .rename(columns={"std_probable_cause_no": "slogan"})
    .assign(slogan_netype=lambda frame: frame["slogan"] + "_" + frame["ne_type"])
)

In [80]:
# Keep only the rows that actually carry a slogan_netype item.
has_slogan_netype = clusters_ne_id_ne_address_first_three_octets_filtered['slogan_netype'].notna()
clusters_ne_id_ne_address_first_three_octets_filtered = clusters_ne_id_ne_address_first_three_octets_filtered[has_slogan_netype]

## Preprocessing and FP-Growth

In [61]:
def create_basket(data, aggregation_field):
    """Build a one-hot transaction table from the ``slogan_netype`` column.

    Rows of ``data`` are grouped by ``aggregation_field``; every group becomes
    one transaction whose items are that group's ``slogan_netype`` values.
    The transactions are then encoded with ``TransactionEncoder``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``aggregation_field`` and a ``slogan_netype`` column.
    aggregation_field : str
        Name of the column whose values identify a transaction.

    Returns
    -------
    pandas.DataFrame
        One row per remaining group and one column per distinct item
        (``te.columns_``). The group keys are not carried over: the result
        uses a default index.
    """
    # Rows without an item are removed here so callers do not have to run a
    # separate dropna first (the notebook did that by hand for only one of
    # its two frames). NOTE(review): a NaN mixed with string items is expected
    # to break TransactionEncoder.fit - confirm against the mlxtend version in use.
    items = data.dropna(subset=['slogan_netype'])

    basket = items.groupby(aggregation_field)['slogan_netype'].apply(list)
    te = TransactionEncoder()
    te_ary = te.fit(basket).transform(basket)
    basket_df = pd.DataFrame(te_ary, columns=te.columns_)

    return basket_df

In [70]:
def print_frequent_itemsets(frequent_itemsets):
    """Render a frequent-itemsets frame as a grid-formatted text table.

    Parameters
    ----------
    frequent_itemsets : pandas.DataFrame
        Frame with an ``itemsets`` column holding iterables of items (or
        already-rendered strings). It is not modified.

    Returns
    -------
    str
        The table produced by ``tabulate`` with ``tablefmt='grid'``, using the
        frame's column names as headers. Despite the function name, nothing
        is printed here; the caller prints the returned string.
    """
    def _as_text(itemset):
        # A string is treated as an already-rendered cell. Joining it again
        # would split it into single characters.
        if isinstance(itemset, str):
            return itemset
        # Sorting gives a stable item order regardless of how the underlying
        # set-like container happens to iterate.
        return ', '.join(sorted(str(item) for item in itemset))

    # Work on a copy so the caller's frame keeps its original itemsets and
    # the function can be called repeatedly on the same frame.
    printable = frequent_itemsets.copy()
    printable['itemsets'] = printable['itemsets'].apply(_as_text)
    table = printable.values.tolist()
    return tabulate(table, headers=printable.columns, tablefmt='grid')

### Cluster ne_id_loc_name LVL 1-2-3

In [74]:
# One basket per clustering level of the loc_name-based clusters.
basket_df_lvl1, basket_df_lvl2, basket_df_lvl3 = [
    create_basket(clusters_ne_id_loc_name_filtered, level_column)
    for level_column in ("cluster_id", "cluster_id2", "cluster_id3")
]

# Mine itemsets that reach at least 5% support in each basket.
frequent_itemsets_lvl1, frequent_itemsets_lvl2, frequent_itemsets_lvl3 = [
    fpgrowth(level_basket, min_support=0.05, use_colnames=True)
    for level_basket in (basket_df_lvl1, basket_df_lvl2, basket_df_lvl3)
]

# Highest support first.
frequent_itemsets_lvl1, frequent_itemsets_lvl2, frequent_itemsets_lvl3 = [
    mined.sort_values(by='support', ascending=False)
    for mined in (frequent_itemsets_lvl1, frequent_itemsets_lvl2, frequent_itemsets_lvl3)
]

report_sections = [
    ("LVL 1", frequent_itemsets_lvl1),
    ("LVL 2", frequent_itemsets_lvl2),
    ("LVL 3", frequent_itemsets_lvl3),
]
for position, (heading, ranked_itemsets) in enumerate(report_sections):
    # Blank separator between sections, but not before the first one.
    if position > 0:
        print("\n")
    print(heading)
    print(print_frequent_itemsets(ranked_itemsets))


LVL 1
+-----------+------------------------------------+
|   support | itemsets                           |
| 0.42876   | link-down_n/d                      |
+-----------+------------------------------------+
| 0.351292  | nodeunmanagable_n/d                |
+-----------+------------------------------------+
| 0.168442  | node-down_n/d                      |
+-----------+------------------------------------+
| 0.137755  | nodeunmanagable_n/d, node-down_n/d |
+-----------+------------------------------------+
| 0.0588854 | snmplinkup_n/d                     |
+-----------+------------------------------------+


LVL 2
+-----------+------------------------------------+
|   support | itemsets                           |
| 0.512136  | link-down_n/d                      |
+-----------+------------------------------------+
| 0.297899  | nodeunmanagable_n/d                |
+-----------+------------------------------------+
| 0.18255   | node-down_n/d                      |
+-----------+----

In [76]:
# Inspect the octet-based cluster frame; the notebook display elides the middle rows of a long frame.
clusters_ne_id_ne_address_first_three_octets_filtered

Unnamed: 0,cluster_id,cluster_id2,cluster_id3,ne_type,slogan,slogan_netype
48867,55805,1,1,accesspoint,,
29596,55806,2,2,accesspoint,,
23576,55807,3,3,accesspoint,,
20715,55808,4,4,accesspoint,,
39779,55809,5,5,accesspoint,,
...,...,...,...,...,...,...
6667,39978,52678,52885,n/d,link-down,link-down_n/d
6666,39978,52678,52885,n/d,link-down,link-down_n/d
6677,39978,52678,52885,n/d,snmplinkup,snmplinkup_n/d
6670,39978,52678,52885,n/d,link-down,link-down_n/d


### Cluster ne_id_ne_address_first_three_octets LVL 1-2-3

In [82]:
# One basket per clustering level of the octet-based clusters.
basket_df_lvl1, basket_df_lvl2, basket_df_lvl3 = [
    create_basket(clusters_ne_id_ne_address_first_three_octets_filtered, level_column)
    for level_column in ("cluster_id", "cluster_id2", "cluster_id3")
]

# Mine itemsets that reach at least 5% support in each basket.
frequent_itemsets_lvl1, frequent_itemsets_lvl2, frequent_itemsets_lvl3 = [
    fpgrowth(level_basket, min_support=0.05, use_colnames=True)
    for level_basket in (basket_df_lvl1, basket_df_lvl2, basket_df_lvl3)
]

# Highest support first.
frequent_itemsets_lvl1, frequent_itemsets_lvl2, frequent_itemsets_lvl3 = [
    mined.sort_values(by='support', ascending=False)
    for mined in (frequent_itemsets_lvl1, frequent_itemsets_lvl2, frequent_itemsets_lvl3)
]

report_sections = [
    ("LVL 1", frequent_itemsets_lvl1),
    ("LVL 2", frequent_itemsets_lvl2),
    ("LVL 3", frequent_itemsets_lvl3),
]
for position, (heading, ranked_itemsets) in enumerate(report_sections):
    # Blank separator between sections, but not before the first one.
    if position > 0:
        print("\n")
    print(heading)
    print(print_frequent_itemsets(ranked_itemsets))

LVL 1
+-----------+------------------------------------+
|   support | itemsets                           |
| 0.428924  | link-down_n/d                      |
+-----------+------------------------------------+
| 0.351054  | nodeunmanagable_n/d                |
+-----------+------------------------------------+
| 0.168247  | node-down_n/d                      |
+-----------+------------------------------------+
| 0.137576  | nodeunmanagable_n/d, node-down_n/d |
+-----------+------------------------------------+
| 0.0589542 | snmplinkup_n/d                     |
+-----------+------------------------------------+


LVL 2
+-----------+------------------------------------+
|   support | itemsets                           |
| 0.49554   | link-down_n/d                      |
+-----------+------------------------------------+
| 0.314145  | nodeunmanagable_n/d                |
+-----------+------------------------------------+
| 0.175776  | node-down_n/d                      |
+-----------+----