In [2]:
import pandas as pd
from tqdm import tqdm

## Load Clusters and Alarms

In [5]:
clusters = pd.read_parquet("real-time clusters/20230101-20240101_real_time_clusters.parquet")

In [3]:
mob = pd.read_parquet("alarms datasets/mob/20230101-20240101_inpas_mob_preprocess__an__last_event__last_event__ext1.parquet")

In [4]:
adsl = pd.read_parquet("alarms datasets/tx/20230101-20240101_inpas_tx_preprocess__adsl__last_event__last_event__ext1.parquet")

In [5]:
ptn = pd.read_parquet("alarms datasets/tx/20230101-20240101_inpas_tx_preprocess__ptn__last_event__last_event__ext1.parquet")

In [6]:
sdh = pd.read_parquet("alarms datasets/tx/20230101-20240101_inpas_tx_preprocess__sdh__last_event__last_event__ext1.parquet")

In [7]:
pdh = pd.read_parquet("alarms datasets/tx/20230101-20240101_inpas_tx_preprocess__pdh__last_event__last_event__ext1.parquet")

In [8]:
mob = mob[["alarm_id", "last_occurrence"]]

In [9]:
adsl = adsl[["alarm_id", "last_occurrence"]]
ptn = ptn[["alarm_id", "last_occurrence"]]
sdh = sdh[["alarm_id", "last_occurrence"]]
pdh = pdh[["alarm_id", "last_occurrence"]]


In [10]:
alarms = pd.concat([mob, adsl, ptn, sdh, pdh])


In [11]:
alarms.to_parquet("alarms datasets/alarms.parquet")

## Clusters Statistics

In [22]:
clusters = pd.read_parquet("real-time clusters/20230101-20240101_real_time_clusters.parquet")
alarms = pd.read_parquet("alarms datasets/alarms.parquet")

In [23]:
clusters_filtered = clusters[["alarm_id", "cluster_id", "cluster_id2","first_occurrence"]]

In [24]:
clusters_complete = clusters_filtered.merge(alarms, on="alarm_id", how="left")

In [25]:
clusters_complete["first_occurrence"] = pd.to_datetime(clusters_complete["first_occurrence"])

In [26]:
clusters_complete = clusters_complete.dropna(subset=["last_occurrence"])

In [27]:
clusters_complete.to_parquet("real-time clusters/clusters_complete.parquet")

### Clusters Size

Number of alarms within a cluster

In [28]:
clusters_complete = pd.read_parquet("real-time clusters/clusters_complete.parquet")

In [29]:
clusters_grouped_by_id = clusters_complete.groupby(["cluster_id", "cluster_id2"])

In [30]:
clusters_grouped = clusters_grouped_by_id 

In [31]:
size_df = clusters_grouped_by_id.size().reset_index(name='size')

In [32]:
size_df.to_parquet("real-time clusters/size_df.parquet")

### Clusters Lifespan

Time difference from the last last_occurrence and the first first_occurrence of alarms 

In [33]:

def calculate_lifespan(group):
    return group['last_occurrence'].max() - group['first_occurrence'].min()

tqdm.pandas()


lifespan_df = clusters_grouped.apply(calculate_lifespan, include_groups=False).reset_index(name='lifespan')


In [34]:
lifespan_df.to_parquet("real-time clusters/lifespan_df.parquet")

### Clusters Delta First Occurrence

Distance between the last available first_occurrence and the first

In [35]:


def calculate_lifespan(group):
    return group['first_occurrence'].max() - group['first_occurrence'].min()


tqdm.pandas()


delta_df = clusters_grouped.progress_apply(calculate_lifespan).reset_index(name='delta first occurrence')

  0%|          | 0/6369371 [00:00<?, ?it/s]

100%|██████████| 6369371/6369371 [16:33<00:00, 6414.14it/s]


In [77]:
delta_df

Unnamed: 0,cluster_id,cluster_id2,lifespan
0,202301010000_1,202301010000_33,0 days
1,202301010000_10,202301010000_133,0 days
2,202301010000_104,202301010000_79,0 days
3,202301010000_105,202301010000_80,0 days
4,202301010000_106,202301010000_81,0 days
...,...,...,...
6369366,202312312358_5,202312312354_5,0 days
6369367,202312312358_6,202312312354_5,0 days
6369368,202312312358_7,202312312354_5,0 days
6369369,202312312358_8,202312312354_7,0 days


In [36]:
delta_df.to_parquet("real-time clusters/delta_df.parquet")

In [3]:
delta_df = pd.read_parquet("real-time clusters/delta_df.parquet")

In [5]:
delta_df.sort_values(by="delta first occurrence", ascending=False)

Unnamed: 0,cluster_id,cluster_id2,lifespan
5187223,202311041120_19,202311041120_6,0 days 02:07:54
4355824,202309272328_4,202309272328_3,0 days 01:53:46
3079985,202307270332_16,202307270332_10,0 days 01:34:24
5710744,202311272202_10,202311272202_3,0 days 01:32:50
399034,202302020154_50,202302020154_8,0 days 01:22:56
...,...,...,...
2419111,202306300446_37,202306300434_9,0 days 00:00:00
2419110,202306300446_36,202306300434_9,0 days 00:00:00
2419109,202306300446_35,202306300434_9,0 days 00:00:00
2419105,202306300446_31,202306300442_9,0 days 00:00:00


## Merge Statistics

In [37]:
size_df = pd.read_parquet("real-time clusters/size_df.parquet")
lifespan_df = pd.read_parquet("real-time clusters/lifespan_df.parquet")
delta_df = pd.read_parquet("real-time clusters/delta_df.parquet")

In [38]:
clusters_statistics = size_df.merge(lifespan_df, on=["cluster_id", "cluster_id2"], how="left").merge(delta_df, on=["cluster_id", "cluster_id2"], how="left")

In [19]:
clusters_statistics

Unnamed: 0,cluster_id,cluster_id2,size,lifespan_x,lifespan_y
0,202301010000_1,202301010000_33,1,1 days 23:25:34,0 days
1,202301010000_10,202301010000_133,1,0 days 01:14:29,0 days
2,202301010000_104,202301010000_79,1,0 days 01:38:28,0 days
3,202301010000_105,202301010000_80,1,0 days 01:58:25,0 days
4,202301010000_106,202301010000_81,1,0 days 06:55:03,0 days
...,...,...,...,...,...
6369366,202312312358_5,202312312354_5,1,0 days 00:04:34,0 days
6369367,202312312358_6,202312312354_5,1,0 days 00:03:57,0 days
6369368,202312312358_7,202312312354_5,1,0 days 00:00:00,0 days
6369369,202312312358_8,202312312354_7,1,0 days 00:05:31,0 days


In [39]:
clusters_statistics.to_parquet("real-time clusters/clusters_statistics.parquet")

## Extract Unique Slogans from Alarms


In [8]:
mob_slogan = mob[["alarm_id", "mob_slogan"]]
adsl_slogan = adsl[["alarm_id", "std_probable_cause_no"]]## , "summary"]
ptn_slogan = ptn[["alarm_id", "std_probable_cause_no"]]
sdh_slogan = sdh[["alarm_id", "std_probable_cause_no"]]
pdh_slogan = pdh[["alarm_id", "std_probable_cause_no"]]


In [10]:
mob_slogan.rename(columns={"mob_slogan": "slogan"}, inplace=True)
adsl_slogan.rename(columns={"std_probable_cause_no": "slogan"}, inplace=True)
ptn_slogan.rename(columns={"std_probable_cause_no": "slogan"}, inplace=True)
sdh_slogan.rename(columns={"std_probable_cause_no": "slogan"}, inplace=True)
pdh_slogan.rename(columns={"std_probable_cause_no": "slogan"}, inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mob_slogan.rename(columns={"mob_slogan": "slogan"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adsl_slogan.rename(columns={"std_probable_cause_no": "slogan"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ptn_slogan.rename(columns={"std_probable_cause_no": "slogan"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_

In [11]:
adsl_slogan

Unnamed: 0,alarm_id,slogan
0,COS4P;86723842,fermo totale apparato
1,COS3P;66481069,loss of signal los
2,COS4P;86723883,loss of signal los
3,COS4P;86723880,loss of signal los
4,COS4P;86723881,loss of signal los
...,...,...
9326841,EFMTX;6591efa50d1c1f0001da3661,loss of signal los
9326842,EFMTX;6591f1e60d1c1f0001da3841,allarme alimentazione
9326843,EFMTX;6591f101975f5100015a4868,apparato isolato
9326844,EFMTX;6591f092feea70000175b052,allarme alimentazione


In [12]:
alarms_slogan = pd.concat([mob_slogan, adsl_slogan, ptn_slogan, sdh_slogan, pdh_slogan])

In [13]:
alarms_slogan

Unnamed: 0,alarm_id,slogan
0,EFM;63b098b01a65d200011620f8,
1,EFM;63b0b476c623b40001a68b45,
2,EFM;63b0b5a8c623b40001a68b9b,
3,EFM;63b0b5c6c623b40001a68ba2,
4,EFM;63b0b675c623b40001a68c03,
...,...,...
805948,EFMTX;6483fbf332d3d9000193e3a2,fama no alarm since hh
805949,EFMTX;64c92bbe008e1d0001c7ff49,offline sw fama nport
805950,EFMTX;65661efc590d4500011934d1,fama no alarm since hh
805951,EFMTX;6591f1b6975f5100015a48b6,signal degraded


In [14]:
alarms_slogan.to_parquet("alarms datasets/alarms_slogan.parquet")

In [6]:
alarms_slogan = pd.read_parquet("alarms datasets/alarms_slogan.parquet")

In [7]:
alarms_slogan = alarms_slogan.drop("alarm_id", axis=1)

In [8]:
alarms_slogan

Unnamed: 0,slogan
0,
1,
2,
3,
4,
...,...
805948,fama no alarm since hh
805949,offline sw fama nport
805950,fama no alarm since hh
805951,signal degraded


In [9]:
unique_slogans = alarms_slogan['slogan'].unique()

alarms_slogan_unique_slogans = pd.DataFrame({'slogan': unique_slogans})


In [10]:
alarms_slogan_unique_slogans

Unnamed: 0,slogan
0,
1,guasto sistema di alimentazione
2,allarme interruttore batteria b
3,guasto grave condizionamento
4,mancanza rete
...,...
21531,anorm rete raddriz
21532,avar diese quadro com
21533,cdz tx locale tx pal
21534,pot tx pr siae rt


In [8]:
alarms_slogan_unique_slogans.to_parquet("alarms datasets/alarms_slogan_unique_slogans.parquet")

## Associate for each Cluster a bitmap with the slogans 

In [11]:
alarms_slogan = pd.read_parquet("alarms datasets/alarms_slogan.parquet")

In [12]:
alarms_slogan

Unnamed: 0,alarm_id,slogan
0,EFM;63b098b01a65d200011620f8,
1,EFM;63b0b476c623b40001a68b45,
2,EFM;63b0b5a8c623b40001a68b9b,
3,EFM;63b0b5c6c623b40001a68ba2,
4,EFM;63b0b675c623b40001a68c03,
...,...,...
805948,EFMTX;6483fbf332d3d9000193e3a2,fama no alarm since hh
805949,EFMTX;64c92bbe008e1d0001c7ff49,offline sw fama nport
805950,EFMTX;65661efc590d4500011934d1,fama no alarm since hh
805951,EFMTX;6591f1b6975f5100015a48b6,signal degraded


In [13]:
grouped_alarms = alarms_slogan.groupby("alarm_id")

In [6]:
grouped_alarms.size().sort_values(ascending=False)

alarm_id
COS1B;107016710                   1
EFMTX;644ea414adefac00017e2cdf    1
EFMTX;644ea3da4ed0ed00011c5a22    1
EFMTX;644ea4044ed0ed00011c5a40    1
EFMTX;644ea405e9081200013c14d7    1
                                 ..
EFM;64a2ac80a4a90400015bf004      1
EFM;64a2ac813a4a890001d85ae4      1
EFM;64a2ac813a4a890001d85ae5      1
EFM;64a2ac822a3b2e00012a2726      1
ROS1P;172620901                   1
Length: 49966959, dtype: int64

In [14]:
clusters_filtered = clusters[["alarm_id", "cluster_id", "cluster_id2"]]

In [15]:
clusters_filtered

Unnamed: 0,alarm_id,cluster_id,cluster_id2
0,EFM;63af9dfe2aaf2c000125807a,202301010000_3,202301010000_1
1,EFM;63af9dfec623b40001a5c9cf,202301010000_3,202301010000_1
2,COS1P;136902468,202301010000_20,202301010000_2
3,EFM;63b0beaf2aaf2c0001258736,202301010000_9,202301010000_3
4,COS1P;136891403,202301010000_21,202301010000_4
...,...,...,...
38,EFMTX;6591efb9d7d6bc00019444d8,202312312350_16,202312312350_13
39,EFMTX;6591f046d8beb800016deaef,202312312348_17,202312312344_9
40,EFM;6591f199d4c961000177739d,202312312358_1,202312312358_13
41,EFM;6591f19aba09b0000160c08d,202312312358_1,202312312358_13


In [16]:
clusters_with_slogans = pd.merge(clusters_filtered, alarms_slogan, on='alarm_id', how='left')

In [17]:
clusters_with_slogans.to_parquet("real-time clusters/clusters_with_slogans.parquet")

In [112]:
unique_slogans = pd.read_parquet("alarms datasets/alarms_slogan_unique_slogans.parquet")

In [113]:
unique_slogans

Unnamed: 0,slogan
0,
1,guasto sistema di alimentazione
2,allarme interruttore batteria b
3,guasto grave condizionamento
4,mancanza rete
...,...
21531,anorm rete raddriz
21532,avar diese quadro com
21533,cdz tx locale tx pal
21534,pot tx pr siae rt


In [114]:
unique_slogans = unique_slogans['slogan'].unique()

In [108]:
clusters_with_slogans = pd.read_parquet("real-time clusters/clusters_with_slogans.parquet")

In [94]:
clusters_with_slogans = clusters_with_slogans

In [105]:
grouped_clusters = clusters_with_slogans.groupby(["cluster_id", "cluster_id2"])

In [117]:

# Definisci una funzione per calcolare la presenza di ciascuno slogan
def calculate_presence(group):
    presence = {slogan: 0 for slogan in unique_slogans}
    for slogan in group['slogan']:
        presence[slogan] = 1
    return pd.Series(presence)

tqdm.pandas()

# Applica la funzione a ciascun gruppo e crea un nuovo dataframe con i risultati
result_df = grouped_clusters.progress_apply(calculate_presence)

result_df.index = result_df.index.map(lambda x: f"cluster_id: {x[0]} , cluster_id2: {x[1]}")



  0%|          | 906/6369401 [00:54<106:18:05, 16.64it/s]


KeyboardInterrupt: 

In [120]:
import pandas as pd

# Define a function to calculate the presence of each slogan
def calculate_presence(group):
    presence = pd.Series(0, index=unique_slogans)
    presence[group['slogan'].fillna('')] = 1
    return presence

tqdm.pandas()

# Apply the function to each group and create a new DataFrame with the results
result_df = grouped_clusters.apply(calculate_presence)

# Modify the index
result_df.index = result_df.index.map(lambda x: f"cluster_id: {x[0]} , cluster_id2: {x[1]}")


In [115]:
# Raggruppa i dati per cluster e slogan, e conta il numero di occorrenze di ciascuno slogan in ciascun cluster
counts = clusters_with_slogans.groupby(['cluster_id', 'cluster_id2', 'slogan']).size()

# Crea un nuovo dataframe con gli slogan come colonne e i cluster come indici
result_df = counts.unstack(fill_value=0)

# Modifica l'indice del dataframe
result_df.index = result_df.index.map(lambda x: f"cluster_id: {x[0]} , cluster_id2: {x[1]}")

In [116]:
result_df

slogan,alimentazione,allarme alimentazione,allarme interno,allarme protezione,anomalia rf rx,apparato disservito,apparato isolato,apparato isolato pr,canale di comunicazione interno down,cardinitcard initializing,...,serversignalfailure specific problem non inviato da agent,signal degraded,signal degraded critical,switch unitpath signal label unequipped,tunnel aps degraded downgrade of aps tunnel protection group,tunnel aps outage outage of aps tunnel protection group,unidentified specific problem non inviato da agent,vc unequipped ho vc unequipped,vc unequipped lo vc unequipped,vlan megaco ko
"cluster_id: 202301010000_104 , cluster_id2: 202301010000_79",0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202301010000_105 , cluster_id2: 202301010000_80",0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202301010000_106 , cluster_id2: 202301010000_81",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202301010000_107 , cluster_id2: 202301010000_82",0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202301010000_108 , cluster_id2: 202301010000_82",0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"cluster_id: 202312312358_22 , cluster_id2: 202312312358_6",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202312312358_23 , cluster_id2: 202312312354_3",0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202312312358_26 , cluster_id2: 202312312358_11",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202312312358_27 , cluster_id2: 202312312358_12",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [68]:
result_df

Unnamed: 0,None,guasto sistema di alimentazione,allarme interruttore batteria b,guasto grave condizionamento,mancanza rete,allarme temperatura,vo anomalia e guasto s e,guasto condizionamento,allarme porta,minima tensione batteria,...,rete interrompibilita,ricetrasme pr lsy,tand pr lsy,complesso teleco mu,ge in all,anorm rete raddriz,avar diese quadro com,cdz tx locale tx pal,pot tx pr siae rt,alim pr siae rt
"cluster_id: 202301010000_1 , cluster_id2: 202301010000_33",1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202301010000_10 , cluster_id2: 202301010000_133",1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202301010000_100 , cluster_id2: 202301010000_78",1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202301010000_101 , cluster_id2: 202301010000_78",1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202301010000_102 , cluster_id2: 202301010000_77",1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"cluster_id: 202301011514_3 , cluster_id2: 202301011514_1",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202301011514_5 , cluster_id2: 202301011514_2",1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202301011514_7 , cluster_id2: 202301011512_1",1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"cluster_id: 202301011514_8 , cluster_id2: 202301011512_1",1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [96]:
import pandas as pd

def calculate_presence(group):
    presence = pd.Series(0, index=unique_slogans)
    presence[group['slogan']] = 1
    return presence

# Assuming 'grouped_clusters' is a DataFrameGroupBy object
# containing groups of clusters, and 'unique_slogans' is a list
# of unique slogans

# tqdm is not used in the optimized version as it won't significantly
# improve the performance of vectorized operations

result_df = grouped_clusters.progress_apply(calculate_presence)

# If 'grouped_clusters' is a DataFrameGroupBy object with multiple
# levels of grouping, you may need to adjust the index mapping accordingly
result_df.index = result_df.index.map(lambda x: f"cluster_id: {x[0]} , cluster_id2: {x[1]}")


  0%|          | 1/6369401 [00:17<31585:29:44, 17.85s/it]


ValueError: Cannot mask with non-boolean array containing NA / NaN values

In [65]:
# Definisci una funzione per calcolare la presenza di ciascuno slogan
def calculate_presence(group):
    # Inizializza un dizionario con tutti gli slogan unici come chiavi e 0 come valori
    presence = {slogan: 0 for slogan in group['slogan'].unique()}
    # Per ogni slogan nel gruppo, imposta il valore corrispondente nel dizionario a 1
    presence.update({slogan: 1 for slogan in group['slogan']})
    # Restituisce una serie pandas creata dal dizionario
    return pd.Series(presence)

tqdm.pandas()

# Applica la funzione a ciascun gruppo e crea un nuovo dataframe con i risultati
result_df = grouped_clusters.progress_apply(calculate_presence)

result_df.index = result_df.index.map(lambda x: f"cluster_id: {x[0]} , cluster_id2: {x[1]}")

  0%|          | 0/5434 [00:00<?, ?it/s]

100%|██████████| 5434/5434 [00:05<00:00, 960.22it/s] 


In [66]:
result_df

cluster_id: 202301010000_1 , cluster_id2: 202301010000_33      1
cluster_id: 202301010000_10 , cluster_id2: 202301010000_133    1
cluster_id: 202301010000_100 , cluster_id2: 202301010000_78    1
cluster_id: 202301010000_101 , cluster_id2: 202301010000_78    1
cluster_id: 202301010000_102 , cluster_id2: 202301010000_77    1
                                                              ..
cluster_id: 202301011514_3 , cluster_id2: 202301011514_1       1
cluster_id: 202301011514_5 , cluster_id2: 202301011514_2       1
cluster_id: 202301011514_7 , cluster_id2: 202301011512_1       1
cluster_id: 202301011514_8 , cluster_id2: 202301011512_1       1
cluster_id: 202301011514_9 , cluster_id2: 202301011514_3       1
Length: 5788, dtype: int64

In [37]:
import pandas as pd
from tqdm import tqdm

# Esempio di dataframe
data = {
    'alarm_id': [1, 2, 3, 4, 5, 6],
    'cluster_id': [1, 1, 2, 2, 3, 3],
    'cluster_id2': [1, 2, 1, 2, 1, 2],
    'slogan': ['slogan1', 'slogan2', 'slogan3', 'slogan4', 'slogan5', 'slogan6']
}

df = pd.DataFrame(data)

# Raggruppa le righe in base a cluster_id e cluster_id2
grouped = df.groupby(['cluster_id', 'cluster_id2'])

# Lista degli slogan univoci
unique_slogans = df['slogan'].unique()

# Definisci una funzione per calcolare la presenza di ciascuno slogan
def calculate_presence(group):
    presence = {slogan: 0 for slogan in unique_slogans}
    for slogan in group['slogan']:
        presence[slogan] = 1
    return pd.Series(presence)

tqdm.pandas()

# Applica la funzione a ciascun gruppo e crea un nuovo dataframe con i risultati
result_df = grouped.progress_apply(calculate_presence)

# Rinomina l'indice (cluster_id, cluster_id2)
result_df.index = result_df.index.map(lambda x: f"({x[0]}, {x[1]})")

print(result_df)


100%|██████████| 6/6 [00:00<00:00, 1477.65it/s]

        slogan1  slogan2  slogan3  slogan4  slogan5  slogan6
(1, 1)        1        0        0        0        0        0
(1, 2)        0        1        0        0        0        0
(2, 1)        0        0        1        0        0        0
(2, 2)        0        0        0        1        0        0
(3, 1)        0        0        0        0        1        0
(3, 2)        0        0        0        0        0        1





In [38]:
unique_slogans

array(['slogan1', 'slogan2', 'slogan3', 'slogan4', 'slogan5', 'slogan6'],
      dtype=object)