# Generate Device Subsets

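Unlike the subset generation, this notebook generates 5-10 random combinations of different devices (each device contributing all of its rows) for each cardinality from 2 to `len(devices)`. Combinations that contain the same set of devices are kept only once, so the largest cardinalities end up with fewer distinct combinations (only one combination contains every device).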

In [28]:
import random
from itertools import combinations

import pandas as pd
from tqdm.autonotebook import tqdm

In [29]:
# Read the interim CSV into a DataFrame. The frame displayed in the next
# cell has a "label" column (device identifier) and a "concatenated" column.
string_df = pd.read_csv(filepath_or_buffer="../../data/interim/string_df.csv")

In [30]:
# Display the loaded frame to check its contents.
string_df

Unnamed: 0,label,concatenated
0,GooglePixel3A_L,0000000000000000000000000000000000000000000000...
1,GooglePixel3A_L,0000000000000000000000000000000000000000000000...
2,GooglePixel3A_L,0000000000000000000000000000000000000000000000...
3,GooglePixel3A_L,0000000000000000000000000000000000000000000000...
4,GooglePixel3A_L,0000000000000000000000000000000000000000000000...
...,...,...
951,iPhoneXSMax_M,0001101000101101000000000001101111111111000000...
952,iPhoneXSMax_M,0001101000101101010000000001101111111111000000...
953,iPhoneXSMax_M,0001101000101101000000000001101111111111000000...
954,iPhoneXSMax_M,0001101000101101010000000001101111111111000000...


In [31]:
# Distinct values of the "label" column, in order of first appearance.
labels = string_df.loc[:, "label"].unique()

In [32]:
# Display the distinct device labels found in the "label" column.
labels

array(['GooglePixel3A_L', 'GooglePixel3A_V', 'HuaweiHonor9_R',
       'HuaweiL21_D', 'HuaweiP10_Q', 'HuaweiP20_G', 'OnePlusNord_O',
       'OppoFindX3Neo_A', 'S21Ultra_M', 'SamsungJ6_K', 'SamsungM31_A',
       'SamsungS4_C', 'SamsungS6_H', 'SamsungS7_I', 'XiaomiA2_E',
       'XiaomiRedmi4_B', 'XiaomiRedmi5_J', 'XiaomiRedmiNote7_S',
       'XiaomiRedmiNote9S_T', 'iPhone11_B', 'iPhone11_C', 'iPhone11_F',
       'iPhone11_M', 'iPhone12Pro_C', 'iPhone12_M', 'iPhone12_W',
       'iPhone6_N', 'iPhone7_F', 'iPhone7_X', 'iPhoneXR_A', 'iPhoneXR_L',
       'iPhoneXR_U', 'iPhoneXSMax_M'], dtype=object)

In [33]:
# Accumulator for the sampled combinations: one dict per draw with the keys
# 'combination' and 'length', appended by the sampling loop.
all_combinations_list = []

In [34]:
# --- Sampling configuration ---
max_devices = len(labels)  # largest cardinality: every device at once
batches = 10  # number of random draws per cardinality

# Fixed seed so that every run draws the same combinations.
random.seed(42)

# Population to sample from, materialised once instead of being recomputed
# from the DataFrame on every inner iteration.
device_labels = list(labels)

# Start from an empty list so that re-running this cell does not append a
# second copy of every draw to the results of a previous run.
all_combinations_list = []

for i in tqdm(range(batches), desc="⚠️ Batch Processing"):
    for r in tqdm(range(2, max_devices + 1), desc=f"↘️ Batch #{i + 1}"):
        # One random draw of r distinct devices (sampling without
        # replacement). The draw itself is the combination: enumerating the
        # r-element combinations of an r-element list yields exactly this
        # one tuple, so no further expansion is needed.
        labels_combination = tuple(random.sample(device_labels, r))

        # Record the combination together with its cardinality.
        all_combinations_list.append({
            'combination': labels_combination,
            'length': len(labels_combination),
        })

⚠️ Batch Processing:   0%|          | 0/10 [00:00<?, ?it/s]

↘️ Batch #1:   0%|          | 0/32 [00:00<?, ?it/s]

↘️ Batch #2:   0%|          | 0/32 [00:00<?, ?it/s]

↘️ Batch #3:   0%|          | 0/32 [00:00<?, ?it/s]

↘️ Batch #4:   0%|          | 0/32 [00:00<?, ?it/s]

↘️ Batch #5:   0%|          | 0/32 [00:00<?, ?it/s]

↘️ Batch #6:   0%|          | 0/32 [00:00<?, ?it/s]

↘️ Batch #7:   0%|          | 0/32 [00:00<?, ?it/s]

↘️ Batch #8:   0%|          | 0/32 [00:00<?, ?it/s]

↘️ Batch #9:   0%|          | 0/32 [00:00<?, ?it/s]

↘️ Batch #10:   0%|          | 0/32 [00:00<?, ?it/s]

In [35]:
# Convert the list of dictionaries to a DataFrame
all_combinations_df = pd.DataFrame(all_combinations_list)

# Remove duplicates: keep rows where the set of devices is unique
# Convert each combination to a set and drop duplicates
all_combinations_df['combination_set'] = all_combinations_df['combination'].apply(set)
all_combinations_df.drop_duplicates(subset='combination_set', keep='first', inplace=True)

# Drop the helper column 'combination_set' as it's no longer needed
all_combinations_df.drop(columns=['combination_set'], inplace=True)

In [36]:
# Display the combinations that remain after duplicate removal.
all_combinations_df

Unnamed: 0,combination,length
0,"(OppoFindX3Neo_A, GooglePixel3A_V)",2
1,"(XiaomiRedmiNote7_S, XiaomiRedmi4_B, XiaomiA2_E)",3
2,"(S21Ultra_M, OnePlusNord_O, HuaweiP20_G, iPhon...",4
3,"(HuaweiHonor9_R, GooglePixel3A_V, HuaweiP20_G,...",5
4,"(iPhoneXSMax_M, GooglePixel3A_V, XiaomiRedmiNo...",6
...,...,...
314,"(SamsungM31_A, XiaomiRedmi5_J, iPhoneXR_A, Hua...",28
315,"(iPhone11_F, iPhoneXR_A, iPhone11_B, XiaomiRed...",29
316,"(GooglePixel3A_L, iPhone7_F, XiaomiRedmi5_J, S...",30
317,"(OppoFindX3Neo_A, GooglePixel3A_L, SamsungS6_H...",31


In [37]:
# Persist the combinations without the index column. The tuples in the
# 'combination' column are written as their string representation.
all_combinations_df.to_csv(
    path_or_buf="../../data/train_test/10_combinations_df.csv",
    index=False,
)