In [3]:
import re
import random
import pandas as pd
import hashlib

In [4]:
# Read the whole card document into memory as one string.
with open('certcards2.txt', mode='r', encoding='utf8') as f:
    all_cards = f.read()

# Cards are separated by three newlines followed by a line holding only the
# card number; splitting on that leaves one "heading\nbody" chunk per card.
card_split_pattern = r'\n\n\n\d+\n'
all_cards = re.split(card_split_pattern, all_cards)

# Group 1 is the heading (text up to the first newline), group 2 is the rest.
card_pattern = re.compile(r'(.+?)\n([\s\S]+)', re.DOTALL)

cards = []
for cstring in all_cards:
    match = card_pattern.search(cstring)
    if match is None:
        # Chunk has no "heading + body" shape (e.g. leading blank lines).
        continue
    subject, content = match.group(1), match.group(2)
    stripped_content = content.strip()
    # Keep only cards whose trimmed body is longer than 5 characters.
    if len(stripped_content) > 5:
        cards.append((subject, stripped_content))

def hash_string_md5(s):
    """
    Hash a string with MD5 and return the full hexadecimal digest.

    The digest is NOT truncated: all 32 hex characters are returned. The
    notebook uses this value as the key when merging cards with the ages
    stored in 'card_ages.json', so shortening it would stop previously
    stored hashes from matching.

    Parameters:
    - s (str): The input string to hash.

    Returns:
    - str | None: The 32-character hexadecimal MD5 digest of the UTF-8
      encoded string, or None when `s` is null (None / NaN).
    """
    if pd.isnull(s):
        return None  # Handle None / NaN values gracefully
    return hashlib.md5(s.encode('utf-8')).hexdigest()


def remake_card_document(existing_cards: pd.DataFrame, filename: str='certcards2.txt'):
    """
    Write the cards to `filename`, numbered from 1 in the frame's row order.

    Each card is emitted as five newlines, the card number on its own line,
    the 'head' value on its own line, then the 'body' value. The card number
    is also printed as each card is written.

    Parameters:
    - existing_cards (pd.DataFrame): Cards to write; must have 'head' and
      'body' columns.
    - filename (str): Destination path; the file is overwritten.
    """
    with open(filename, mode='w', encoding='utf8') as out:
        numbered_rows = enumerate(existing_cards.iterrows(), start=1)
        for number, (_, card) in numbered_rows:
            print(number)
            out.write('\n' * 5)
            out.write(str(number) + '\n')
            out.write(card['head'] + '\n')
            out.write(card['body'])


# Tabulate the parsed cards and attach the MD5 hash of each body, which is
# the key used to look up a card's stored age.
existing_cards = pd.DataFrame(cards, columns=['head', 'body']).assign(
    hash=lambda frame: frame['body'].apply(hash_string_md5)
)
existing_cards

Unnamed: 0,head,body,hash
0,Windows,You can increase or decrease the size of icons...,3a0af754d5909a32b6633c9407900dad
1,MS Dataverse,The capability to extend the default behavior ...,ed4b9ad5a81d5e638c61d6cd7e32bf01
2,Child Services AWS Review,Mainframe-as-a-Service (MFaaS) is a subscripti...,493d2025b15649f4e4bf4bf206d4285b
3,DAX Studio,Referencing Tables Using EVALUATE: You can als...,35d2388d03a9da0debbad267bf6ad1f4
4,Tax stuff,"""TA-2"" refers to the Transient Accommodations ...",7367393b770c9b331cfdefe3c30f01c5
...,...,...,...
355,Python,"When it comes to pre-trained models, TensorFlo...",b3f73714e6b6dd3887df7c2d2effb595
356,Python,The purpose of the .__init__() method in a Pyt...,dfeadf2dc34f5c52473326752587fcd4
357,Python,You don’t need to use a dedicated tool to crea...,7792b02e5cbf678111837dd1a85af009
358,Python,Augmented assignment operators provide a more ...,8229c364af46d999e5b82fd475cd8335


# Completely Random Shuffle

In [5]:
# rows, cols = existing_cards.shape

# existing_cards = existing_cards.sample(frac=1)
# remake_card_document(existing_cards, filename='certcards2.txt')

# # print(f"{i-1} cards found")

# Age Shuffle

In [6]:
# Ages recorded by the previous run, keyed by the hash of the card body.
card_ages = pd.read_json('card_ages.json')

# Left merge so every card currently in the document is kept; cards with no
# recorded age come through with NaN.
cards_to_age = pd.merge(
    left=existing_cards,
    right=card_ages[['hash', 'age']],
    on='hash',
    how='left'
)

# New cards start at age 0; every card then ages by 10% plus a random
# increment in [0, 1) so the ordering changes from run to run.
cards_to_age['age'] = cards_to_age['age'].fillna(0)
cards_to_age['age'] = cards_to_age['age'] * 1.1
cards_to_age['age'] = cards_to_age['age'] + [random.random() for _ in cards_to_age.index]

# Oldest cards first. drop_duplicates returns a NEW frame, so the result must
# be assigned; otherwise duplicate hashes are written back to card_ages.json
# and multiply in the merge on the next run. After the descending sort,
# keep='first' retains the oldest copy of each card.
cards_to_age = cards_to_age.sort_values('age', ascending=False)
cards_to_age = cards_to_age.drop_duplicates(subset=['hash'], keep='first')
cards_to_age.to_json('card_ages.json', indent=2)


# Rewrite the card document in the new order, numbering cards from 1.
with open('certcards2.txt', mode='w', encoding='utf8') as f:
    for i, (_, row) in enumerate(cards_to_age.iterrows(), start=1):
        print(i)
        f.write('\n'*5)
        f.write(str(i)+'\n')
        f.write(row['head']+'\n')
        f.write(row['body'])
        print(F"{row['head']}: {row['age']:.4f}")

# cards_to_age

1
OData: 6.8402
2
OData Requests: 6.8204
3
Python: 6.7260
4
OData Requests: 6.7173
5
OData: 6.7102
6
Power BI: 6.6831
7
sklearn: 6.5634
8
MS Data Analyst: 6.4047
9
Diffusers Library: 6.3275
10
Azure AI Search: 6.3120
11
Maths: 6.2888
12
Diffusers Library: 6.2513
13
numpy: 6.2508
14
Conditional Access: 6.2491
15
PP365: 6.2467
16
Azure Functions: 6.2326
17
Conditional Access: 6.2312
18
General: 6.2259
19
DNS: 6.2034
20
Conditional Access: 6.1966
21
Azure Functions Quickstart: 6.1116
22
HuggingFace diffusers: 6.1078
23
Python: 6.0912
24
sklearn : 6.0831
25
OData: 6.0728
26
Diffusers Library: 6.0563
27
OData: 6.0178
28
General: 6.0166
29
Python: 6.0109
30
Diffusers Library: 5.9981
31
Azure Storage: 5.9955
32
Dataverse Queries: 5.9938
33
Python: 5.9928
34
pandas: 5.9888
35
numpy: 5.9708
36
UHero Requests: 5.9346
37
Power BI: 5.9198
38
Azure VDI Project: 5.9166
39
Python: 5.9061
40
Diffusers Library: 5.8829
41
Python: 5.8774
42
General: 5.8689
43
Site Scraping: 5.8630
44
Diffusers Documentat

# Headers with fewest notes first

In [7]:
# frequency = existing_cards['head'].value_counts(ascending=True)
# print(frequency)

# existing_cards = pd.merge(
#     left=existing_cards,
#     right=frequency.rename('frequency'),
#     left_on='head', right_index=True,
#     how='left'
# )
# existing_cards.sort_values('frequency', ascending=True, inplace=True)

# remake_card_document(existing_cards, filename='certcards2.txt')

In [8]:
# Show the parsed cards in their original file order; the age shuffle works
# on the separate `cards_to_age` frame, so this frame is not reordered by it.
existing_cards

Unnamed: 0,head,body,hash
0,Windows,You can increase or decrease the size of icons...,3a0af754d5909a32b6633c9407900dad
1,MS Dataverse,The capability to extend the default behavior ...,ed4b9ad5a81d5e638c61d6cd7e32bf01
2,Child Services AWS Review,Mainframe-as-a-Service (MFaaS) is a subscripti...,493d2025b15649f4e4bf4bf206d4285b
3,DAX Studio,Referencing Tables Using EVALUATE: You can als...,35d2388d03a9da0debbad267bf6ad1f4
4,Tax stuff,"""TA-2"" refers to the Transient Accommodations ...",7367393b770c9b331cfdefe3c30f01c5
...,...,...,...
355,Python,"When it comes to pre-trained models, TensorFlo...",b3f73714e6b6dd3887df7c2d2effb595
356,Python,The purpose of the .__init__() method in a Pyt...,dfeadf2dc34f5c52473326752587fcd4
357,Python,You don’t need to use a dedicated tool to crea...,7792b02e5cbf678111837dd1a85af009
358,Python,Augmented assignment operators provide a more ...,8229c364af46d999e5b82fd475cd8335
