In [11]:
import itertools
import string
import random
import json
import pandas as pd
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 50)

In [2]:
def generate_row_letters(num_strings: int, max_length: int = 2):
    counter = 0
    for length in range(1, max_length + 1):
        if counter >= num_strings:
            break
        for item in itertools.product(string.ascii_uppercase, repeat=length):
            if counter >= num_strings:
                return
            yield "".join(item)
            counter += 1

In [3]:
def _gen_filler(n_cols: int, has_empty: bool = True) -> list[float]:
    fill = []
    for _ in range(n_cols):
        if has_empty:
            fill.append(None)
        # bias towards blanks since most plates are surrounded by blanks
        r = random.randint(1, 6 if has_empty else 3)
        if r == 1:
            fill.append(random.random())
        elif r == 2:
            fill.append(random.randint(1, 1000))
        elif r == 3:
            res = ''.join(random.choices(string.ascii_lowercase + string.ascii_uppercase +
                             string.digits, k=random.randint(5, 50)))
            fill.append(res)
        else:
            fill.append(None)
    return fill
    

In [4]:
def create_nontabular_plate(
    n_rows: int,
    n_cols: int,
    offset: int,
    contents: str = 'Plate',
    n_filler: int = 10
) -> pd.DataFrame:
    cols_before = random.randint(1, n_filler)
    cols_after = random.randint(1, n_filler)
    tot_cols = n_cols + cols_before + cols_after
    rows_before = random.randint(1, n_filler)
    rows_after = random.randint(1, n_filler)
    # Add one for the header
    row_start = rows_before + offset + 1
    results = {'row_start': row_start, 
               'row_end': row_start + (n_rows - 1), # it's inclusive
               'col_start': cols_before, 
               'col_end': cols_before + (n_cols - 1), # it's inclusive
               'contents': contents}
    plate_rows = []
    letters = list(generate_row_letters(n_rows))
    # Generate junk rows before the header
    for row in range(rows_before):
        plate_rows.append(_gen_filler(tot_cols))
    # Generate the header which is a monotonically increasing sequence
    plate_header = (
        _gen_filler(cols_before) + list(range(1, n_cols + 1)) + _gen_filler(cols_after)
    )
    plate_rows.append(plate_header)
    # Fill out the rest of the plate
    for row in range(n_rows):
        data_row = (
            _gen_filler(cols_before - 1)
            + [letters[row]]
            # This is the plate contents
            + _gen_filler(n_cols, has_empty=False)
            + _gen_filler(cols_after)
        )
        plate_rows.append(data_row)
    for row in range(rows_after):
        plate_rows.append(_gen_filler(tot_cols))
    return pd.DataFrame(plate_rows), results

In [5]:
def create_plates(
    n_plates: int,
    n_rows: int,
    n_cols: int,
    return_dict: bool = False,
) -> pd.DataFrame | dict[str, pd.DataFrame]:
    plates: list[pd.DataFrame] = []
    results: list[dict] = []
    offset = 0
    for i in range(n_plates):
        plate_df, result = create_nontabular_plate(n_rows, n_cols, offset)
        offset += plate_df.shape[0]
        plates.append(plate_df)
        results.append(result)
    return {f"plate_{i}": plate_data for i, plate_data in enumerate(plates)} if return_dict else pd.concat(plates, ignore_index=True), results

In [6]:
def _get_plate_from_result_dict(df: pd.DataFrame, result_dict: dict) -> pd.DataFrame:
    row_start, row_end = result_dict['row_start'], result_dict['row_end'] + 1
    col_start, col_end = result_dict['col_start'], result_dict['col_end'] + 1
    proposed_plate = df.iloc[
        row_start:row_end,
        col_start:col_end,
    ]
    return proposed_plate

In [7]:
plates_df, results = create_plates(random.randint(1, 10), 4, 6)
print(results)
_get_plate_from_result_dict(plates_df, results[0])

[{'row_start': 11, 'row_end': 14, 'col_start': 5, 'col_end': 10, 'contents': 'Plate'}, {'row_start': 33, 'row_end': 36, 'col_start': 3, 'col_end': 8, 'contents': 'Plate'}, {'row_start': 47, 'row_end': 50, 'col_start': 10, 'col_end': 15, 'contents': 'Plate'}, {'row_start': 69, 'row_end': 72, 'col_start': 7, 'col_end': 12, 'contents': 'Plate'}, {'row_start': 87, 'row_end': 90, 'col_start': 7, 'col_end': 12, 'contents': 'Plate'}, {'row_start': 99, 'row_end': 102, 'col_start': 5, 'col_end': 10, 'contents': 'Plate'}, {'row_start': 109, 'row_end': 112, 'col_start': 1, 'col_end': 6, 'contents': 'Plate'}, {'row_start': 121, 'row_end': 124, 'col_start': 9, 'col_end': 14, 'contents': 'Plate'}, {'row_start': 143, 'row_end': 146, 'col_start': 4, 'col_end': 9, 'contents': 'Plate'}, {'row_start': 158, 'row_end': 161, 'col_start': 10, 'col_end': 15, 'contents': 'Plate'}]


Unnamed: 0,5,6,7,8,9,10
11,84peQ41BqMo,,0.990539,A,0.474304,545.0
12,0.1179,,gugrWhoYFwTTIxZ6kbE9t,B,7xtoQvcWYgfPvwosYlPjwG,144.0
13,53,,,C,914,0.882762
14,,,,D,849,875.0


In [12]:
plates_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,...,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
0,,477,,373,,PQ8cKWElRJFQ1apFTDYycqKeWhCvo3yRXfRUSIFdn76S7,,,,55,,0.443162,,O7hyPDU4Bwuwv,,,,0.39796,,542,,vTkNRtc8IKHWpOUJRKUSkNS4UTM3J9EwC5L,,,,...,185,,0.83158,,580,,,,,,,,,,,,,,,,,,,,
1,,rEjrtntwRTVcGJeU3V4jnhEaf3KuufrPxmkn2jKm3,,,,0Z1CWz5X7s0uY,,,,470,,697,,0.234189,,,,,,TlsajRUYUfL7,,,,Sw4kwebnVvp1duayuPBCzsCJty4o6gAalFn7vl,,...,,,709,,Yjqhz72F3O36qHCQ0Kaa4rhIeOk3AOdBOdghfm0KIkg,,758.0,,0.180217,,,,,,,,,,,,,,,,
2,,8CbW14gXRDQ1RLUySGuS6vUqfhAPMgVeHYmds8V07I7qxU,,389,,AVwgoXpUmn7i5I,,346,,,,,,0.212275,,,,,,eVZOQ1TUXB7gPdzt5DH0ZyuexwxQW1hUiqy,,209,,111,,...,0.046949,,,,hiMaIoRriX2lQY2XHgamtc5XtCKsmuqAAW5nas,,0.607357,,600,,,,,,,,,,,,,,,,
3,,,,,,I4JCEnwESW97OSKiq6GZFbKiq9TrlQAQyb8,,,,,,,,,,,,,,0.57236,,0.620657,,BgH23csI54T9t2SLHappWFUfgWDgY0z5bMD,,...,,,,,RdAea3qg9Cd,,,,,,,,,,,,,,,,,,,,
4,,DgvJ2Oq3gis6IvaNnaN0gwLsedOY5IHOA,,,,,,92LzpcA9tZro3YWKrd3STgVfZID,,533,,,,0.957788,,RhloSXqjjj5x4npxhJtM,,240,,,,,,128,,...,874,,441,,,,,,489,,,,,,,,,,,,,,,,
5,,,,0.434247,,,,jtLRVC4zIRzV5Ykd0dGcgRH,,,,1gX5UaUbASrWECwza,,,,34,,VL6mBm4VVkr,,Ea7Hk0DhSYCgt18gqDMWb,,0.962792,,0.730425,,...,fsWvOdXgB7y3GY0t,,,,815,,,,,,,,,,,,,,,,,,,,
6,,,,,,,,0.844264,,7b9pNnjTkKZoGveAMTJ26AdPfvFRJQJ84wtneb0O5uDy,,,,nIu7LrGOGCIPRw7gDk2sEUB1vssPiUC,,505,,561,,S1YpFWuegqflsUU2Q0JciMFQVg6kHe82j8IwvkQX5UJ9,,701,,0.805495,,...,0.596149,,C7c1aBAobp4j,,,,,,iFcWQNAeddvyzlKNXJ2iY0W7VmPSV6XV33,,,,,,,,,,,,,,,,
7,,SynvRpfYOcJqB6r7VMbTjmXW7ydW3tNDr,,,,279,,,,,,lvL9G,,,,eNYyD,,ODBtsksPfnLgpEwexAHj2rWIY4,,535,,,,xTeVjl3O2vj1BKn4Xwn4xuyMlDRuvqPwojv,,...,,,,,,,,,0.818626,,,,,,,,,,,,,,,,
8,,905,,,,598,,wCjM6iq3WGNzgYN5DENMKrur5N8BWv3nuO4m23,,,,0.699538,,x2SkBBZilNGWVou0fn2y1SUW8Go,,,,369,,0.438216,,,,uKDYBCZT8c2bcXDH,,...,0.496452,,0.753934,,wS1t480GaY2,,,,623,,,,,,,,,,,,,,,,
9,,,,VDvB5DrjSsda6mmIjMnG2ZbIDNdY,,J4MT8,,0.368324,,,,,,,,3,,682,,383,,C8cNGe5PhuuKR4coKddEnLToKDL,,,,...,Kqznpgffk2VGyQDKM0uKMdDpZcdOQ9,,0.952975,,,,544.0,,94,,,,,,,,,,,,,,,,


In [103]:
evals = []
for i in range(100):
    plates_df, results = create_plates(random.randint(1, 10), 4, 6)
    evals.append({"input": plates_df.to_csv(index=False, header=False), "output":results})
for i in range(100):
    plates_df, results = create_plates(random.randint(1, 10), 8, 12)
    evals.append({"input": plates_df.to_csv(index=False, header=False), "output":results})
for i in range(100):
    plates_df, results = create_plates(random.randint(1, 10), 16, 24)
    evals.append({"input": plates_df.to_csv(index=False, header=False), "output":results})
for i in range(10):
    plates_df, results = create_plates(random.randint(1, 10), 32, 48)
    evals.append({"input": plates_df.to_csv(index=False, header=False), "output":results})

In [108]:
with open('gen_evals.json', 'w') as f:
    json.dump(evals, f)

In [114]:
plates_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33
0,,0lGZsTtZqRq1YGyEHYGFhR0DW7bIfUNSxJlbjCPHQLfa11KwC,,,,,,,,,,0.121591,,966,,,,112,,,,0.983377,,608.0,,,,,,gZO1PtE0nKJ3lwzDGw,,,,
1,,0.2955,,l5sIozlzJfnOQ67xrmUzpiBVZoB7TD,,0.269455,,,,,,129,,kVtQ5VBE49u,,,,,1,2,3,4,5,6.0,,,,,,,,,,
2,,722,,,,i6AstG22s8Cb88ktNduWfu2O4ZSMtT18ySfhFlK0d9t,,,,1z3LMVH9UTx5OxbazLSJl8pgxjLLspp7zWZPrJwAf02Nnshd7,,0.029341,,,,,A,0.020699,0.776257,oaqsnXhXqqFyvPybUFaapUftTdEroY303ahWMKxYBIZ,TryYzSOqwPejSMmr4DMJrEhoO55nbsTJ3SY,0.861957,lfBqcQsv2oMWoJCzLygxKIedwyQ7iXT1juIUDJThZdsB3r...,,kmzohBqQcxtPHkJmhODI6Ov2joDNmBMY,,,,,,,,,
3,,0.702402,,,,,,YTOxGPEGlSH7KlONI6uSzZyVj7PdJ3eVlf9fFALXmRgUdzcU,,,,268,,0.209087,,0.4656,B,20us17NGMB5AHUn72EXfiEXupxTMXtyUL5xDUg3MKuYID,70,D28qisjvRiONwW9nKhGuQoHmgHxVppiLJhgg,0.683478,907,0.827364,,,,,,,,,,,
4,,,,,,,,,,,,,,478,,,C,41,xhHtGvH9bRmofTl6PT4uRMUdZpqCUrHl7nib72o,8,0.013126,0.337339,948,,,,,,,,,,,
5,,0.15484,,,,,,,,945,,538,,KUk8wQH99CzjAR2t94GxwRMwLiXB7WRZhQDk29i7HBxm0i...,,0.224276,D,340,66,Fv78o91,e32TAsxqeqtjGXrxl9cQc5,NE73bDert9O6LTTieu0pDwOJtYlPjJ78z1A,cHuOBrgAI7jgxAusDWMMUlOx5tRa9KRz4Vem,,,,,,,,,,,
6,,,,,,0.531886,,,,0.342786,,,,,,0.696387,,DVg1fdLMN3vu,,,,151,,0.521799,,308.0,,,,,,,,
7,,0.045334,,,,,,,,,,bYGUODKj0l,,,,546.0,,,,IfBupSHirZ,,,,0.12874,,0.161239,,0.758832,,,,k98otDxWQYlNmX7Fr1uVsWi8y,,
8,,0.439353,,,,,,148,,,,0.665736,,0.716956,,,,975,,,,158,,411.0,,,,,,,,,,
9,,ooMs0vYObIIXBsYwkp,,0.904097,,eyODFr,,,,,,,,,,,,337,,252,,,,,,,,,,,,,,a6EurjfKbqRwNAsTHOw6LlU6yEoFL26LXPVofBm0
