In [1]:
import pandas as pd
import random
import string

In [2]:
def generate_alphabet_tuples():
    return [(chr(i), []) for i in range(ord('a'), ord('z')+1)]


In [4]:
def generate_character_mapping():
    alphabet_tuples = generate_alphabet_tuples()
    for i in range(8):
        ascii_low = string.ascii_lowercase
        for t in alphabet_tuples:
            c =  random.choice(ascii_low)
            t[1].append(c)
            ascii_low =ascii_low.replace(c,'')
    
    
    return alphabet_tuples

In [5]:
alphabet_tuples = generate_character_mapping()

In [5]:
alphabet_tuples

[('a', ['a', 'x', 'j', 'm', 'b', 'f', 'p', 's']),
 ('b', ['q', 'g', 'a', 'h', 'z', 'x', 'z', 'a']),
 ('c', ['e', 'c', 'k', 'd', 'q', 'n', 'l', 'h']),
 ('d', ['d', 'z', 'z', 's', 'g', 's', 'n', 'c']),
 ('e', ['p', 't', 'm', 'b', 's', 'r', 'b', 'f']),
 ('f', ['n', 'y', 'h', 'p', 'n', 'u', 'y', 'r']),
 ('g', ['u', 'q', 's', 'q', 'e', 'g', 'k', 'z']),
 ('h', ['h', 'j', 'q', 'w', 'y', 'v', 'h', 'b']),
 ('i', ['z', 'a', 'u', 'v', 'r', 'z', 'm', 'k']),
 ('j', ['g', 'f', 'o', 'o', 'k', 'o', 'f', 'p']),
 ('k', ['r', 'r', 'c', 'f', 'h', 'a', 'c', 'n']),
 ('l', ['v', 'o', 'v', 'n', 'c', 'l', 'i', 'i']),
 ('m', ['j', 'w', 'x', 'a', 'u', 'j', 'j', 'y']),
 ('n', ['c', 'd', 'n', 'k', 'j', 'y', 'e', 't']),
 ('o', ['s', 'p', 'p', 'j', 'm', 'e', 'w', 'j']),
 ('p', ['o', 's', 'b', 'r', 'w', 'w', 'd', 'q']),
 ('q', ['w', 'u', 'g', 'u', 'x', 'i', 'v', 'g']),
 ('r', ['i', 'l', 'r', 'z', 'o', 'b', 's', 'o']),
 ('s', ['m', 'e', 'i', 'e', 'a', 'p', 'g', 'x']),
 ('t', ['y', 'i', 'w', 'g', 'd', 'm', 'r', 'w']),


## change to dataframe 

In [9]:
def generate_alphabet_df():
    # dataframe with indexes from 'a' to 'z' and columns 'value_1' to 'value_8'
    alphabet_df = pd.DataFrame(index=[chr(i) for i in range(ord('a'), ord('z')+1)], 
                               columns=['column_'+str(i) for i in range(1,8)])
    alphabet_df.index.name = "char_mapped"
    
    
    for each_column in alphabet_df.columns:
        ascii_low = string.ascii_lowercase
        for each_index in alphabet_df.index:
            c = random.choice(ascii_low)
            alphabet_df.loc[each_index, each_column] = c
            ascii_low = ascii_low.replace(c,'')
            
    alphabet_df.reset_index(level=0, inplace=True)
            
    return alphabet_df

In [10]:
alphabet_df = generate_alphabet_df()


In [11]:
print(alphabet_df)

   char_mapped column_1 column_2 column_3 column_4 column_5 column_6 column_7
0            a        u        s        b        n        f        f        x
1            b        i        c        c        o        v        b        w
2            c        m        z        l        m        k        q        l
3            d        s        v        j        a        n        l        h
4            e        n        p        v        g        p        j        p
5            f        p        r        k        x        r        r        y
6            g        d        q        z        v        b        i        t
7            h        e        d        x        q        u        c        j
8            i        x        n        g        p        d        n        r
9            j        j        h        r        z        y        m        u
10           k        k        m        s        e        s        t        n
11           l        r        u        f        r        t     

In [9]:
def shift_single_column(df, column_name, shift_value =1):
    column_to_shift = df[column_name]
    
    lost_values = column_to_shift[-shift_value:]
    
    shifted_column = df[column_name].shift(shift_value)
   
    df[column_name] = pd.concat([lost_values, shifted_column[shift_value:]]).reset_index(drop=True)

    return df


In [10]:
alphabet_df_shifted = shift_single_column(alphabet_df, 'column_1')

In [11]:
alphabet_df_shifted

Unnamed: 0,char_mapped,column_1,column_2
0,a,v,s
1,b,j,b
2,c,h,o
3,d,t,g


## shift columns to achieve no repetition in rows

In [12]:
data = {
    'char_mapped': ['a', 'b', 'c', 'd'],
    'column_1': ['i', 'z', 'l', 'v'],
    'column_2': ['i', 'l', 'w', 'r'],
    'column_3': ['g', 'h', 'o', 'y'],
    'column_4': ['g', 'i', 'k', 'o'],
    'column_5': ['g', 'b', 'c', 'd'],
}

df = pd.DataFrame(data)

In [13]:
df

Unnamed: 0,char_mapped,column_1,column_2,column_3,column_4,column_5
0,a,i,i,g,g,g
1,b,z,l,h,i,b
2,c,l,w,o,k,c
3,d,v,r,y,o,d


In [14]:
df.columns[1:]
# get index of the column by name
column_index = df.columns.get_loc('column_1')
column_index

1

In [15]:
# one random column
column_name = random.choice(df.columns[1:])
column_name

'column_1'

In [16]:
def move_columns(df,column, column_to_change):
    """Function shifts the values of a column in one column, so there will be no same characters in the same row.
       Function should return df with columns with no same characters in the same row.
    """
    iter = 0
    was_shifted = False
    
    while any(df[column] == df[column_to_change]):
        print(f"Column {column} has same values as {column_to_change}")
        df = shift_single_column(df, column_to_change)
        was_shifted = True
        iter += 1
        if iter > 100:
            print("Too many iterations, breaking out of the loop.")
            break
    
    return df, was_shifted

In [17]:
def fix_repetition_for_one_column(df,  column_to_change, iter):
    was_shifted = False
    for column in df.columns[1:iter]:
        print(f"Checking column {column} for repetition with {column_to_change}")
        if column == column_to_change: #necessary to skip the column that is being changed
            continue
        df , was_shifted = move_columns(df,column, column_to_change)

    return df, was_shifted

In [None]:
def fix_repetition(df):
    """Function iterates over all columns comparing them and shifting the values; after shift it begins comparing columns from the start"""
    iter = -1*len(df.columns[2:])
    
    was_shifted = True
    # skip char_mapped and first column and fix repetition in the rest of the columns
    for column_to_change in df.columns[2:]:
        while was_shifted:
            was_shifted = False
            df, was_shifted =fix_repetition_for_one_column(df,  column_to_change, iter)
            print(column_to_change)
            
        iter += 1
        was_shifted = True

    return df

In [19]:
print(df)

  char_mapped column_1 column_2 column_3 column_4 column_5
0           a        i        i        g        g        g
1           b        z        l        h        i        b
2           c        l        w        o        k        c
3           d        v        r        y        o        d


In [20]:
df_fixed = fix_repetition(df)


Checking column column_1 for repetition with column_2
Column column_1 has same values as column_2
Column column_1 has same values as column_2
column_2
Checking column column_1 for repetition with column_2
column_2
Checking column column_1 for repetition with column_3
Checking column column_2 for repetition with column_3
column_3
Checking column column_1 for repetition with column_4
Checking column column_2 for repetition with column_4
Checking column column_3 for repetition with column_4
Column column_3 has same values as column_4
column_4
Checking column column_1 for repetition with column_4
Checking column column_2 for repetition with column_4
Column column_2 has same values as column_4
Checking column column_3 for repetition with column_4
column_4
Checking column column_1 for repetition with column_5
Checking column column_2 for repetition with column_5
Checking column column_3 for repetition with column_5
Column column_3 has same values as column_5
Checking column column_4 for repe

In [21]:
print(df_fixed)

  char_mapped column_1 column_2 column_3 column_4 column_5
0           a        i        w        g        k        d
1           b        z        r        h        o        g
2           c        l        i        o        g        b
3           d        v        l        y        i        c


In [22]:

num_columns = len(df.columns)
print(num_columns)

6
