In [611]:
import pandas as pd
import random
import string

In [612]:
def generate_alphabet_tuples():
    return [(chr(i), []) for i in range(ord('a'), ord('z')+1)]


In [613]:
def generate_character_mapping():
    alphabet_tuples = generate_alphabet_tuples()
    for i in range(8):
        ascii_low = string.ascii_lowercase
        for t in alphabet_tuples:
            c =  random.choice(ascii_low)
            t[1].append(c)
            ascii_low =ascii_low.replace(c,'')
    
    
    return alphabet_tuples

In [614]:
alphabet_tuples = generate_character_mapping()

In [615]:
alphabet_tuples

[('a', ['x', 'l', 'g', 'j', 'q', 'j', 't', 'd']),
 ('b', ['a', 'e', 'p', 'p', 'b', 'w', 'e', 'j']),
 ('c', ['b', 'v', 'q', 'b', 'v', 't', 'p', 'h']),
 ('d', ['t', 'i', 'x', 'w', 'h', 'u', 'd', 'e']),
 ('e', ['m', 'm', 'a', 'n', 'z', 'i', 'n', 'g']),
 ('f', ['j', 'b', 'n', 'h', 'c', 'c', 'y', 'o']),
 ('g', ['v', 'n', 'y', 'q', 'p', 'r', 'r', 'i']),
 ('h', ['s', 's', 'e', 'k', 't', 'h', 'k', 'n']),
 ('i', ['c', 'x', 'v', 'u', 'e', 'b', 'c', 'a']),
 ('j', ['h', 'z', 'f', 's', 's', 'q', 'z', 'c']),
 ('k', ['y', 'u', 'm', 'o', 'd', 'v', 'a', 's']),
 ('l', ['w', 'd', 'o', 'x', 'w', 'o', 'g', 'v']),
 ('m', ['f', 'w', 'j', 'f', 'g', 'd', 'h', 'z']),
 ('n', ['n', 'q', 'c', 'm', 'y', 'x', 'q', 'y']),
 ('o', ['k', 't', 'k', 'a', 'u', 'n', 'm', 'q']),
 ('p', ['q', 'h', 'd', 'l', 'k', 's', 'j', 'p']),
 ('q', ['o', 'k', 'b', 'd', 'a', 'm', 'f', 't']),
 ('r', ['i', 'o', 'h', 't', 'l', 'l', 's', 'k']),
 ('s', ['u', 'r', 's', 'c', 'o', 'e', 'l', 'l']),
 ('t', ['p', 'j', 'w', 'r', 'r', 'k', 'w', 'm']),


## change to dataframe 

In [616]:
def generate_alphabet_df():
    # dataframe with indexes from 'a' to 'z' and columns 'value_1' to 'value_8'
    alphabet_df = pd.DataFrame(index=[chr(i) for i in range(ord('a'), ord('d')+1)], 
                               columns=['column_'+str(i) for i in range(1,3)])
    alphabet_df.index.name = "carrier_letter"
    
    
    for each_column in alphabet_df.columns:
        ascii_low = string.ascii_lowercase
        for each_index in alphabet_df.index:
            c = random.choice(ascii_low)
            alphabet_df.loc[each_index, each_column] = c
            ascii_low = ascii_low.replace(c,'')
            
    alphabet_df.reset_index(level=0, inplace=True)
            
    return alphabet_df

In [617]:
alphabet_df = generate_alphabet_df()


In [618]:
print(alphabet_df)

  carrier_letter column_1 column_2
0              a        f        q
1              b        i        n
2              c        q        x
3              d        y        g


In [619]:
def shift_single_column(df, column_name, shift_value =1):
    column_to_shift = df[column_name]
    
    lost_values = column_to_shift[-shift_value:]
    
    shifted_column = df[column_name].shift(shift_value)
   
    df[column_name] = pd.concat([lost_values, shifted_column[shift_value:]]).reset_index(drop=True)

    return df


In [620]:
alphabet_df_shifted = shift_single_column(alphabet_df, 'column_1')

In [621]:
alphabet_df_shifted

Unnamed: 0,carrier_letter,column_1,column_2
0,a,y,q
1,b,f,n
2,c,i,x
3,d,q,g


## shift columns to achieve no repetition in rows

In [622]:
data = {
    'carrier_letter': ['a', 'b', 'c', 'd'],
    'column_1': ['i', 'z', 'l', 'v'],
    'column_2': ['i', 'l', 'w', 'r'],
    'column_3': ['g', 'h', 'o', 'y'],
    'column_4': ['g', 'i', 'k', 'o'],
    'column_5': ['g', 'b', 'c', 'd'],
}

df = pd.DataFrame(data)

In [623]:
df

Unnamed: 0,carrier_letter,column_1,column_2,column_3,column_4,column_5
0,a,i,i,g,g,g
1,b,z,l,h,i,b
2,c,l,w,o,k,c
3,d,v,r,y,o,d


In [624]:
df.columns[1:]
# get index of the column by name
column_index = df.columns.get_loc('column_1')
column_index

1

In [625]:
# one random column
column_name = random.choice(df.columns[1:])
column_name

'column_1'

In [626]:
def move_columns(df,column, column_to_change):
    """Function shifts the values of a column in one column, so there will be no same characters in the same row.
       Function should return df with columns with no same characters in the same row.
    """
    iter = 0
    was_shifted = False
    
    while any(df[column] == df[column_to_change]):
        print(f"Column {column} has same values as {column_to_change}")
        df = shift_single_column(df, column_to_change)
        was_shifted = True
        iter += 1
        if iter > 100:
            print("Too many iterations, breaking out of the loop.")
            break
    
    return df, was_shifted

In [627]:
def fix_repetition_for_one_column(df,  column_to_change, iter):
    was_shifted = False
    for column in df.columns[1:iter]:
        print(f"Checking column {column} for repetition with {column_to_change}")
        if column == column_to_change: #necessary to skip the column that is being changed
            continue
        df , was_shifted = move_columns(df,column, column_to_change)

    return df, was_shifted

In [628]:
def fix_repetition(df):
    """Function iterates over all columns comparing them and shifting the values; after shift it begins comparing columns from the start"""
    iter = -2
    
    was_shifted = True
    # skip carrier_letter and first column and fix repetition in the rest of the columns
    for column_to_change in df.columns[2:]:
        # print(column_to_change)
        while was_shifted:
            was_shifted = False
            df, was_shifted =fix_repetition_for_one_column(df,  column_to_change, iter)
            print(column_to_change)
            
        iter += 1
        was_shifted = True

    return df

In [629]:
# def move_columns(df,column, column_to_change):
#     # Start with this as False, and only update it once an actual shift has been made.
#     was_shifted = False
#     iter = 0
    
#     while any(df[column] == df[column_to_change]):
#         print(f"Column {column} has same values as {column_to_change}")
#         df = shift_single_column(df, column_to_change)
#         was_shifted = True
#         iter += 1
#         if iter > 100:
#             print("Too many iterations, breaking out of the loop.")
#             break

#     return df, was_shifted

# def fix_repetition_for_one_column(df,  column_to_change, iter):
#     # Start with this as False, and only update it once an actual shift has been made.
#     was_shifted = False
#     for column in df.columns[1:iter]:
#         print(f"Checking column {column} for repetition with {column_to_change}")
#         if column == column_to_change: #necessary to skip the column that is being changed
#             continue
#         df , was_shifted = move_columns(df,column, column_to_change)

#     return df, was_shifted

# def fix_repetition(df):
#     """Function iterates over all columns comparing them and shifting the values; after shift it begins comparing columns from the start"""
#     iter = -1*(len(df.columns)-2)
#     was_shifted = True # Initially value is true to make sure loop is entered.

#     # skip carrier_letter and first column and fix repetition in the rest of the columns
#     for column_to_change in df.columns[2:]:
#         while was_shifted:
#             was_shifted = False
#             df, was_shifted = fix_repetition_for_one_column(df,  column_to_change, iter)
#             print(column_to_change)
            
#         iter += 1
#         was_shifted = True # Reset this value so loop is re-entered for the next column.

#     return df

# df_fixed = fix_repetition(df)


In [630]:
print(df)

  carrier_letter column_1 column_2 column_3 column_4 column_5
0              a        i        i        g        g        g
1              b        z        l        h        i        b
2              c        l        w        o        k        c
3              d        v        r        y        o        d


In [631]:
df_fixed = fix_repetition(df)


Checking column column_1 for repetition with column_2
Column column_1 has same values as column_2
Column column_1 has same values as column_2
Checking column column_2 for repetition with column_2
Checking column column_3 for repetition with column_2
column_2
Checking column column_1 for repetition with column_3
Checking column column_2 for repetition with column_3
Checking column column_3 for repetition with column_3
Checking column column_4 for repetition with column_3
Column column_4 has same values as column_3
Column column_4 has same values as column_3
column_3
Checking column column_1 for repetition with column_3
Checking column column_2 for repetition with column_3
Checking column column_3 for repetition with column_3
Checking column column_4 for repetition with column_3
column_3
column_4
column_5


In [632]:
print(df_fixed)

  carrier_letter column_1 column_2 column_3 column_4 column_5
0              a        i        w        o        g        g
1              b        z        r        y        i        b
2              c        l        i        g        k        c
3              d        v        l        h        o        d


In [633]:

num_columns = len(df.columns)
print(num_columns)

6
