In [98]:
import pandas as pd
import numpy as np
from scipy.interpolate import lagrange
from numpy.polynomial.polynomial import Polynomial
import string

In [99]:
# Create the empty DataFrame that the following cells fill column by column.
df = pd.DataFrame()

In [100]:
def textToIntUTF(text):
    """Return the big-endian integer formed by the UTF-8 bytes of ``text``.

    An empty string maps to 0.
    """
    return int.from_bytes(text.encode('utf-8'), byteorder='big')


def normalize(num, lower=35322350018592, upper=139081753198206):
    """Linearly rescale ``num`` so that ``lower`` maps to 0 and ``upper`` to 1.

    The default bounds are the integer encodings of six spaces and six
    tildes (``' ' * 6`` and ``'~' * 6`` as big-endian UTF-8 bytes).

    Parameters:
        num: value to rescale.
        lower: value mapped to 0.
        upper: value mapped to 1; must differ from ``lower``.

    Returns:
        ``(num - lower) / (upper - lower)`` as a float.
    """
    # Locals are deliberately not called ``min``/``max`` so the builtins
    # stay usable inside this function.
    return (num - lower) / (upper - lower)


def denormalize(num, lower=35322350018592, upper=139081753198206):
    """Map a normalized value back onto the ``[lower, upper]`` scale.

    Inverse of the linear rescaling ``(value - lower) / (upper - lower)``.
    The default bounds are the integer encodings of six spaces and six
    tildes (``' ' * 6`` and ``'~' * 6`` as big-endian UTF-8 bytes).

    Parameters:
        num: normalized value (0 maps to ``lower``, 1 maps to ``upper``).
        lower: value that 0 maps to.
        upper: value that 1 maps to.

    Returns:
        ``num * (upper - lower) + lower``. For a float ``num`` the result is
        a float and may differ slightly from the original integer.
    """
    # Locals are deliberately not called ``min``/``max`` so the builtins
    # stay usable inside this function.
    return num * (upper - lower) + lower


def intToTextUTF(num):
    """Decode a non-negative integer back into text.

    The integer is written as the shortest big-endian byte string that can
    hold it, and those bytes are decoded as UTF-8. ``0`` yields ``''``.
    ``int.to_bytes`` raises ``OverflowError`` for a negative ``num``, and
    ``decode`` raises ``UnicodeDecodeError`` if the bytes are not valid UTF-8.
    """
    # Number of bytes needed = ceil(bit_length / 8).
    width, leftover_bits = divmod(num.bit_length(), 8)
    if leftover_bits:
        width += 1
    raw = num.to_bytes(width, byteorder='big')
    return raw.decode('utf-8')


# Three-character extremes of the printable ASCII range. The constants used
# by normalize()/denormalize() are the six-character equivalents:
# '~' * 6 -> 139081753198206 and ' ' * 6 -> 35322350018592.
print(textToIntUTF('~~~'))  # 8289918
print(textToIntUTF('   '))  # 2105376

# Round trip: text -> int -> normalized float -> int -> text.
text = "odha#@"
text_as_int = textToIntUTF(text)
res = normalize(text_as_int)
print(res)
print(text_as_int)

res = denormalize(res)

# The float round trip can land slightly below the original integer, so round
# to the nearest integer instead of truncating with int().
print(intToTextUTF(round(res)))

# 2
# 1.999999
# Higher Precision Means more iterations with root finding algorithm

8289918
2105376
0.8399690622714339
122477038609216
odha#@


In [101]:
# Number of rows generated for the dataset.
# NOTE(review): no RNG seed is set in the visible cells, so each run
# presumably produces different data - confirm whether that is intended.
samples = 1000
# Modulus applied to 'random_state' when picking a row of random_matrix.csv.
rand_mat_samples = 100
# Bounds passed to np.random.randint (the upper bound is exclusive).
start = 0
end = 100
# range(odd_start, odd_end, 2) gives the candidate section counts.
odd_start = 3
odd_end = 14  # exclusive: range() stops before this value
# Length of each generated random text.
text_size = 6

# Bounds for 3-character text (textToIntUTF('~~~') and textToIntUTF('   ')):
# max_val = 8289918
# min_val = 2105376

# Bounds for 6-character text: the big-endian UTF-8 integers of '~' * 6 and ' ' * 6.
max_val = 139081753198206
min_val = 35322350018592

In [102]:
def x_random_numbers():
    """Draw two distinct integers from [start, end) and return them sorted.

    Reads the module-level ``start`` and ``end``. The second draw is repeated
    until it differs from the first, so the range must contain at least two
    integers or the loop never ends.

    Returns:
        A ``(smaller, larger)`` tuple.
    """
    first = np.random.randint(start, end)
    second = np.random.randint(start, end)
    while second == first:
        second = np.random.randint(start, end)
    if first < second:
        return first, second
    return second, first


# Draw one sorted [x1, x2] endpoint pair per sample and store it in column 'x'.
x = [list(x_random_numbers()) for _ in range(samples)]

df['x'] = x

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,x
0,"[29, 84]"
1,"[72, 74]"
2,"[8, 18]"
3,"[42, 50]"
4,"[62, 76]"


In [103]:
def generate_numbers(df, start=start+1, end=end, samples=samples):
    """Add a 'y' column of symmetric ranges ``[-n, n]`` to ``df`` in place.

    ``samples`` integers ``n`` are drawn from [start, end) with
    ``np.random.randint``. The defaults are taken from the module-level
    ``start`` (plus one), ``end`` and ``samples`` at definition time.
    Returns ``None``.
    """
    magnitudes = np.random.randint(start, end, samples)
    symmetric_ranges = []
    for num in magnitudes:
        symmetric_ranges.append([-num, num])
    df['y'] = symmetric_ranges


# Add the 'y' column of symmetric [-n, n] ranges to df (modifies df in place).
generate_numbers(df)

In [104]:
def random_state(df, start=start, end=end, samples=None):
    """Add a 'random_state' column of random integers to ``df`` in place.

    Values are drawn from [start, end) with ``np.random.randint``. When
    ``samples`` is ``None`` one value is drawn per row of ``df``. The
    ``start``/``end`` defaults are taken from the module-level values at
    definition time. Returns ``None``.
    """
    if samples is None:
        samples = len(df)
    df['random_state'] = np.random.randint(start, end, samples)


# Add the 'random_state' column (one value per row), then preview the frame.
random_state(df)
df.head()

Unnamed: 0,x,y,random_state
0,"[29, 84]","[-50, 50]",9
1,"[72, 74]","[-88, 88]",87
2,"[8, 18]","[-97, 97]",35
3,"[42, 50]","[-6, 6]",58
4,"[62, 76]","[-88, 88]",19


In [105]:
# Pick, for every sample, a section count from range(odd_start, odd_end, 2).
section_candidates = range(odd_start, odd_end, 2)
odd_numbers = np.random.choice(section_candidates, samples)
df['sections'] = odd_numbers

In [106]:
# Load the random matrix from 'random_matrix.csv' (path relative to the
# working directory) and preview its first rows.
random_mat = pd.read_csv('random_matrix.csv')
random_mat.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,83,90,22,78,5,55,64,27,11,92,11,28,26,35
1,56,32,85,37,40,93,51,62,99,26,34,77,8,88
2,8,32,67,85,37,52,81,17,24,49,98,97,60,70
3,43,61,92,21,85,23,92,14,93,18,94,83,76,6
4,33,62,89,17,96,78,59,76,0,75,99,5,4,49


In [107]:
def add_random_mat(df, random_mat):
    """Attach one row of ``random_mat`` to every row of ``df``.

    For each row, ``random_state`` modulo the number of rows in
    ``random_mat`` selects a row of the matrix, and its values are stored as
    a NumPy array in a new 'random_mat_row' column.

    Parameters:
        df: DataFrame with an integer 'random_state' column; modified in place.
        random_mat: DataFrame to pick rows from; must not be empty.

    Returns:
        The same ``df`` object, with 'random_mat_row' added.

    Raises:
        ValueError: if ``random_mat`` has no rows.
    """
    n_rows = len(random_mat)
    if n_rows == 0:
        raise ValueError("random_mat must contain at least one row")

    def get_random_mat_row(row):
        # Wrap on the actual row count so the positional lookup is always in
        # range, whatever the size of the matrix passed in.
        index = row['random_state'] % n_rows
        return random_mat.iloc[index].values

    df['random_mat_row'] = df.apply(get_random_mat_row, axis=1)
    return df


# Add the 'random_mat_row' column to df; the returned frame is the cell output.
add_random_mat(df, random_mat)

Unnamed: 0,x,y,random_state,sections,random_mat_row
0,"[29, 84]","[-50, 50]",9,7,"[15, 35, 86, 71, 40, 80, 89, 33, 20, 17, 56, 4..."
1,"[72, 74]","[-88, 88]",87,3,"[27, 44, 67, 44, 21, 78, 61, 73, 71, 21, 74, 8..."
2,"[8, 18]","[-97, 97]",35,3,"[89, 10, 77, 57, 9, 83, 50, 56, 8, 52, 59, 0, ..."
3,"[42, 50]","[-6, 6]",58,11,"[15, 41, 88, 98, 7, 85, 0, 20, 54, 5, 77, 24, ..."
4,"[62, 76]","[-88, 88]",19,3,"[97, 91, 14, 58, 92, 42, 58, 99, 54, 52, 85, 5..."
...,...,...,...,...,...
995,"[14, 17]","[-29, 29]",22,13,"[5, 44, 7, 63, 91, 84, 81, 94, 59, 47, 57, 51,..."
996,"[16, 79]","[-95, 95]",14,11,"[51, 5, 49, 28, 94, 85, 30, 72, 85, 48, 68, 99..."
997,"[21, 45]","[-79, 79]",53,5,"[16, 57, 51, 72, 45, 99, 68, 75, 63, 50, 69, 7..."
998,"[60, 63]","[-86, 86]",75,3,"[55, 31, 23, 4, 16, 90, 42, 75, 44, 99, 7, 5, ..."


In [108]:
# Evenly spaced y values for one row
def generate_points_y(row):
    """Return ``sections + 1`` evenly spaced values spanning ``row['y']``.

    ``row['y']`` supplies the first and last value; both endpoints are
    included. The result is a list of NumPy floats.
    """
    first, last = row['y'][0], row['y'][1]
    n_points = row['sections'] + 1
    return list(np.linspace(first, last, n_points))


def generate_points_x(row):
    """Return ``sections + 1`` evenly spaced values spanning ``row['x']``.

    ``row['x']`` supplies the first and last value; both endpoints are
    included. The result is a list of NumPy floats.
    """
    first, last = row['x'][0], row['x'][1]
    n_points = row['sections'] + 1
    return list(np.linspace(first, last, n_points))


# Apply the generators row by row: each row gets sections + 1 evenly spaced
# x values and y values, stored as lists in 'x_points' and 'y_points'.
df['x_points'] = df.apply(generate_points_x, axis=1)
df['y_points'] = df.apply(generate_points_y, axis=1)

df.head()

Unnamed: 0,x,y,random_state,sections,random_mat_row,x_points,y_points
0,"[29, 84]","[-50, 50]",9,7,"[15, 35, 86, 71, 40, 80, 89, 33, 20, 17, 56, 4...","[29.0, 36.857142857142854, 44.714285714285715,...","[-50.0, -35.714285714285715, -21.4285714285714..."
1,"[72, 74]","[-88, 88]",87,3,"[27, 44, 67, 44, 21, 78, 61, 73, 71, 21, 74, 8...","[72.0, 72.66666666666667, 73.33333333333333, 7...","[-88.0, -29.333333333333336, 29.33333333333333..."
2,"[8, 18]","[-97, 97]",35,3,"[89, 10, 77, 57, 9, 83, 50, 56, 8, 52, 59, 0, ...","[8.0, 11.333333333333334, 14.666666666666668, ...","[-97.0, -32.33333333333333, 32.33333333333334,..."
3,"[42, 50]","[-6, 6]",58,11,"[15, 41, 88, 98, 7, 85, 0, 20, 54, 5, 77, 24, ...","[42.0, 42.72727272727273, 43.45454545454545, 4...","[-6.0, -4.909090909090909, -3.8181818181818183..."
4,"[62, 76]","[-88, 88]",19,3,"[97, 91, 14, 58, 92, 42, 58, 99, 54, 52, 85, 5...","[62.0, 66.66666666666667, 71.33333333333333, 7...","[-88.0, -29.333333333333336, 29.33333333333333..."


In [109]:
# Keep the first sections + 1 entries of each row's random matrix values, so
# there is one random offset per generated point.
df['rand_vals'] = df.apply(
    lambda row: row['random_mat_row'][:row['sections']+1], axis=1)

In [110]:
# Pair each x value with the y value at the same position: a list of (x, y)
# tuples per row.
df['points'] = df.apply(lambda row: list(
    zip(row['x_points'], row['y_points'])), axis=1)

In [111]:
def update_points(row):
    """Shift the y value of every point by the matching random offset.

    The point at position ``i`` in ``row['points']`` is shifted by
    ``row['rand_vals'][i]``: subtracted at even positions, added at odd
    positions. The x values are unchanged.

    Returns:
        A new list of ``(x, shifted_y)`` tuples.
    """
    offsets = row['rand_vals']
    return [
        (px, py - offsets[pos]) if pos % 2 == 0 else (px, py + offsets[pos])
        for pos, (px, py) in enumerate(row['points'])
    ]


# Apply the alternating random shift to every row's points.
df['poly_points'] = df.apply(update_points, axis=1)

In [112]:
def interpolate_points(row):
    """Fit the Lagrange interpolating polynomial through ``row['poly_points']``.

    Returns:
        The polynomial coefficients as a list of floats, ordered from the
        highest power down to the constant term (the order used by the
        ``numpy.poly1d`` object that ``scipy.interpolate.lagrange`` returns).
    """
    xs, ys = zip(*row['poly_points'])
    fitted = lagrange(xs, ys)
    return fitted.coeffs.tolist()


# Store each row's interpolating-polynomial coefficients as a list.
df['polynomial'] = df.apply(interpolate_points, axis=1)

In [113]:
def generate_random_text(length):
    """Return a random string of ``length`` ASCII letters and digits.

    Each character is drawn independently with ``np.random.choice``.
    """
    alphabet = np.array(list(string.ascii_letters + string.digits))
    picked = []
    for _ in range(length):
        picked.append(np.random.choice(alphabet))
    return ''.join(picked)


# Generate one random text of text_size characters per row (the row itself is ignored).
df['rand_text'] = df.apply(lambda _: generate_random_text(text_size), axis=1)

In [114]:
# Convert text to the integer formed by its UTF-8 bytes
def text_to_int(text):
    """Return the big-endian integer value of the UTF-8 encoding of ``text``."""
    encoded = text.encode('utf-8')
    return int.from_bytes(encoded, byteorder='big')


# Convert every 'rand_text' value to its integer representation and store it
# in a new column 'text_int'.
df['text_int'] = df['rand_text'].apply(text_to_int)

In [115]:
# Normalize the 'text_int' column linearly so that min_val maps to 0 and
# max_val maps to 1.
df['text_int_normalized'] = (df['text_int'] - min_val) / (max_val - min_val)

In [116]:
# Define a function to subtract the normalized value from the constant term in the polynomial
def subtract_normalized(row):
    """Return a copy of ``row['polynomial']`` with the text value subtracted.

    ``row['text_int_normalized']`` is subtracted from the last coefficient,
    which is the constant term because interpolate_points stores the
    coefficients from highest degree to lowest.

    Returns:
        A new list of coefficients; ``row['polynomial']`` is left untouched.
    """
    # Copy first: row['polynomial'] is the very list object stored in the
    # DataFrame, so modifying it in place would also overwrite the
    # 'polynomial' column.
    polynomial = list(row['polynomial'])
    polynomial[-1] -= row['text_int_normalized']
    return polynomial


# Apply subtract_normalized to each row and store the resulting coefficient
# lists in a new column 'polynomial_text'.
df['polynomial_text'] = df.apply(subtract_normalized, axis=1)

In [117]:
# Per row: last coefficient of 'polynomial_text' minus last coefficient of
# 'polynomial', stored in 'poly_text_representation'.
# NOTE(review): this only recovers -text_int_normalized if the two columns
# hold distinct list objects - verify.
df['poly_text_representation'] = df['polynomial_text'].apply(
    lambda x: x[-1]) - df['polynomial'].apply(lambda x: x[-1])

In [118]:
# df[['polynomial', 'polynomial_text']].to_csv('polynomial.csv', index=False)

In [119]:
# Keep only the x endpoints, the random text and the polynomial coefficients.
# NOTE(review): 'polynomial' is exported here rather than 'polynomial_text' -
# confirm which column is meant to go into the filtered dataset.
df_essential = df[['x', 'rand_text', 'polynomial']]

In [120]:
# Write the full frame, without the index, to 'data.csv'.
df.to_csv('data.csv', index=False)

In [121]:
# Write the reduced frame, without the index, to 'filtered_dataset.csv'.
df_essential.to_csv('filtered_dataset.csv', index=False)