In [319]:
import pandas as pd
import numpy as np
from scipy.interpolate import lagrange
from numpy.polynomial.polynomial import Polynomial
import string

In [320]:
# Create a DataFrame
df = pd.DataFrame()

In [321]:
def textToIntUTF(text):
    """Return the big-endian integer spelled by the UTF-8 bytes of ``text``.

    An empty string maps to 0.
    """
    return int.from_bytes(text.encode('utf-8'), byteorder='big')


def normalize(num, lo=35322350018592, hi=139081753198206):
    """Min-max scale ``num`` from the range ``[lo, hi]`` onto ``[0, 1]``.

    Parameters
    ----------
    num : number
        Value to scale.
    lo, hi : number, optional
        Range bounds. The defaults are 0x202020202020 and 0x7E7E7E7E7E7E,
        i.e. the integers formed by six space bytes and six '~' bytes.

    Returns
    -------
    float
        ``(num - lo) / (hi - lo)``.

    Raises
    ------
    ValueError
        If ``hi == lo`` (the range has zero width).
    """
    # Local names avoid shadowing the built-in min()/max().
    span = hi - lo
    if span == 0:
        raise ValueError("normalize() requires hi != lo")
    return (num - lo) / span


def denormalize(num, lo=35322350018592, hi=139081753198206):
    """Map a value scaled to ``[0, 1]`` back onto the range ``[lo, hi]``.

    Parameters
    ----------
    num : number
        Scaled value.
    lo, hi : number, optional
        Range bounds. The defaults are 0x202020202020 and 0x7E7E7E7E7E7E,
        i.e. the integers formed by six space bytes and six '~' bytes.

    Returns
    -------
    number
        ``num * (hi - lo) + lo``.
    """
    # Local names avoid shadowing the built-in min()/max().
    return num * (hi - lo) + lo


def intToTextUTF(num):
    """Decode the big-endian bytes of the integer ``num`` as UTF-8 text.

    The byte count is the smallest number of whole bytes that holds
    ``num.bit_length()`` bits, so 0 decodes to the empty string.
    """
    whole_bytes, spare_bits = divmod(num.bit_length(), 8)
    n_bytes = whole_bytes + 1 if spare_bits else whole_bytes
    return num.to_bytes(n_bytes, byteorder='big').decode('utf-8')


# Three-character bounds; the six-character equivalents are the constants
# hard-coded in normalize()/denormalize().
print(textToIntUTF('~~~'))  # 8289918 (0x7E7E7E); six '~' give 139081753198206
print(textToIntUTF('   '))  # 2105376 (0x202020); six spaces give 35322350018592

# Round trip: text -> integer -> normalized float -> integer -> text.
text = "odha#@"
print(normalize(textToIntUTF(text)))
res = normalize(textToIntUTF(text))
print(textToIntUTF(text))

res = denormalize(res)

# int() truncates the float before decoding.
# NOTE(review): the round trip goes through floating point, so an exact
# recovery is not guaranteed for every input -- confirm for longer text.
print(intToTextUTF(int(res)))

# 2
# 1.999999
# Higher Precision Means more iterations with root finding algorithm

8289918
2105376
0.8399690622714339
122477038609216
odha#@


In [322]:
# Number of rows generated for the dataset
samples = 10000
# Modulus applied to 'random_state' when choosing a row of random_mat
rand_mat_samples = 100
# Bounds passed to np.random.randint (which excludes the upper bound)
start = 0
end = 100
# Section counts are drawn from range(odd_start, odd_end, 2), i.e. 3, 5, ..., 13
odd_start = 3
odd_end = 14  # range() excludes its end value
# Length of each random text string
text_size = 4

# Bounds for 3-character text ('~~~' and three spaces), kept for reference
# max_val = 8289918
# min_val = 2105376

# Bounds for 4-character text: 0x7E7E7E7E ('~~~~') and 0x20202020 (four spaces)
max_val = 2122219134
min_val = 538976288

In [323]:
def x_random_numbers():
    """Draw two distinct integers from ``[start, end)`` and return them as (low, high).

    The second draw is repeated until it differs from the first.
    """
    first = np.random.randint(start, end)
    second = np.random.randint(start, end)
    while second == first:
        second = np.random.randint(start, end)
    if first < second:
        return first, second
    return second, first


# One sorted [low, high] pair per row, produced by x_random_numbers()
x = [list(x_random_numbers()) for _ in range(samples)]

df['x'] = x

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,x
0,"[16, 93]"
1,"[13, 84]"
2,"[14, 64]"
3,"[48, 93]"
4,"[29, 94]"


In [324]:
def generate_numbers(df, start=start+1, end=end, samples=samples):
    """Add a 'y' column of symmetric intervals ``[-n, n]`` to ``df`` in place.

    ``samples`` integers ``n`` are drawn with ``np.random.randint(start, end)``;
    nothing is returned.
    """
    magnitudes = np.random.randint(start, end, samples)
    intervals = []
    for magnitude in magnitudes:
        intervals.append([-magnitude, magnitude])
    df['y'] = intervals


generate_numbers(df)

In [325]:
def random_state(df, start=start, end=end, samples=None):
    """Add a 'random_state' column of random integers to ``df`` in place.

    When ``samples`` is None, one value is drawn per row of ``df``.
    """
    if samples is None:
        samples = len(df)
    drawn = np.random.randint(start, end, samples)
    df['random_state'] = drawn


random_state(df)
df.head()

Unnamed: 0,x,y,random_state
0,"[16, 93]","[-68, 68]",33
1,"[13, 84]","[-21, 21]",0
2,"[14, 64]","[-44, 44]",19
3,"[48, 93]","[-46, 46]",11
4,"[29, 94]","[-73, 73]",96


In [326]:
# Draw one odd section count per row; range(odd_start, odd_end, 2) stops before odd_end
odd_numbers = np.random.choice(range(odd_start, odd_end, 2), samples)
df['sections'] = odd_numbers

In [327]:
random_mat = pd.read_csv('random_matrix.csv')
random_mat.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,83,90,22,78,5,55,64,27,11,92,11,28,26,35
1,56,32,85,37,40,93,51,62,99,26,34,77,8,88
2,8,32,67,85,37,52,81,17,24,49,98,97,60,70
3,43,61,92,21,85,23,92,14,93,18,94,83,76,6
4,33,62,89,17,96,78,59,76,0,75,99,5,4,49


In [328]:
def add_random_mat(df, random_mat):
    """Attach one row of ``random_mat`` to every row of ``df`` and return ``df``.

    The matrix row is chosen by position ``random_state % rand_mat_samples``
    and its values are stored in the new 'random_mat_row' column.
    """
    def _values_for(record):
        position = record['random_state'] % (rand_mat_samples)
        chosen = random_mat.iloc[position]
        return chosen.values

    picked_rows = df.apply(_values_for, axis=1)
    df['random_mat_row'] = picked_rows
    return df


add_random_mat(df, random_mat)

Unnamed: 0,x,y,random_state,sections,random_mat_row
0,"[16, 93]","[-68, 68]",33,9,"[58, 37, 30, 56, 88, 32, 98, 19, 7, 79, 60, 93..."
1,"[13, 84]","[-21, 21]",0,11,"[83, 90, 22, 78, 5, 55, 64, 27, 11, 92, 11, 28..."
2,"[14, 64]","[-44, 44]",19,9,"[97, 91, 14, 58, 92, 42, 58, 99, 54, 52, 85, 5..."
3,"[48, 93]","[-46, 46]",11,9,"[78, 71, 24, 33, 43, 54, 33, 68, 90, 99, 22, 5..."
4,"[29, 94]","[-73, 73]",96,11,"[74, 38, 77, 91, 25, 92, 0, 11, 86, 35, 7, 89,..."
...,...,...,...,...,...
9995,"[22, 94]","[-40, 40]",24,3,"[75, 28, 71, 22, 90, 18, 64, 6, 15, 23, 27, 40..."
9996,"[39, 89]","[-81, 81]",88,13,"[62, 78, 49, 88, 21, 89, 56, 97, 57, 54, 70, 2..."
9997,"[31, 73]","[-31, 31]",52,9,"[20, 93, 67, 27, 90, 6, 74, 83, 87, 37, 91, 4,..."
9998,"[33, 45]","[-91, 91]",29,7,"[60, 28, 92, 31, 94, 11, 47, 28, 50, 96, 81, 6..."


In [329]:
# Function to generate evenly spaced numbers
def generate_points_y(row):
    """Return ``sections + 1`` evenly spaced values spanning the row's 'y' interval."""
    low, high = row['y'][0], row['y'][1]
    n_points = row['sections'] + 1
    return list(np.linspace(low, high, n_points))


def generate_points_x(row):
    """Return ``sections + 1`` evenly spaced values spanning the row's 'x' interval."""
    left, right = row['x'][0], row['x'][1]
    n_points = row['sections'] + 1
    return list(np.linspace(left, right, n_points))


# Apply the function to each row
df['x_points'] = df.apply(generate_points_x, axis=1)
df['y_points'] = df.apply(generate_points_y, axis=1)

df.head()

Unnamed: 0,x,y,random_state,sections,random_mat_row,x_points,y_points
0,"[16, 93]","[-68, 68]",33,9,"[58, 37, 30, 56, 88, 32, 98, 19, 7, 79, 60, 93...","[16.0, 24.555555555555557, 33.111111111111114,...","[-68.0, -52.888888888888886, -37.7777777777777..."
1,"[13, 84]","[-21, 21]",0,11,"[83, 90, 22, 78, 5, 55, 64, 27, 11, 92, 11, 28...","[13.0, 19.454545454545453, 25.909090909090907,...","[-21.0, -17.18181818181818, -13.36363636363636..."
2,"[14, 64]","[-44, 44]",19,9,"[97, 91, 14, 58, 92, 42, 58, 99, 54, 52, 85, 5...","[14.0, 19.555555555555557, 25.11111111111111, ...","[-44.0, -34.22222222222222, -24.44444444444444..."
3,"[48, 93]","[-46, 46]",11,9,"[78, 71, 24, 33, 43, 54, 33, 68, 90, 99, 22, 5...","[48.0, 53.0, 58.0, 63.0, 68.0, 73.0, 78.0, 83....","[-46.0, -35.77777777777778, -25.55555555555555..."
4,"[29, 94]","[-73, 73]",96,11,"[74, 38, 77, 91, 25, 92, 0, 11, 86, 35, 7, 89,...","[29.0, 34.90909090909091, 40.81818181818182, 4...","[-73.0, -59.72727272727273, -46.45454545454545..."


In [330]:
# Keep the first sections+1 values of each row's random-matrix row,
# matching the sections+1 entries in x_points / y_points
df['rand_vals'] = df.apply(
    lambda row: row['random_mat_row'][:row['sections']+1], axis=1)

In [331]:
# Pair each entry of x_points with the matching entry of y_points as (x, y) tuples
df['points'] = df.apply(lambda row: list(
    zip(row['x_points'], row['y_points'])), axis=1)

In [332]:
def update_points(row):
    """Shift each point's y by the matching entry of ``row['rand_vals']``.

    Points at even positions have the value subtracted, points at odd
    positions have it added; x coordinates are left untouched.
    """
    return [
        (px, py - row['rand_vals'][idx]) if idx % 2 == 0
        else (px, py + row['rand_vals'][idx])
        for idx, (px, py) in enumerate(row['points'])
    ]


df['poly_points'] = df.apply(update_points, axis=1)

In [333]:
def interpolate_points(row):
    """Fit the Lagrange polynomial through ``row['poly_points']``.

    Parameters
    ----------
    row : mapping
        Must provide 'poly_points', a non-empty sequence of (x, y) pairs.

    Returns
    -------
    list of float
        Coefficients of the interpolating polynomial, highest power first
        (the order used by ``numpy.poly1d``), so the last entry is the
        constant term.
    """
    x, y = zip(*row['poly_points'])
    poly = lagrange(x, y)
    # Read the poly1d coefficients directly. Wrapping them in
    # numpy.polynomial.Polynomial (which expects lowest power first) only to
    # read the same array back was redundant and obscured the ordering.
    return poly.coeffs.tolist()


df['polynomial'] = df.apply(interpolate_points, axis=1)

In [334]:
def generate_random_text(length):
    """Return ``length`` characters drawn one at a time from ASCII letters and digits."""
    alphabet = np.array(list(string.ascii_letters + string.digits))
    picked = []
    for _ in range(length):
        picked.append(np.random.choice(alphabet))
    return ''.join(picked)


df['rand_text'] = df.apply(lambda _: generate_random_text(text_size), axis=1)

In [335]:
# Function to convert text to its UTF-8 integer representation
def text_to_int(text):
    """Return the integer whose big-endian bytes are the UTF-8 encoding of ``text``."""
    utf8_bytes = text.encode('utf-8')
    value = int.from_bytes(utf8_bytes, 'big')
    return value


# Apply the function to the 'rand_text' column and create a new column 'text_int'
df['text_int'] = df['rand_text'].apply(text_to_int)

In [336]:
# Min-max normalize the 'text_int' column using the 4-character bounds
# min_val / max_val defined in the configuration cell
df['text_normalized'] = (df['text_int'] - min_val) / (max_val - min_val)

In [337]:
def subtract_normalized(row):
    """Return a copy of ``row['polynomial']`` with ``row['text_normalized']``
    subtracted from its last entry; the input list is not modified.
    """
    shifted = [*row['polynomial']]
    offset = float(row['text_normalized'])
    shifted[-1] = shifted[-1] - offset
    return shifted


df['polynomial_text_normalized'] = df.apply(subtract_normalized, axis=1)

In [338]:
def subtract_normalized(row):
    """Return a copy of ``row['polynomial']`` with ``row['text_int']``
    subtracted from its last entry; the input list is not modified.

    NOTE: this reuses the name of the earlier ``subtract_normalized`` (which
    subtracts ``row['text_normalized']``) and replaces it from here on.
    """
    coeffs = list(row['polynomial'])
    offset = float(row['text_int'])
    return coeffs[:-1] + [coeffs[-1] - offset]


df['polynomial_text_int'] = df.apply(subtract_normalized, axis=1)

In [339]:
# # Define a function to subtract the normalized value from the constant term in the polynomial
# def subtract_normalized(row):
#     polynomial = row['polynomial'].copy()
#     normalized_value = row['text_int_normalized']
#     polynomial[-1] -= normalized_value
#     return polynomial


# # Apply the function to each row and store the results in a new column
# df['polynomial_text'] = df.apply(subtract_normalized, axis=1)

In [340]:
# Last entry of 'polynomial' minus last entry of 'polynomial_text_normalized',
# stored in 'poly_normalized_text_representation'
df['poly_normalized_text_representation'] = df['polynomial'].apply(
    lambda x: x[-1]) - df['polynomial_text_normalized'].apply(lambda x: x[-1])

In [341]:
# Last entry of 'polynomial' minus last entry of 'polynomial_text_int',
# stored in 'poly_int_text_representation'
df['poly_int_text_representation'] = df['polynomial'].apply(
    lambda x: x[-1]) - df['polynomial_text_int'].apply(lambda x: x[-1])

In [342]:
# df[['polynomial', 'polynomial_text']].to_csv('polynomial.csv', index=False)

In [343]:
# Store element 0 of each 'polynomial' list in its own column. polynomial()
# pairs the reversed list with powers 0, 1, 2, ..., so element 0 is the
# coefficient of the highest power.
df['first_polynomial_coeff'] = df['polynomial'].copy().apply(lambda x: x[0])

In [344]:
# Get a Boolean Series where each element is True if the
# corresponding value in the 'first_polynomial_coeff' column
# is greater than 1.
# NOTE: despite its name, `greater_than_zero` tests "> 1", not "> 0".
greater_than_zero = df['first_polynomial_coeff'].gt(1)

# Count the number of True values in the Series
count = greater_than_zero.sum()

print(count)

1414


In [345]:
# Keep rows whose leading coefficient exceeds 1 and whose recovered text value
# is non-zero. .copy() makes the result an independent frame, so the columns
# added to it later do not raise SettingWithCopyWarning.
df = df.loc[
    (df['first_polynomial_coeff'] > 1)
    & (df['poly_int_text_representation'] != 0.0)
].copy()

In [346]:
# import pandas as pd
# df = pd.read_csv("data.csv")

In [347]:
# f is the function being solved, e.g. the polynomial with the coeffs in polynomial_text_int
# a is the left end of the search interval (the first value in the x interval)
# b is the right end of the search interval (the caller below passes a + 3000)


def blendBF(f, a, b, eps=10**(-2), max_iter=25):
    """Approximate a root of ``f`` by blending bisection with false position.

    Each iteration evaluates both the interval midpoint and the
    false-position point, keeps whichever has the smaller ``|f|`` as the
    current estimate, and then shrinks the interval using both candidates.

    Parameters
    ----------
    f : callable
        Function of one variable whose root is sought.
    a, b : number
        Left and right ends of the starting interval. The function does not
        check that ``f(a)`` and ``f(b)`` have opposite signs.
    eps : float, optional
        The estimate ``x`` is accepted once ``abs(f(x)) <= eps``.
    max_iter : int, optional
        Maximum number of iterations to run.

    Returns
    -------
    number or str
        The root estimate, or a sentinel string: ``'iter'`` when ``max_iter``
        iterations pass without meeting the tolerance, ``'div'`` when
        ``f(a) == f(b)`` so the false-position step would divide by zero.
    """
    n = 0
    # Each method tracks its own bounds; they are merged at the end of every pass.
    a1 = a
    a2 = a
    b1 = b
    b2 = b
    while True:
        n += 1

        # Give up once the iteration budget is spent
        if n > max_iter:
            return 'iter'

        # Evaluate the function at the endpoints
        fa = f(a)
        fb = f(b)

        # Bisection candidate: the midpoint
        xB = (a + b) / 2
        fxB = f(xB)

        # False-position candidate: where the chord through the endpoints
        # crosses zero; undefined when the endpoint values are equal
        if fb == fa:
            return 'div'
        xF = a - (fa * (b - a)) / (fb - fa)
        fxF = f(xF)

        # Choose the candidate with the smaller absolute function value
        if abs(fxB) < abs(fxF):
            x = xB
            fx = fxB
        else:
            x = xF
            fx = fxF

        # Accept the estimate once it is within tolerance
        if abs(fx) <= eps:
            return x

        # Update the bisection bounds by the sign change against f(a)
        if fa * fxB < 0:
            b1 = xB
        else:
            a1 = xB

        # Update the false-position bounds the same way
        if fa * fxF < 0:
            b2 = xF
        else:
            a2 = xF

        # Keep the tightest interval offered by the two methods
        a = max(a1, a2)
        b = min(b1, b2)

In [371]:
# Define the polynomial function
def polynomial(x, coeffs):
    """Evaluate a polynomial at ``x``.

    ``coeffs`` lists the coefficients highest power first: the last entry is
    the constant term. An empty ``coeffs`` evaluates to 0.
    """
    total = 0
    for power, coeff in enumerate(coeffs[::-1]):
        total = total + coeff*(x**power)
    return total


# Apply the blendBF function to each row: search for a zero of the polynomial
# with coefficients 'polynomial_text_int' on [x[0], x[0] + 3000]. Each result is
# a root estimate or one of blendBF's sentinel strings ('iter' / 'div').
df['root'] = df.apply(lambda row: blendBF(lambda x: polynomial(x, row['polynomial_text_int']),
                                          row['x'][0],
                                          row['x'][0]+3000), axis=1)

In [372]:
length = len(df)
iter_rows = len(df[df['root'] == 'iter'])
div_rows = len(df[df['root'] == 'div'])
print(
f"""
rows = {length}
iter = {iter_rows}
div = {div_rows}
rows - (iter + div) = {length - (iter_rows + div_rows)}
""")


rows = 779
iter = 0
div = 0
rows - (iter + div) = 779



In [373]:
# Drop every row where blendBF returned a sentinel string instead of a root.
# A leftover 'div' would reach polynomial() in the next step and fail on string
# arithmetic. .copy() keeps later column assignments off a slice of the old frame.
df = df.loc[~df['root'].isin(['iter', 'div'])].copy()

In [374]:
def polynomial(x, coeffs):
    """Evaluate the polynomial whose coefficients ``coeffs`` are listed highest
    power first (last entry is the constant term) at ``x``.

    NOTE: this repeats the definition of ``polynomial`` given earlier in the
    notebook.
    """
    ascending = coeffs[::-1]
    return sum(coeff*(x**power) for power, coeff in enumerate(ascending))


df['text_from_root'] = df.apply(lambda row: polynomial(
    row['root'], row['polynomial']), axis=1)

In [375]:
# TODO review this line
df['text_from_root'] = df['text_from_root'].round()

In [376]:
df_filtered = df[['poly_int_text_representation',
                  'root', 'rand_text', 'text_int', 'text_from_root']]

In [377]:
df.to_csv('data.csv', index=False)

In [378]:
df_filtered.to_csv('filtered_dataset.csv', index=False)