In [240]:
import pandas as pd
import numpy as np
from scipy.interpolate import lagrange
from numpy.polynomial.polynomial import Polynomial
import string

In [241]:
# Create a DataFrame
df = pd.DataFrame()

In [242]:
def textToIntUTF(text):
    # Convert the text to bytes using UTF-8 encoding
    byte_representation = text.encode('utf-8')
    # Convert the bytes to integer
    integer_representation = int.from_bytes(
        byte_representation, byteorder='big')
    return integer_representation


def normalize(num):
    min = 35322350018592
    max = 139081753198206
    return (num - min)/(max-min)


def denormalize(num):
    min = 35322350018592
    max = 139081753198206
    return num*(max-min) + min


def intToTextUTF(num):
    # Convert the integer to bytes using UTF-8 encoding
    byte_representation = num.to_bytes(
        (num.bit_length() + 7) // 8, byteorder='big')
    # Convert the bytes to string using UTF-8 encoding
    text = byte_representation.decode('utf-8')
    return text


print(textToIntUTF('~~~'))  # 139081753198206
print(textToIntUTF('   '))  # 35322350018592

text = "odha#@"
print(normalize(textToIntUTF(text)))
res = normalize(textToIntUTF(text))
print(textToIntUTF(text))

res = denormalize(res)

print(intToTextUTF(int(res)))

# 2
# 1.999999
# Higher Precision Means more iterations with root finding algorithm

8289918
2105376
0.8399690622714339
122477038609216
odha#@


In [243]:
samples = 10000
rand_mat_samples = 100
start = 0
end = 100
odd_start = 3
odd_end = 14  # python ignores the last number
text_size = 4

# max_val = 8289918
# min_val = 2105376

max_val = 2122219134
min_val = 538976288

In [244]:
def x_random_numbers():
    x1 = np.random.randint(start, end)
    x2 = np.random.randint(start, end)
    while x1 == x2:
        x2 = np.random.randint(start, end)
    return min(x1, x2), max(x1, x2)


x = []
# Generate random numbers and add them to the DataFrame
for i in range(samples):
    x1, x2 = x_random_numbers()
    x.append([x1, x2])

df['x'] = x

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,x
0,"[49, 62]"
1,"[32, 62]"
2,"[49, 96]"
3,"[27, 64]"
4,"[10, 36]"


In [245]:
def generate_numbers(df, start=start+1, end=end, samples=samples):
    numbers = np.random.randint(start, end, samples)
    df['y'] = [[-num, num] for num in numbers]


generate_numbers(df)

In [246]:
def random_state(df, start=start, end=end, samples=None):
    samples = len(df) if samples is None else samples
    df['random_state'] = np.random.randint(start, end, samples)


random_state(df)
df.head()

Unnamed: 0,x,y,random_state
0,"[49, 62]","[-57, 57]",87
1,"[32, 62]","[-56, 56]",86
2,"[49, 96]","[-81, 81]",87
3,"[27, 64]","[-97, 97]",94
4,"[10, 36]","[-21, 21]",42


In [247]:
odd_numbers = np.random.choice(range(odd_start, odd_end, 2), samples)
df['sections'] = odd_numbers

In [248]:
random_mat = pd.read_csv('random_matrix.csv')
random_mat.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,83,90,22,78,5,55,64,27,11,92,11,28,26,35
1,56,32,85,37,40,93,51,62,99,26,34,77,8,88
2,8,32,67,85,37,52,81,17,24,49,98,97,60,70
3,43,61,92,21,85,23,92,14,93,18,94,83,76,6
4,33,62,89,17,96,78,59,76,0,75,99,5,4,49


In [249]:
def add_random_mat(df, random_mat):
    def get_random_mat_row(row):
        index = row['random_state'] % (rand_mat_samples)
        return random_mat.iloc[index].values

    df['random_mat_row'] = df.apply(get_random_mat_row, axis=1)
    return df


add_random_mat(df, random_mat)

Unnamed: 0,x,y,random_state,sections,random_mat_row
0,"[49, 62]","[-57, 57]",87,7,"[27, 44, 67, 44, 21, 78, 61, 73, 71, 21, 74, 8..."
1,"[32, 62]","[-56, 56]",86,7,"[12, 56, 82, 51, 23, 46, 9, 27, 38, 15, 57, 14..."
2,"[49, 96]","[-81, 81]",87,7,"[27, 44, 67, 44, 21, 78, 61, 73, 71, 21, 74, 8..."
3,"[27, 64]","[-97, 97]",94,9,"[97, 48, 11, 29, 97, 83, 53, 23, 9, 78, 1, 35,..."
4,"[10, 36]","[-21, 21]",42,7,"[69, 44, 50, 43, 25, 40, 54, 63, 17, 86, 35, 4..."
...,...,...,...,...,...
9995,"[95, 97]","[-37, 37]",45,9,"[27, 70, 44, 29, 46, 70, 61, 57, 21, 13, 18, 4..."
9996,"[76, 87]","[-21, 21]",74,7,"[59, 17, 53, 52, 2, 32, 99, 82, 57, 89, 19, 5,..."
9997,"[45, 54]","[-93, 93]",82,7,"[54, 93, 14, 64, 30, 67, 98, 33, 76, 68, 95, 7..."
9998,"[14, 52]","[-21, 21]",13,11,"[0, 98, 70, 21, 96, 57, 54, 54, 85, 21, 5, 53,..."


In [250]:
# Function to generate evenly spaced numbers
def generate_points_y(row):
    return list(np.linspace(row['y'][0], row['y'][1], row['sections']+1))


def generate_points_x(row):
    return list(np.linspace(row['x'][0], row['x'][1], row['sections']+1))


# Apply the function to each row
df['x_points'] = df.apply(generate_points_x, axis=1)
df['y_points'] = df.apply(generate_points_y, axis=1)

df.head()

Unnamed: 0,x,y,random_state,sections,random_mat_row,x_points,y_points
0,"[49, 62]","[-57, 57]",87,7,"[27, 44, 67, 44, 21, 78, 61, 73, 71, 21, 74, 8...","[49.0, 50.857142857142854, 52.714285714285715,...","[-57.0, -40.714285714285715, -24.4285714285714..."
1,"[32, 62]","[-56, 56]",86,7,"[12, 56, 82, 51, 23, 46, 9, 27, 38, 15, 57, 14...","[32.0, 36.285714285714285, 40.57142857142857, ...","[-56.0, -40.0, -24.0, -8.0, 8.0, 24.0, 40.0, 5..."
2,"[49, 96]","[-81, 81]",87,7,"[27, 44, 67, 44, 21, 78, 61, 73, 71, 21, 74, 8...","[49.0, 55.714285714285715, 62.42857142857143, ...","[-81.0, -57.85714285714286, -34.71428571428571..."
3,"[27, 64]","[-97, 97]",94,9,"[97, 48, 11, 29, 97, 83, 53, 23, 9, 78, 1, 35,...","[27.0, 31.11111111111111, 35.22222222222222, 3...","[-97.0, -75.44444444444444, -53.88888888888888..."
4,"[10, 36]","[-21, 21]",42,7,"[69, 44, 50, 43, 25, 40, 54, 63, 17, 86, 35, 4...","[10.0, 13.714285714285715, 17.42857142857143, ...","[-21.0, -15.0, -9.0, -3.0, 3.0, 9.0, 15.0, 21.0]"


In [251]:
df['rand_vals'] = df.apply(
    lambda row: row['random_mat_row'][:row['sections']+1], axis=1)

In [252]:
df['points'] = df.apply(lambda row: list(
    zip(row['x_points'], row['y_points'])), axis=1)

In [253]:
def update_points(row):
    new_points = []
    for i, (x, y) in enumerate(row['points']):
        if i % 2 == 0:  # subtract for even index
            new_y = y - row['rand_vals'][i]
        else:  # add for odd index
            new_y = y + row['rand_vals'][i]
        new_points.append((x, new_y))
    return new_points


df['poly_points'] = df.apply(update_points, axis=1)

In [254]:
def interpolate_points(row):
    x, y = zip(*row['poly_points'])
    poly = lagrange(x, y)
    return Polynomial(poly).coef.tolist()


df['polynomial'] = df.apply(interpolate_points, axis=1)

In [255]:
def generate_random_text(length):
    chars = np.array(list(string.ascii_letters + string.digits))
    text = ''.join(np.random.choice(chars) for _ in range(length))
    return text


df['rand_text'] = df.apply(lambda _: generate_random_text(text_size), axis=1)

In [256]:
# Function to convert text to its UTF-8 integer representation
def text_to_int(text):
    return int.from_bytes(text.encode('utf-8'), 'big')


# Apply the function to the 'rand_text' column and create a new column 'text_int'
df['text_int'] = df['rand_text'].apply(text_to_int)

In [257]:
# Normalize the 'text_int' column
df['text_normalized'] = (df['text_int'] - min_val) / (max_val - min_val)

In [258]:
def subtract_normalized(row):
    polynomial = list(row['polynomial'])
    normalized_value = float(row['text_normalized'])
    polynomial[-1] -= normalized_value
    return polynomial


df['polynomial_text_normalized'] = df.apply(subtract_normalized, axis=1)

In [259]:
def subtract_normalized(row):
    polynomial = list(row['polynomial'])
    text_int_value = float(row['text_int'])
    polynomial[-1] -= text_int_value
    return polynomial


df['polynomial_text_int'] = df.apply(subtract_normalized, axis=1)

In [260]:
# # Define a function to subtract the normalized value from the constant term in the polynomial
# def subtract_normalized(row):
#     polynomial = row['polynomial'].copy()
#     normalized_value = row['text_int_normalized']
#     polynomial[-1] -= normalized_value
#     return polynomial


# # Apply the function to each row and store the results in a new column
# df['polynomial_text'] = df.apply(subtract_normalized, axis=1)

In [261]:
# Subtract the last value of 'polynomial' from the last value of 'polynomial_text' and store the result in 'poly_text_representation'
df['poly_normalized_text_representation'] = df['polynomial'].apply(
    lambda x: x[-1]) - df['polynomial_text_normalized'].apply(lambda x: x[-1])

In [262]:
# Subtract the last value of 'polynomial' from the last value of 'polynomial_text' and store the result in 'poly_text_representation'
df['poly_int_text_representation'] = df['polynomial'].apply(
    lambda x: x[-1]) - df['polynomial_text_int'].apply(lambda x: x[-1])

In [263]:
# df[['polynomial', 'polynomial_text']].to_csv('polynomial.csv', index=False)

In [264]:
# save the first value in the array polynomial in polynomial column in a separate column
df['first_polynomial_coeff'] = df['polynomial'].copy().apply(lambda x: x[0])

In [265]:
# Get a Boolean Series where each element is True if the
# corresponding value in the 'first_polynomial_value' column
# is greater than 1
greater_than_zero = df['first_polynomial_coeff'].gt(1)

# Count the number of True values in the Series
count = greater_than_zero.sum()

print(count)

1424


In [266]:
df = df.loc[df['first_polynomial_coeff'] > 1]
df = df.loc[df['poly_int_text_representation'] != 0.0]

In [267]:
# import pandas as pd
# df = pd.read_csv("data.csv")

In [268]:
import math

# f is a polynomial with the coeffs in polynomial_text_int
# a is the first value in x interval
# b is the first value in x interval+300


def blendBF(f, a, b, eps=10**(-2), max_iter=300):

    # Initialize the variables
    n = 0
    a1 = a
    a2 = a
    b1 = b
    b2 = b
    print(f"working on {a} to {b} and {f}")
    while True:
        print(n)
        # Increment the iteration counter
        n += 1

        # Check if the number of iterations exceeds the maximum number of iterations
        if n > max_iter:
            return -99
        # Evaluate the function at the endpoints
        fa = f(a)
        fb = f(b)

        # Compute the midpoint and the false position point
        xB = (a + b) / 2
        fxB = f(xB)

        if fb == fa:
            return n
        else:
            xF = a - (fa * (b - a)) / (fb - fa)
            xF = a - (fa * (b - a)) / (fb - fa)
            fxF = f(xF)

        # Choose the one with the smaller absolute value as the root approximation
        if abs(fxB) < abs(fxF):
            x = xB
            fx = fxB
        else:
            x = xF
            fx = fxF

        # Check if the absolute value of fx is less than or equal to the tolerance
        if abs(fx) <= eps:
            # Return the output
            # n is the number of iterations
            # x is the root approximation
            # fx is the function value
            # a is the left endpoint
            # b is the right endpoint
            return x

        # Update the interval by applying the bisection and false position methods
        if fa * fxB < 0:
            b1 = xB
        else:
            a1 = xB

        if fa * fxF < 0:
            b2 = xF
        else:
            a2 = xF

        # Set a to the maximum of a1 and a2 and b to the minimum of b1 and b2
        a = max(a1, a2)
        b = min(b1, b2)

In [269]:
coeff = [1, -3, 2]  # coefficients for x^2 - 3x + 2
p = np.poly1d(coeff)

# Use the polynomial function with blendBF
root = blendBF(p, 0, 2)

working on 0 to 2 and    2
1 x - 3 x + 2
0


In [270]:
# Define the polynomial function
def polynomial(x, coeffs):
    return sum([coeff*(x**i) for i, coeff in enumerate(coeffs)])


# Apply the blendBF function
df['root'] = df.apply(lambda row: blendBF(lambda x: polynomial(x, row['polynomial_text_int']),
                                          row['x'][0],
                                          row['x'][0]+300), axis=1)

working on 6 to 306 and <function <lambda>.<locals>.<lambda> at 0x7f1e66decae0>
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
working on 18 to 318 and <function <lambda>.<locals>.<lambda> at 0x7f1e66dec7c0>
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
working on 7 to 307 and <function <lambda>.<locals>.<lambda> at 0x7f1e66dec7c0>
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
working on 52 to 352 and <function <lambda>.<locals>.<lambda> at 0x7f1e66dec7c0>
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
working on 6 to 306 and <function <lambda>.<locals>.<lambda> at 0x7f1e

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
working on 80 to 380 and <function <lambda>.<locals>.<lambda> at 0x7f1e66decae0>
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
working on 0 to 300 and <function <lambda>.<locals>.<lambda> at 0x7f1e66decae0>
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
working on 84 to 384 and <function <lambda>.<locals>.<lambda> at 0x7f1e66decae0>
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
working on 80 to 380 and <function <lambda>.<locals>.<lambda> at 0x7f1e66decae0>
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
working on 88 to 388 and <

In [271]:
df_filtered = df[['first_polynomial_coeff',
                  'text_int', 'poly_int_text_representation', 'root']]

In [272]:
df.to_csv('data.csv', index=False)

In [273]:
df_filtered.to_csv('filtered_dataset.csv', index=False)