In [22]:
import pandas as pd
import numpy as np
from scipy.interpolate import lagrange
from numpy.polynomial.polynomial import Polynomial
import string

In [23]:
# Create a DataFrame
df = pd.DataFrame()

In [24]:
samples = 1000
rand_mat_samples = 100
start = 0
end = 100
odd_start = 3
odd_end = 14  # python ignores the last number
text_size = 6

In [25]:
def x_random_numbers():
    x1 = np.random.randint(start, end)
    x2 = np.random.randint(start, end)
    while x1 == x2:
        x2 = np.random.randint(start, end)
    return min(x1, x2), max(x1, x2)


x = []
# Generate random numbers and add them to the DataFrame
for i in range(samples):
    x1, x2 = x_random_numbers()
    x.append([x1, x2])

df['x'] = x

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,x
0,"[25, 67]"
1,"[30, 45]"
2,"[9, 65]"
3,"[10, 27]"
4,"[53, 61]"


In [26]:
def generate_numbers(df, start=start+1, end=end, samples=samples):
    numbers = np.random.randint(start, end, samples)
    df['y'] = [[-num, num] for num in numbers]


generate_numbers(df)

In [27]:
def random_state(df, start=start, end=end, samples=None):
    samples = len(df) if samples is None else samples
    df['random_state'] = np.random.randint(start, end, samples)


random_state(df)
df.head()

Unnamed: 0,x,y,random_state
0,"[25, 67]","[-14, 14]",53
1,"[30, 45]","[-14, 14]",62
2,"[9, 65]","[-28, 28]",23
3,"[10, 27]","[-70, 70]",4
4,"[53, 61]","[-11, 11]",71


In [28]:
odd_numbers = np.random.choice(range(odd_start, odd_end, 2), samples)
df['sections'] = odd_numbers

In [29]:
random_mat = pd.read_csv('random_matrix.csv')
random_mat.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,83,90,22,78,5,55,64,27,11,92,11,28,26,35
1,56,32,85,37,40,93,51,62,99,26,34,77,8,88
2,8,32,67,85,37,52,81,17,24,49,98,97,60,70
3,43,61,92,21,85,23,92,14,93,18,94,83,76,6
4,33,62,89,17,96,78,59,76,0,75,99,5,4,49


In [30]:
def add_random_mat(df, random_mat):
    def get_random_mat_row(row):
        index = row['random_state'] % (rand_mat_samples)
        return random_mat.iloc[index].values

    df['random_mat_row'] = df.apply(get_random_mat_row, axis=1)
    return df


add_random_mat(df, random_mat)

Unnamed: 0,x,y,random_state,sections,random_mat_row
0,"[25, 67]","[-14, 14]",53,13,"[16, 57, 51, 72, 45, 99, 68, 75, 63, 50, 69, 7..."
1,"[30, 45]","[-14, 14]",62,5,"[91, 65, 80, 59, 16, 65, 53, 57, 47, 47, 67, 2..."
2,"[9, 65]","[-28, 28]",23,13,"[37, 56, 58, 10, 72, 0, 61, 10, 71, 67, 43, 79..."
3,"[10, 27]","[-70, 70]",4,5,"[33, 62, 89, 17, 96, 78, 59, 76, 0, 75, 99, 5,..."
4,"[53, 61]","[-11, 11]",71,3,"[28, 54, 98, 36, 38, 10, 58, 96, 14, 80, 43, 4..."
...,...,...,...,...,...
995,"[65, 70]","[-77, 77]",94,3,"[97, 48, 11, 29, 97, 83, 53, 23, 9, 78, 1, 35,..."
996,"[64, 69]","[-6, 6]",20,11,"[74, 56, 48, 4, 39, 97, 65, 14, 94, 80, 92, 64..."
997,"[2, 3]","[-82, 82]",55,7,"[8, 93, 57, 5, 17, 76, 31, 88, 20, 33, 75, 12,..."
998,"[9, 51]","[-93, 93]",75,9,"[55, 31, 23, 4, 16, 90, 42, 75, 44, 99, 7, 5, ..."


In [31]:
# Function to generate evenly spaced numbers
def generate_points_y(row):
    return list(np.linspace(row['y'][0], row['y'][1], row['sections']+1))


def generate_points_x(row):
    return list(np.linspace(row['x'][0], row['x'][1], row['sections']+1))


# Apply the function to each row
df['x_points'] = df.apply(generate_points_x, axis=1)
df['y_points'] = df.apply(generate_points_y, axis=1)

df.head()

Unnamed: 0,x,y,random_state,sections,random_mat_row,x_points,y_points
0,"[25, 67]","[-14, 14]",53,13,"[16, 57, 51, 72, 45, 99, 68, 75, 63, 50, 69, 7...","[25.0, 28.23076923076923, 31.46153846153846, 3...","[-14.0, -11.846153846153847, -9.69230769230769..."
1,"[30, 45]","[-14, 14]",62,5,"[91, 65, 80, 59, 16, 65, 53, 57, 47, 47, 67, 2...","[30.0, 33.0, 36.0, 39.0, 42.0, 45.0]","[-14.0, -8.4, -2.8000000000000007, 2.799999999..."
2,"[9, 65]","[-28, 28]",23,13,"[37, 56, 58, 10, 72, 0, 61, 10, 71, 67, 43, 79...","[9.0, 13.307692307692307, 17.615384615384613, ...","[-28.0, -23.692307692307693, -19.3846153846153..."
3,"[10, 27]","[-70, 70]",4,5,"[33, 62, 89, 17, 96, 78, 59, 76, 0, 75, 99, 5,...","[10.0, 13.4, 16.8, 20.2, 23.6, 27.0]","[-70.0, -42.0, -14.0, 14.0, 42.0, 70.0]"
4,"[53, 61]","[-11, 11]",71,3,"[28, 54, 98, 36, 38, 10, 58, 96, 14, 80, 43, 4...","[53.0, 55.666666666666664, 58.333333333333336,...","[-11.0, -3.666666666666667, 3.666666666666666,..."


In [32]:
df['rand_vals'] = df.apply(
    lambda row: row['random_mat_row'][:row['sections']+1], axis=1)

In [33]:
df['points'] = df.apply(lambda row: list(
    zip(row['x_points'], row['y_points'])), axis=1)

In [34]:
def update_points(row):
    new_points = []
    for i, (x, y) in enumerate(row['points']):
        if i % 2 == 0:  # subtract for even index
            new_y = y - row['rand_vals'][i]
        else:  # add for odd index
            new_y = y + row['rand_vals'][i]
        new_points.append((x, new_y))
    return new_points


df['poly_points'] = df.apply(update_points, axis=1)

In [35]:
def interpolate_points(row):
    x, y = zip(*row['poly_points'])
    poly = lagrange(x, y)
    return Polynomial(poly).coef.tolist()


df['polynomial'] = df.apply(interpolate_points, axis=1)

In [36]:
def generate_random_text(length):
    chars = np.array(list(string.ascii_letters + string.digits))
    text = ''.join(np.random.choice(chars) for _ in range(length))
    return text


df['rand_text'] = df.apply(lambda _: generate_random_text(text_size), axis=1)

In [37]:
# Function to convert text to its UTF-8 integer representation
def text_to_int(text):
    return int.from_bytes(text.encode('utf-8'), 'big')


# Apply the function to the 'rand_text' column and create a new column 'text_int'
df['text_int'] = df['rand_text'].apply(text_to_int)

In [38]:
max_val = 139081753198206
min_val = 35322350018592

# Normalize the 'text_int' column
df['text_int_normalized'] = (df['text_int'] - min_val) / (max_val - min_val)

In [42]:
# Define a function to subtract the normalized value from the constant term in the polynomial
def subtract_normalized(row):
    polynomial = row['polynomial']
    normalized_value = row['text_int_normalized']
    polynomial[-1] -= normalized_value
    return polynomial


# Apply the function to each row and store the results in a new column
df['polynomial_text'] = df.apply(subtract_normalized, axis=1)

In [49]:
# df[['polynomial', 'polynomial_text']].to_csv('polynomial.csv', index=False)

In [43]:
df_essential = df[['x', 'rand_text', 'polynomial']]

In [44]:
df.to_csv('data.csv', index=False)

In [45]:
df_essential.to_csv('filtered_dataset.csv', index=False)