In [3]:
import numpy as np
import pandas as pd

def generate_5x5_matrix_data(n_samples, filename, value_range=(-10, 10), include_empty_pred=False, seed=0):
    """Write a CSV of random 5x5 integer matrices whose eigenvalues are all real.

    Candidate matrices are drawn repeatedly and kept only when every
    eigenvalue reported by ``np.linalg.eigvals`` is real; drawing stops once
    ``n_samples`` matrices have been accepted.

    Each CSV row holds:

    * ``a`` .. ``y`` -- the 25 matrix entries in row-major order,
    * ``eig1`` .. ``eig5`` -- the eigenvalues, in the order NumPy returns them,
    * ``sum_squared`` -- the sum of the squared eigenvalues,
    * ``predicted_sum_squared`` -- an empty placeholder column, present only
      when ``include_empty_pred`` is true.

    Args:
        n_samples: Number of accepted matrices (rows) to generate.
        filename: Destination path of the CSV file.
        value_range: ``(low, high)`` bounds for the integer entries; both
            ends are inclusive.
        include_empty_pred: Append the blank ``predicted_sum_squared`` column.
        seed: Seed for the random stream, making the output reproducible.
    """
    # A private legacy generator yields exactly the same numbers as
    # ``np.random.seed(seed)`` followed by ``np.random.randint`` would, but
    # does not clobber the process-wide random state other code may rely on.
    rng = np.random.RandomState(seed)
    low = value_range[0]
    high_exclusive = value_range[1] + 1  # randint excludes its upper bound
    data = []

    while len(data) < n_samples:
        entries = rng.randint(low, high_exclusive, size=25)
        eigenvalues = np.linalg.eigvals(entries.reshape(5, 5))

        # Rejection sampling: discard any matrix with a complex eigenvalue.
        if not np.all(np.isreal(eigenvalues)):
            continue

        eigs = np.real(eigenvalues)
        row = list(entries) + list(eigs) + [np.sum(eigs ** 2)]
        if include_empty_pred:
            row.append("")
        data.append(row)

    cols = list("abcdefghijklmnopqrstuvwxy")
    cols += [f"eig{k}" for k in range(1, 6)]
    cols.append('sum_squared')
    if include_empty_pred:
        cols.append('predicted_sum_squared')

    df = pd.DataFrame(data, columns=cols)
    df.to_csv(filename, index=False)
    # Report what was actually written rather than what was requested.
    print(f"Saved {len(data)} 5x5 matrices to {filename}.")


# Training set: 3000 labelled matrices with entries in [-10, 10].
generate_5x5_matrix_data(
    3000,
    'train_5x5_eigen_data.csv',
    value_range=(-10, 10),
    include_empty_pred=False,
    seed=42,
)

# Test set: 100 matrices from a different seed, with a blank
# 'predicted_sum_squared' column left to be filled in later.
generate_5x5_matrix_data(
    100,
    'test_5x5_eigen_data.csv',
    value_range=(-10, 10),
    include_empty_pred=True,
    seed=99,
)


Saved 3000 5x5 matrices to train_5x5_eigen_data.csv.
Saved 100 5x5 matrices to test_5x5_eigen_data.csv.
