  \begin{equation}\label{eq:dsg}
    \ddot{x}_{\ell}=k(x_{\ell+1}-2x_{\ell}+x_{\ell-1})-g\sin{x_{\ell}}
  \end{equation}
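  Here $k=g=1$ on a three-site periodic chain: $x_1, x_3, x_5$ are the site variables $x_\ell$ and $x_2, x_4, x_6$ are their time derivatives. The auxiliary features are $x_7=\sin(x_1), x_8=\sin(x_3), x_9=\sin(x_5), x_{10}=\cos(x_1), x_{11}=\cos(x_3), x_{12}=\cos(x_5)$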

In [4]:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
import csv
import pandas as pd
import itertools
import math
import os
import sympy as sp
import random
from sklearn.model_selection import train_test_split
from sympy import symbols, Function, diff
# Directory that receives the training / hold-out CSVs and the five fold files.
output_directory = r'C:\Users\mebratie\Desktop\KR\KR_Regression\Data\discrete_Sine-Gordon\DSG_T8_N85'
def generate_random_values():
    """Draw one random initial state for the six-variable system.

    Returns:
        A 6-tuple ``(x1, x2, x3, x4, x5, x6)`` of floats, each sampled
        independently with ``random.uniform(0.1, 2)``.
    """
    lower, upper = 0.1, 2
    # Six sequential draws, produced in the order x1 .. x6.
    return tuple(random.uniform(lower, upper) for _ in range(6))
def generate_data(initial_conditions):
    """Integrate the normalized three-site chain and export the samples.

    Each initial state is integrated with ``odeint`` on a fixed grid of 75
    points over ``t`` in [0, 50]. Every sample is written to ``50.csv`` in
    the current working directory as the six state variables ``x1..x6``,
    the derived features ``x7..x12`` (sines and cosines of ``x1``, ``x3``,
    ``x5``) and a 1-based ``trajectory`` label. A plot containing every
    state component of every trajectory is saved as ``trajectory.png`` in
    the hard-coded results directory, which is created if it is missing.

    Args:
        initial_conditions: Iterable of 6-element states
            ``(x1, x2, x3, x4, x5, x6)``. The right-hand side treats
            ``x1, x3, x5`` as site values and ``x2, x4, x6`` as their time
            derivatives.

    Returns:
        None. The results are the CSV file, the PNG file and the printed
        list of initial states.
    """
    def normalize(vector):
        # Scale to unit Euclidean length; a zero vector is returned as-is
        # to avoid dividing by zero at an equilibrium.
        norm = np.linalg.norm(vector)
        if norm == 0:
            return vector
        return vector / norm

    def normalized_system(y, t):
        # Sine-Gordon chain with k = g = 1 and periodic coupling between the
        # three sites (x1, x3, x5). The field is divided by its norm, so the
        # integration variable is arc length along the orbit, not physical
        # time.
        x1, x2, x3, x4, x5, x6 = y
        f = np.array([
            x2, (x3 - 2*x1 + x5) - math.sin(x1),
            x4, (x5 - 2*x3 + x1) - math.sin(x3),
            x6, (x1 - 2*x5 + x3) - math.sin(x5),
        ])
        return normalize(f)

    t = np.linspace(0, 50, 75)  # 75 sample points per trajectory

    print("Initial data (x1, x2, x3, x4, x5, x6):")
    all_trajectory_data = []
    for initial_condition in initial_conditions:
        print(f"({initial_condition[0]}, {initial_condition[1]}, {initial_condition[2]}, {initial_condition[3]}, {initial_condition[4]}, {initial_condition[5]})")
        all_trajectory_data.append(odeint(normalized_system, initial_condition, t))

    num_variables = 12  # six state variables plus six sine/cosine features
    column_names = [f'x{i+1}' for i in range(num_variables)]
    column_names.append('trajectory')
    with open('50.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(column_names)
        # Trajectory labels in the CSV start at 1.
        for label, sol in enumerate(all_trajectory_data, start=1):
            for x1, x2, x3, x4, x5, x6 in sol:
                writer.writerow([
                    x1, x2, x3, x4, x5, x6,
                    math.sin(x1), math.sin(x3), math.sin(x5),
                    math.cos(x1), math.cos(x3), math.cos(x5),
                    label,
                ])

    output_directory1 = r'C:\Users\mebratie\Desktop\KR\KR_Regression\results\discrete_Sine-Gordon\DSG_T8_N85'
    # Create the target folder up front so savefig cannot fail on a missing
    # directory after all the integration work has been done.
    os.makedirs(output_directory1, exist_ok=True)
    plt.figure(figsize=(10, 6))
    for sol in all_trajectory_data:
        for component in sol.T:  # one curve per state variable
            plt.plot(t, component)
    plt.savefig(os.path.join(output_directory1, 'trajectory.png'))
    plt.close()
def split_data():
    """Split the training CSV into five trajectory-stratified fold files.

    Reads ``trainingp_data50.csv`` from ``output_directory``, groups the rows
    by their ``trajectory`` column, shuffles each group in place with
    ``random.shuffle`` and writes five files ``B501.csv`` .. ``B505.csv``
    into ``output_directory``. Fold ``i`` receives the ``i``-th fifth of
    every trajectory's shuffled rows; when a trajectory's row count is not a
    multiple of five, its leftover rows are not written to any fold.

    Raises:
        ValueError: If the training file contains no data rows.
    """
    num_folds = 5
    # Build the path from output_directory, the same way the file is written,
    # rather than repeating the directory as a second hard-coded string.
    training_path = os.path.join(output_directory, 'trainingp_data50.csv')

    trajectories = {}
    with open(training_path, 'r', newline='') as trainfile:
        reader = csv.DictReader(trainfile)
        column_names = reader.fieldnames
        for row in reader:
            label = float(row['trajectory'])
            point = {key: float(value) for key, value in row.items()}
            trajectories.setdefault(label, []).append(point)

    if not trajectories:
        raise ValueError(f'no data rows found in {training_path}')

    for traj_points in trajectories.values():
        random.shuffle(traj_points)

    for i in range(num_folds):  # Five-fold cross-validation
        output_filename = f'B50{i+1}.csv'
        with open(os.path.join(output_directory, output_filename), 'w', newline='') as output_file:
            writer = csv.DictWriter(output_file, fieldnames=column_names)
            writer.writeheader()
            for points in trajectories.values():
                # Size the fold per trajectory so groups of different
                # lengths are still split into equal fifths.
                fold_size = len(points) // num_folds
                writer.writerows(points[i * fold_size: (i + 1) * fold_size])
if __name__ == "__main__":
    # Pipeline: simulate -> per-trajectory 80/20 train/hold-out split ->
    # write both sets to CSV -> cut the training set into five folds.
    num_trajectories = 8
    initial_conditions = [generate_random_values() for _ in range(num_trajectories)]
    generate_data(initial_conditions)

    # names=True turns the CSV header into structured-array field names.
    data = np.genfromtxt('50.csv', delimiter=',', names=True)
    training_data = []
    holdout_data = []
    # Trajectory labels in 50.csv are 1-based: 1 .. num_trajectories.
    for r in range(1, num_trajectories + 1):
        trajectory_subset = data[data['trajectory'] == r]
        # Split inside each trajectory so every trajectory contributes to
        # both the training and the hold-out set.
        train_set, holdout_set = train_test_split(trajectory_subset, test_size=0.2, random_state=42)
        training_data.extend(train_set)
        holdout_data.extend(holdout_set)

    column_names = data.dtype.names
    # Make sure the destination exists before any file is opened in it.
    os.makedirs(output_directory, exist_ok=True)
    for filename, rows in (
        ('trainingp_data50.csv', training_data),
        ('holdoutp_data50.csv', holdout_data),
    ):
        with open(os.path.join(output_directory, filename), 'w', newline='') as outfile:
            writer = csv.writer(outfile)
            writer.writerow(column_names)
            writer.writerows([row[col] for col in column_names] for row in rows)

    split_data()

Initial data (x1, x2, x3, x4, x5, x6):
(1.754701527262797, 1.2578616472374968, 1.5092459381481205, 0.10129011549687922, 0.5950675773761435, 0.9499698604635443)
(0.49851583636118035, 1.2418922431316426, 1.7132277144410837, 0.29416448328392225, 1.6764624548444338, 0.6258893520718428)
(1.9022636862687952, 0.9121900684631449, 0.1664224445197066, 1.4497153529004554, 1.056811274320941, 1.1860732905723805)
(0.8012020541656142, 1.8492044380627932, 0.1123918682841414, 1.1041010423994004, 1.2349602126791683, 1.2173620644018004)
(0.13489828515939212, 0.4151020691138705, 1.8068568872474746, 1.3495211716956257, 0.5525072605973012, 0.8509321096807038)
(0.27338247390758447, 0.519845353539973, 1.966163429059233, 1.7216534911405992, 0.7333025347711245, 0.7953833563249951)
(0.27606427691674396, 0.7637764024334989, 1.3752314248422297, 0.44025069164490493, 0.5480964983001251, 0.8490271864487324)
(1.9013791003032294, 0.42530905026801735, 1.5552781310403225, 1.333909194131377, 1.0974969165944977, 1.15916730