# Description
This notebook loads a collection of txt-files containing data about 
1. TIME     
2. FASTF     
3. PCOOL      
4. TCOOL      
5. GKL        
6. PITCH      
7. [Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10] 

These variables are stored in a dataset list where each element is a numpy array that contains the relevant values evaluated at some macro time-steps. \
The purpose of this notebook is to convert the numpy arrays to string-input files that can be handled by the TU.

In [1]:
import numpy as np 
import pandas as pd
import os, re
import matplotlib.pyplot as plt
from tqdm import tqdm
from numba import njit
from time import perf_counter as pc

# Raise the pandas display limits so that large frames are shown without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# plt.style.use(["notebook", "science"])
# Default appearance for every figure drawn in this notebook.
plt.rcParams.update({
    "figure.figsize": [15, 7],
    "figure.dpi": 150,
    "lines.linewidth": 2,
})

# Print numpy arrays on wide lines with a precision of 3.
np.set_printoptions(linewidth=200, precision=3)

# Loading the files

In [2]:
map_path = "raw txt-input 4889 samples 10 nodes/"

dir_list = os.listdir(map_path) # all the files (os.listdir returns them in arbitrary order)
# Keep only the txt files and sort them in natural order ("2.txt" before "10.txt"),
# so that dataset[n] refers to the same file on every run and on every platform.
# re.split with a capturing group alternates text / digit tokens, so the keys stay comparable.
paths = sorted(
    (file for file in dir_list if file.endswith(".txt")),
    key=lambda name: [int(token) if token.isdecimal() else token for token in re.split(r"(\d+)", name)],
)
# a list with arrays
dataset = [np.loadtxt(os.path.join(map_path, path)) for path in tqdm(paths)]

100%|██████████| 4889/4889 [00:01<00:00, 3986.86it/s]


The following functions convert the numpy arrays in the dataset list to input-files that can be handled by the TU.

In [3]:
def extract_number(string):
    """
    Pull the sample count out of a name such as "raw txt-input 4889 samples 10 nodes/".

    string: text expected to contain "input <digits> samples" (separated by single spaces)

    Returns the digits as an integer, or None when the pattern does not occur.
    """
    found = re.search(r'input (\d+) samples', string)
    return int(found.group(1)) if found else None
    
def get_features(data): # add pressure later
    """
    Slice one loaded sample into the quantities used downstream.

    data: numpy array whose last axis holds the columns
          TIME, FASTF, PCOOL, TCOOL, GKL, PITCH, Q1, Q2, ...

    Returns the tuple (t, Q, P, FASTF, T):
        t     - column 0
        Q     - column 6 and everything after it
        P     - mean of column 2 over all entries (a single number)
        FASTF - column 1, kept as a length-1 last axis
        T     - column 3, flattened to one dimension
    """
    time_column = data[..., 0]
    fastf_column = data[..., 1:2]
    pressure_mean = data[..., 2:3].mean() # this is almost always constant
    temperature_flat = data[..., 3:4].flatten()
    q_columns = data[..., 6:]

    return time_column, q_columns, pressure_mean, fastf_column, temperature_flat


def int_to_list(integer): # parameter must be between 0 and 999
    """
    Split a non-negative integer into its decimal digits, left-padded with zeros to three digits.

    int_to_list(123) -> [1,2,3]
    int_to_list(10) -> [0,1,0]
    int_to_list(3) -> [0,0,3]
    """
    padded = str(integer).zfill(3)
    return [int(character) for character in padded]

    
def header(*args, message="begin"):
    """
    Build the comment line that marks the start or end of a data point.

    *args: the pieces of the data point number; they are written one after another without separator
    message: text appended after the number, separated by one space

    header(1,2) -> "* data point #12 begin"
    header(0,0,3) -> "* data point #003 begin"
    header(1,3,4,5) -> "* data point #1345 begin"

    Should be used together with "int_to_list"
    header(*int_to_list(10), message="begin") -> "* data point #010 begin"
    """
    # Only the positional arguments are formatted; the message is appended verbatim so that
    # curly braces inside it are not mistaken for format placeholders.
    number = "".join("{}".format(arg) for arg in args)
    return f"* data point #{number} {message}"


@np.vectorize
def convert_E_to_D(number):
    """
    Render a number in scientific notation with 13 decimals, using the Fortran style "D" exponent marker.

    Because of the np.vectorize decorator the function accepts scalars and array-likes and
    returns a numpy array of strings.

    convert_E_to_D(123) -> array('1.2300000000000D+02', dtype='<U19')
    convert_E_to_D([123, 456.7]) -> array(['1.2300000000000D+02', '4.5670000000000D+02'], dtype='<U19')

    To get a string representation just wrap str around the result.

    str(convert_E_to_D([123, 456.7])) -> "['1.2300000000000D+02' '4.5670000000000D+02']"
    """
    # format as e.g. 1.2300000000000E+02, then swap the exponent marker E for D
    return f"{float(number):0.13E}".replace("E", "D")


def string_with_floats_to_array(sequence_str):
    """
    Parse a string of numbers separated by one or more spaces into a numpy array of floats.

    string_with_floats_to_array("1 2 3") -> array([1., 2., 3.])
    """
    # splitting on a single space leaves empty strings between repeated spaces; drop them
    tokens = filter(None, sequence_str.split(" "))
    return np.array(list(map(float, tokens)))


def increase_spacing_opt(text, spacing=3, significant_digits=5):
    """
    Re-format whitespace separated numbers into right-aligned fixed-width columns.

    text: string with floats separated by whitespace; lines are separated by "\\n"
    spacing: int, enters the column width as 2 * spacing
    significant_digits: int, number of digits written after the decimal point

    Every number is written in fixed-point notation in a column that is
    significant_digits + 2 * spacing - 1 characters wide, padded with zeros on the right
    and with spaces on the left. The line structure of the input is kept.

    increase_spacing_opt("0.1 0.24    0.369", spacing=3, significant_digits=5) -> "   0.10000   0.24000   0.36900"
    """
    column_width = significant_digits + 2 * spacing - 1

    rendered_rows = []
    for row in text.split("\n"):
        cells = [f"{float(token):{column_width}.{significant_digits}f}" for token in row.split()]
        rendered_rows.append("".join(cells))

    return "\n".join(rendered_rows)


def array_to_table(Q, precision=5):
    """
    Write the values of Q as a text table with eight columns of width 10.

    Q: iterable of numbers
    precision: precision used in the format specification of every value

    Returns a single string; a newline follows every eighth value.
    """
    cell_template = f"{{:10.{precision}}}"

    pieces = []
    for position, value in enumerate(Q, start=1):
        pieces.append(cell_template.format(value))
        if position % 8 == 0: # row is full
            pieces.append("\n")

    return "".join(pieces)

def array_to_table_list(profiles):
    """
    Apply array_to_table (with its default precision) to every element of profiles.

    Returns a list with one table string per element.
    """
    return list(map(array_to_table, profiles))
    

def save_to_TU(string, n, map_dir):
    """
    Write string to the file "TU_<n>.txt" inside the directory map_dir.

    string: complete content of the file
    n: index used in the file name
    map_dir: target directory, with or without a trailing path separator.
             It is created (including parents) when it does not exist yet;
             an empty string means the current working directory.
    """
    if map_dir:
        os.makedirs(map_dir, exist_ok=True)
    # os.path.join yields the same path as plain concatenation when map_dir ends with a separator
    # and inserts the separator when it is missing
    with open(os.path.join(map_dir, f"TU_{n}.txt"), "w") as file:
        file.write(string)
    
# Comment line (it starts with "*") that is written between the sections of the generated file.
separation_line = "*---+----+-------------------+-------------------+---------+---------------------\n"

def print_time(t, complete_file):
    """
    Append the "printout at time" section to the file content built so far.

    t: the ready-formatted time line (string without trailing newline)
    complete_file: the text accumulated so far

    Returns complete_file followed by a separator, the explanatory comment line,
    another separator, the time line and a closing separator.
    """
    # for visual purposes
    caption = "*   1: printout of the result at time [hours]\n"
    section = separation_line + caption + separation_line + t + "\n" + separation_line
    return complete_file + section


def program_termination(t, complete_file):
    """
    Append the closing section of the data set to the file content built so far.

    t: the ready-formatted time string that is written directly after "00000    0"
    complete_file: the text accumulated so far

    Returns complete_file followed by the explanatory comment line, a separator,
    the terminating line and a closing separator.
    """
    caption = "*  00:  last line of data set (finishing the program)\n"
    closing = caption + separation_line + "00000    0" + t + "\n" + separation_line
    return complete_file + closing


#profile = np.random.rand(33, 10)
#print(array_to_table_list(profile))

The numbers 2, 3, 9, 10 in the following printout\
*---+----+-------------------+-------------------+---------+---------------------\
    2    1 0.0000000000000D+00 2.1240100000000D+01\
   0.59300   0.86800   1.01500   1.11400   1.18000\
   1.21500   1.21000   1.16000   1.01900   0.62400\
    3    1 0.0000000000000D+00 1.0284456420000D+13\
   0.59300   0.86800   1.01500   1.11400   1.18000\
   1.21500   1.21000   1.16000   1.01900   0.62400\
    9    0 0.0000000000000D+00 2.9170000000000D+02\
   10    0 0.0000000000000D+00 1.5800000000000D+01\
*---+----+-------------------+-------------------+---------+---------------------\
have the following meanings:\
2: LHGR \
3: Flux \
9: Temperature \
10: Pressure

In [4]:
def ph_to_TU(data, n, save=True, map_dir=None):
    """
    Convert one sample to the text of a TU input file and optionally write it to disk.

    data: numpy array with one row per macro time-step and the columns
          TIME, FASTF, PCOOL, TCOOL, GKL, PITCH, Q1, Q2, ...
          A 1-D array (what np.loadtxt returns for a file with a single row)
          is treated as a single time-step.
    n: zero-based index of the sample. The file is named "TU_<n>.txt",
       the header lines inside the file show n + 1.
    save: when True the text is written to disk with save_to_TU
    map_dir: output directory. When None the directory
             "input/dataset_12491/subset_<N>/" is used, where N is extracted
             from the global map_path.

    Returns the complete file content as a string (also when save is False).

    Raises ValueError when data does not have at least one row and seven columns.
    """
    # np.loadtxt collapses single-row files to 1-D; restore the (time-step, column) layout
    data = np.atleast_2d(data)
    if data.ndim != 2 or data.shape[0] == 0 or data.shape[1] < 7:
        raise ValueError(
            f"expected an array of shape (time-steps, >= 7 columns), got shape {data.shape}"
        )

    t, Q, P, FASTF, T = get_features(data)

    Q_mean = Q.mean(-1, keepdims=True)
    Q_profiles = Q / Q_mean

    flux = Q * FASTF / 10 # n/(cm^2 s)
    flux_mean = flux.mean(-1, keepdims=True)
    # FASTF has a single column, so FASTF.mean(1, keepdims=True) equals FASTF itself.
    # NOTE(review): this gives NaN for time-steps where FASTF == 0 - confirm that this cannot occur.
    flux_profiles = Q_profiles * FASTF / FASTF.mean(1, keepdims=True)

    # doing the vectorized conversions before the loop to save some time
    t_fortran = convert_E_to_D(t)
    Q_mean_fortran = convert_E_to_D(Q_mean[:,0])
    flux_mean_fortran = convert_E_to_D(flux_mean[:,0])
    T_fortran = convert_E_to_D(T)
    P_fortran = convert_E_to_D(P) # P is one number, so the pressure line has the same value at every time-step

    Q_table = array_to_table_list(Q_profiles)
    flux_table = array_to_table_list(flux_profiles)

    # The pieces are collected in a list and joined once at the end,
    # which avoids rebuilding the growing string at every time-step.
    # adding the starting line
    parts = [header(*int_to_list(n+1), message="begin") + "\n*\n"]

    for i in range(len(t)):
        # leading numbers of the data lines: 2 = LHGR, 3 = flux, 9 = temperature, 10 = pressure
        heat_headline = "{:5d}{:5d} {} {}\n".format(2, 1, t_fortran[i], Q_mean_fortran[i])
        flux_headline = "{:5d}{:5d} {} {}\n".format(3, 1, t_fortran[i], flux_mean_fortran[i])
        temperature_headline = "{:5d}{:5d} {} {}\n".format(9, 0, t_fortran[i], T_fortran[i])
        pressure_headline = "{:5d}{:5d} {} {}\n".format(10, 0, t_fortran[i], P_fortran)
        time_attribute = "{:5d}{:5d} {}".format(1, 0, t_fortran[i])

        # printing out the time (visual purpose)
        parts.append(print_time(time_attribute, ""))
        parts.append(heat_headline + Q_table[i] + "\n"
                     + flux_headline + flux_table[i] + "\n"
                     + temperature_headline
                     + pressure_headline)

    parts.append(header(*int_to_list(n+1), message="end") + "\n")
    # the program is terminated at the last time-step
    parts.append(program_termination(f" {t_fortran[-1]}", ""))

    complete_file = "".join(parts)

    if save:
        if map_dir is None:
            map_dir = f"input/dataset_12491/subset_{extract_number(map_path)}/" ### NOTE!!! specify which map
        save_to_TU(complete_file, n, map_dir=map_dir)

    return complete_file
            
            
# Convert every loaded sample; the position in the dataset list is used as the file index.
for n, sample in enumerate(tqdm(dataset)):
#for n in [0]:
    ph_to_TU(sample, n, save=True)

100%|██████████| 4889/4889 [00:07<00:00, 665.88it/s]
