In [1]:
import os
import random
import re
import numpy as np
import shutil
import pandas as pd
from dataclasses import dataclass
from typing import List, Dict, Optional
from pathlib import Path
import logging

In [2]:
# Seeded NumPy generator so random draws are reproducible (the seed used to be 42).
rng = np.random.default_rng(seed=152)

In [3]:
# Sub-folder naming the batch of runs to process. Both base locations below are
# built by plain string concatenation, so data_folder carries the trailing slash.
data_folder = "output_19_01_2025/"
# Directory that is scanned for the individual run folders.
folder_path_outputs = '/app/nse/outputs/' + data_folder
# Results location for the same batch (not referenced by the cells visible here).
file_path_results = '/app/nse/results/' + data_folder

In [4]:
def read_data(filename):
    """Load the numeric body of a text file that starts with a 3-line header.

    Args:
        filename: Path of the text file to read.

    Returns:
        numpy.ndarray: Whatever ``np.loadtxt`` parses from the lines that
        follow the three header lines.

    Raises:
        StopIteration: If the file contains fewer than three lines.
    """
    with open(filename, 'r') as handle:
        # Consume the header one line at a time; np.loadtxt then continues
        # reading from the current position of the same file object.
        header_lines_left = 3
        while header_lines_left:
            next(handle)
            header_lines_left -= 1

        values = np.loadtxt(handle)

    return values

In [5]:
def get_folder_paths(folder_path, folder_pattern):
    """Return the sub-directories of ``folder_path`` whose names match a regex.

    Args:
        folder_path: Directory to scan. A trailing path separator is optional;
            with one, the returned strings are identical to plain
            ``folder_path + name`` concatenation.
        folder_pattern: Regular expression (not a shell glob) applied with
            ``re.search`` to each entry name, so it may match anywhere in
            the name.

    Returns:
        list[str]: Paths of the matching sub-directories, sorted ascending by
        ``os.path.getctime``. That timestamp is platform-dependent
        (metadata-change time on Unix, creation time on Windows).
    """
    folder_list = []
    for name in os.listdir(folder_path):
        # Build the path once with os.path.join so the value that is tested
        # with isdir is exactly the value that is returned and later stat'ed.
        full_path = os.path.join(folder_path, name)
        if os.path.isdir(full_path) and re.search(folder_pattern, name):
            folder_list.append(full_path)
    folder_list.sort(key=os.path.getctime)
    return folder_list

In [6]:
def extract_number(filename):
    """Return the integer inside ``C[<digits>]-avg-.plt`` found in ``filename``.

    Names that do not contain that pattern yield ``float('inf')``, which
    places them after every numbered name when the result is used as a sort key.
    """
    found = re.search(r'C\[(\d+)\]-avg-\.plt', filename)
    if found is None:
        return float('inf')
    return int(found.group(1))

In [7]:
def sort_filenames(filenames):
    """Return a new list of ``filenames`` ordered by ``extract_number``.

    The input iterable is left untouched; names with equal keys keep their
    original relative order.
    """
    ordered = list(filenames)
    ordered.sort(key=extract_number)
    return ordered

In [8]:
def get_filenames(file_path, file_pattern):
    """List the regular files in ``file_path`` whose names match a regex.

    Args:
        file_path: Directory to scan.
        file_pattern: Regular expression applied to each entry name with
            ``re.search``.

    Returns:
        list[str]: Bare file names (no directory part), ordered by
        ``sort_filenames``.
    """
    matching = []
    for entry in os.listdir(file_path):
        # Skip directories and anything else that is not a regular file.
        if not os.path.isfile(os.path.join(file_path, entry)):
            continue
        if re.search(file_pattern, entry):
            matching.append(entry)
    return sort_filenames(matching)

In [9]:
def add_text_to_file(file_path, line_number, text_to_add):
    """Insert ``text_to_add`` as a new line of the file, rewriting it in place.

    Args:
        file_path: Path of the text file to modify.
        line_number: 1-based line number the new text should occupy. Values
            past the end of the file append the text. The value is converted
            to a list index with ``line_number - 1`` and passed to
            ``list.insert``, so values below 1 follow ``list.insert``
            semantics (e.g. 0 inserts before the last line).
        text_to_add: Text of the new line, without a trailing newline (one is
            appended).
    """
    # Read the contents of the file
    with open(file_path, 'r') as file:
        lines = file.readlines()

    insert_at = line_number - 1

    # Only the last line of a file can lack a newline. When the new text goes
    # after it, terminate that line first so the two do not fuse into one.
    if lines and insert_at >= len(lines) and not lines[-1].endswith('\n'):
        lines[-1] += '\n'

    # Insert the new text at the specified line number
    lines.insert(insert_at, text_to_add + '\n')

    # Write the modified contents back to the file
    with open(file_path, 'w') as file:
        file.writelines(lines)

In [10]:
def change_lines(file_path, lines_to_change, changed_lines):
    """Replace lines of a file that match a sequence of regexes, in order.

    The file is scanned once from top to bottom. The first line matching
    ``lines_to_change[0]`` is replaced by ``changed_lines[0]``; scanning then
    continues from the following line looking for ``lines_to_change[1]``, and
    so on. The file is rewritten in place even when nothing matched.

    Args:
        file_path: Path of the text file to modify.
        lines_to_change: Regular expressions (used with ``re.search``), in the
            order their target lines appear in the file.
        changed_lines: Replacement lines, paired by position with the
            patterns. They are written verbatim, so each should carry its own
            trailing newline.

    Side effects:
        Prints how many replacements could not be applied, if any.
    """
    # Read the contents of the file
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Only positions that have both a pattern and a replacement can be applied;
    # this also makes empty inputs a no-op instead of an IndexError.
    applicable = min(len(lines_to_change), len(changed_lines))

    i = 0
    if applicable:
        for idx, line in enumerate(lines):
            if re.search(lines_to_change[i], line):
                lines[idx] = changed_lines[i]
                i += 1
                if i == applicable:
                    break
    if len(changed_lines) != i:
        print(len(changed_lines) - i, "lines not changed")

    # Write the modified contents back to the file
    with open(file_path, 'w') as file:
        file.writelines(lines)

In [11]:
# Collect the per-run output folders of the selected batch.
# NOTE(review): get_folder_paths applies the pattern with re.search, so "output*"
# is a regular expression, not a shell glob: it means "outpu" followed by zero or
# more "t" and therefore matches any folder name containing "outpu".
outputs_folder_names = get_folder_paths(folder_path_outputs, "output*")

In [12]:
@dataclass
class TracerData:
    """One tracer point-emission entry read from a configuration file.

    ``ConfigParser._parse_tracers`` builds instances positionally in the order
    ``(index, y_pos, z_pos, value)``, so the field order is part of the
    interface.
    """
    # Integer N captured from the ``tracer_N`` section name.
    index: int
    # Number following the ``+`` in the block's ``ypos = ... + <number>`` entry.
    y_pos: float
    # Number from the block's ``zpos = <number>`` entry.
    z_pos: float
    # Number from the block's ``value = <number>`` entry; DataAnalyzer exports it as 'power'.
    value: float

@dataclass
class ConfigData:
    """Scalar settings of one run plus its tracers, as produced by ``ConfigParser.parse``.

    Instances are built positionally in the declared field order.
    """
    # Number from the ``U = <number>`` entry; DataAnalyzer exports it as 'u'.
    U: float
    # Number from the ``z0_m = <number>`` entry; exported as 'roughness'.
    z0_m: float
    # Number from the ``surface_value = <number>`` entry; exported as 'T'.
    surface_value: float
    # Number from the ``grad_z = <number>`` entry; exported as 'T_grad'.
    grad_z: float
    # Negative number from the ``value = ...`` line annotated "# sensible heat flux";
    # exported as 'sensible_heat_flux'.
    value:float
    # Tracers of the run, sorted by their index.
    tracers: List[TracerData]
# Configure logging: records of level INFO and above, each formatted as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Logger named after the current module; ConfigParser and DataAnalyzer log through it.
logger = logging.getLogger(__name__)

class ConfigParser:
    """Extract numeric settings from a plain-text configuration file via regex.

    Typical use is ``ConfigParser(path).parse()``, which loads the file on
    demand and returns a ``ConfigData``.

    All number patterns except the sensible-heat-flux one capture ``[0-9.]+``
    only, i.e. digits and dots without sign or exponent.

    Attributes:
        config_path: ``pathlib.Path`` built from the constructor argument.
        config_text: Full file contents, or ``None`` until ``read_config`` runs.
        config_data: Result of the last successful ``parse``, otherwise ``None``.
    """

    def __init__(self, config_path: str):
        self.config_path = Path(config_path)
        self.config_text = None
        self.config_data = None

    def read_config(self) -> None:
        """Read the file at ``config_path`` into ``config_text``.

        Raises:
            FileNotFoundError: If the file does not exist.
            Exception: Any other error raised while opening or reading. Every
                failure is logged at ERROR level and re-raised unchanged.
        """
        try:
            with open(self.config_path, 'r') as handle:
                self.config_text = handle.read()
            logger.info(f"Successfully read configuration file: {self.config_path}")
        except Exception as e:
            # Single handler for all failures; only the log message differs.
            if isinstance(e, FileNotFoundError):
                logger.error(f"Configuration file not found: {self.config_path}")
            else:
                logger.error(f"Error reading configuration file: {e}")
            raise

    def _extract_float_value(self, pattern: str) -> float:
        """Return capture group 1 of the first match of ``pattern`` as a float.

        Raises:
            ValueError: If ``pattern`` does not match ``config_text``, or the
                captured text is not a valid float literal.
        """
        found = re.search(pattern, self.config_text)
        if found is None:
            raise ValueError(f"Could not find value matching pattern: {pattern}")
        return float(found.group(1))

    def _parse_tracers(self) -> List[TracerData]:
        """Collect every ``tracer_N`` section that has a ``point_emission`` block.

        Returns:
            The tracers found in ``config_text``, sorted by index. The list is
            empty when no section matches.

        Raises:
            Exception: Any failure while reading a tracer's fields (e.g. a
                missing ``value``/``ypos``/``zpos`` entry) is logged and
                re-raised.
        """
        # Group 1: tracer number. Group 2: text between the braces of the
        # point_emission block.
        section_pattern = r'tracer_(\d+)\s*{[^}]*}\s*#[^]]*]\s*point_emission\s*{([^}]*)}'
        # Listed in evaluation order: value, then ypos, then zpos.
        field_patterns = (
            r'value\s*=\s*([0-9.]+)',
            r'ypos\s*=\s*[^+]*\+\s*([0-9.]+)',
            r'zpos\s*=\s*([0-9.]+)',
        )

        tracers = []
        for section in re.finditer(section_pattern, self.config_text):
            index = int(section.group(1))
            emission_body = section.group(2)

            try:
                # A missing field makes re.search return None, so .group fails
                # and the handler below reports which tracer was affected.
                value, y_pos, z_pos = [
                    float(re.search(pattern, emission_body).group(1))
                    for pattern in field_patterns
                ]
                tracers.append(TracerData(index, y_pos, z_pos, value))
            except Exception as e:
                logger.error(f"Error parsing tracer {index}: {e}")
                raise

        tracers.sort(key=lambda tracer: tracer.index)
        return tracers

    def parse(self) -> ConfigData:
        """Parse the configuration into a ``ConfigData`` and cache it.

        The file is read first when ``config_text`` is still empty.

        Returns:
            The parsed data, also stored in ``config_data``.

        Raises:
            Exception: Any parsing failure is logged and re-raised; a scalar
                entry that cannot be found surfaces as ``ValueError``.
        """
        if not self.config_text:
            self.read_config()

        # Order matches the positional fields of ConfigData. The last pattern
        # only accepts a negative number and requires a literal tab before the
        # "# sensible heat flux" annotation.
        scalar_patterns = (
            r'U\s*=\s*([0-9.]+)',
            r'z0_m\s*=\s*([0-9.]+)',
            r'surface_value\s*=\s*([0-9.]+)',
            r'grad_z\s*=\s*([0-9.]+)',
            r'value\s*=\s*(-[0-9.]+);	# sensible heat flux',
        )

        try:
            scalars = [self._extract_float_value(pattern) for pattern in scalar_patterns]
            tracers = self._parse_tracers()

            self.config_data = ConfigData(*scalars, tracers)
            logger.info("Successfully parsed configuration data")

            return self.config_data

        except Exception as e:
            logger.error(f"Error parsing configuration: {e}")
            raise

class DataAnalyzer:
    """Turn a parsed ``ConfigData`` into a flat, one-row-per-tracer table."""

    def __init__(self, config_data: ConfigData):
        self.config_data = config_data

    def create_dataframe(self) -> pd.DataFrame:
        """Build the per-tracer table.

        Returns:
            A DataFrame with one row per tracer and the columns ``Tracer``,
            ``y``, ``z``, ``u``, ``power``, ``roughness``, ``T``,
            ``sensible_heat_flux`` and ``T_grad``. The run-level settings are
            repeated on every row.
        """
        cfg = self.config_data
        records = [
            {
                'Tracer': tracer.index,
                'y': tracer.y_pos,
                'z': tracer.z_pos,
                'u': cfg.U,
                'power': tracer.value,
                'roughness': cfg.z0_m,
                'T': cfg.surface_value,
                'sensible_heat_flux': cfg.value,
                'T_grad': cfg.grad_z,
            }
            for tracer in cfg.tracers
        ]
        return pd.DataFrame(records)

    def save_to_csv(self, output_path: str) -> None:
        """Write the table from ``create_dataframe`` to ``output_path`` without the index."""
        self.create_dataframe().to_csv(output_path, index=False)
        logger.info(f"Data saved to: {output_path}")

In [13]:
# Parse every run's config.txt into a per-tracer feature table. The frames are
# collected in a list and concatenated once: calling pd.concat inside the loop
# re-copies all previously accumulated rows on each iteration, and seeding the
# result with an empty DataFrame relies on pandas ignoring empty inputs when it
# determines the result dtypes.
feature_frames = []
for folder in outputs_folder_names:
    config_path = folder + "/config.txt"
    parser = ConfigParser(config_path)
    features = parser.parse()
    analyzer = DataAnalyzer(features)
    features_pd = analyzer.create_dataframe()
    feature_frames.append(features_pd)

# pd.concat raises ValueError on an empty list, so keep the empty-frame result
# when no run folders were found. Row labels are not reset, so each run's rows
# keep their own 0..n-1 index.
features_all = pd.concat(feature_frames, axis=0) if feature_frames else pd.DataFrame()

2025-02-23 10:21:15,937 - INFO - Successfully read configuration file: /app/nse/outputs/output_19_01_2025/output_2025_2_10_15_14_17/config.txt
2025-02-23 10:21:15,940 - INFO - Successfully parsed configuration data
2025-02-23 10:21:15,941 - INFO - Successfully read configuration file: /app/nse/outputs/output_19_01_2025/output_2025_2_10_16_2_9/config.txt
2025-02-23 10:21:15,943 - INFO - Successfully parsed configuration data
2025-02-23 10:21:15,944 - INFO - Successfully read configuration file: /app/nse/outputs/output_19_01_2025/output_2025_2_10_16_48_16/config.txt
2025-02-23 10:21:15,946 - INFO - Successfully parsed configuration data
2025-02-23 10:21:16,095 - INFO - Successfully read configuration file: /app/nse/outputs/output_19_01_2025/output_2025_2_10_17_34_51/config.txt
2025-02-23 10:21:16,097 - INFO - Successfully parsed configuration data
2025-02-23 10:21:16,173 - INFO - Successfully read configuration file: /app/nse/outputs/output_19_01_2025/output_2025_2_10_18_24_27/config.txt

In [14]:
# Sanity check: (rows, columns) of the combined per-tracer feature table.
features_all.shape

(24192, 9)

In [15]:
# Persist the combined table without the index column. The relative path is
# resolved against the current working directory.
# NOTE(review): file_path_results is defined above but not used here; confirm
# whether the CSV was meant to be written there.
features_all.to_csv("features.csv", index=False)