In [3]:
import os
import re
import pandas as pd

# Folders for input/output
results_folder = r"E:\forecastingfolder\finalresultspl"
csv_folder = r"E:\chaosgrandfinale\teams1_updated"
chaotic_folder = r"E:\forecasting1\forecastingfolder1\chaotic_columns"
stochastic_folder = r"E:\forecasting1\forecastingfolder1\stochastic columns"

# Ensure output folders exist (though subfolders for each team will be created as needed)
os.makedirs(chaotic_folder, exist_ok=True)
os.makedirs(stochastic_folder, exist_ok=True)

# Regex to capture:
# 1) Team name from the line "Chaos Classification Results for <TEAMNAME>"
# 2) Columns and their classification from lines like:
#       Column: FTHG
#       Classification: stochastic
team_name_pattern = re.compile(r"Chaos Classification Results for\s+(.+)")
column_class_pattern = re.compile(
    r"Column:\s+(.+?)\s+Classification:\s+(chaotic|stochastic)",
    re.IGNORECASE | re.DOTALL
)

# Iterate over each text file in the results folder
for file_name in os.listdir(results_folder):
    if file_name.lower().endswith(".txt"):
        file_path = os.path.join(results_folder, file_name)
        
        # Read the entire text file
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        
        # Extract team name
        match_team = team_name_pattern.search(content)
        if not match_team:
            # If we can't find a team name, skip this file
            print(f"Team name not found in {file_name}. Skipping.")
            continue
        
        team_name = match_team.group(1).strip()
        
        # Load the CSV for this team from pldenoisedwavecsv
        # e.g. "Arsenal_denoised.csv", "Everton_denoised.csv", etc.
        csv_file_path = os.path.join(csv_folder, f"{team_name}.csv")
        if not os.path.exists(csv_file_path):
            print(f"CSV file for team '{team_name}' not found at {csv_file_path}. Skipping.")
            continue
        
        df = pd.read_csv(csv_file_path)
        
        # Find all (column_name, classification) pairs
        matches = column_class_pattern.findall(content)
        
        for col_name, classification in matches:
            col_name = col_name.strip()
            classification = classification.lower().strip()  # "chaotic" or "stochastic"
            
            # Skip columns that do not exist in the CSV
            if col_name not in df.columns:
                print(f"Column '{col_name}' not found in CSV for team '{team_name}'. Skipping.")
                continue
            
            # Retrieve the column data
            col_data = df[col_name]
            
            # Determine the output folder based on classification
            if classification == "chaotic":
                out_dir = os.path.join(chaotic_folder, team_name)
            else:  # stochastic
                out_dir = os.path.join(stochastic_folder, team_name)
            
            # Create the team-specific subfolder if needed
            os.makedirs(out_dir, exist_ok=True)
            
            # Build the output path: <out_dir>/<column_name>.txt
            out_path = os.path.join(out_dir, f"{col_name}.txt")
            
            # Write the column values to a text file (one value per line)
            col_data.to_csv(out_path, index=False, header=False)
            
            print(f"Wrote column '{col_name}' ({classification}) for team '{team_name}' to {out_path}.")


Column 'Year NOT FOUND in the dataset.
--------------------------------------------------------------
Column: FTHG' not found in CSV for team 'Arsenal'. Skipping.
Wrote column 'FTAG' (stochastic) for team 'Arsenal' to E:\forecasting1\forecastingfolder1\stochastic columns\Arsenal\FTAG.txt.
Wrote column 'HTGS' (chaotic) for team 'Arsenal' to E:\forecasting1\forecastingfolder1\chaotic_columns\Arsenal\HTGS.txt.
Wrote column 'ATGS' (chaotic) for team 'Arsenal' to E:\forecasting1\forecastingfolder1\chaotic_columns\Arsenal\ATGS.txt.
Wrote column 'HTGC' (chaotic) for team 'Arsenal' to E:\forecasting1\forecastingfolder1\chaotic_columns\Arsenal\HTGC.txt.
Wrote column 'ATGC' (chaotic) for team 'Arsenal' to E:\forecasting1\forecastingfolder1\chaotic_columns\Arsenal\ATGC.txt.
Wrote column 'HTP' (stochastic) for team 'Arsenal' to E:\forecasting1\forecastingfolder1\stochastic columns\Arsenal\HTP.txt.
Wrote column 'ATP' (chaotic) for team 'Arsenal' to E:\forecasting1\forecastingfolder1\chaotic_columns