In [7]:
import re

def parse_jcl_content(jcl_content):
    # Regular expressions for program names, DSN filenames, and DISP values
    program_pattern = re.compile(r'^\/\/\S+\s+EXEC\s+PGM=([a-zA-Z0-9_]{1,8})', re.IGNORECASE)
    dsn_pattern = re.compile(r'^\/\/\S+\s+DD\s+.*?DSN=([^,]+)', re.IGNORECASE)
    disp_pattern = re.compile(r'^\/\/\S+\s+DD\s+.*?DISP=([\w]+)', re.IGNORECASE)

    results = []
    current_program = None
    current_dsn = None
    current_disp = None

    for line in jcl_content.splitlines():
        if line.startswith('//*') or line.startswith('//**'):
            continue  # Ignore comment lines

        program_match = program_pattern.match(line)
        if program_match:
            # When a new program name is found, save the current program's DSN/DISP pair
            if current_program and current_dsn and current_disp:
                results.append({
                    'program_name': current_program,
                    'dsn_filename': current_dsn,
                    'disp_value': current_disp
                })
            # Reset for the new program
            current_program = program_match.group(1)
            current_dsn = None
            current_disp = None
            continue

        if current_program:
            dsn_match = dsn_pattern.match(line)
            if dsn_match:
                current_dsn = dsn_match.group(1).strip()
            
            disp_match = disp_pattern.match(line)
            if disp_match:
                current_disp = disp_match.group(1).strip()

            if current_dsn and current_disp:
                # Store the DSN/DISP pair
                results.append({
                    'program_name': current_program,
                    'dsn_filename': current_dsn,
                    'disp_value': current_disp
                })
                # Reset DSN and DISP for potential next DSN/DISP pair in the same program
                current_dsn = None
                current_disp = None

    return results

# Example usage
jcl_content = """
//STEP1   EXEC PGM=PROGRAM1
//SYSIN   DD DSN=filename1,DISP=SHR
//SYSIN   DD DSN=filename4,DISP=MOD
//*SYSIN   DD DSN=filename4,DISP=MOD
//**SYSIN   DD DSN=filename4,DISP=MOD
//STEP2   EXEC PGM=PROGRAM2
//SYSIN   DD DSN=filename2,DISP=NEW
//SYSIN   DD DSN=filename5,DISP=SHR
//STEP3   EXEC PGM=PROGRAM3
//SYSIN   DD DSN=filename3,DISP=MOD
//SYSIN   DD DSN=filename6,DISP=NEW
"""

program_dsn_disp = parse_jcl_content(jcl_content)
for entry in program_dsn_disp:
    print(f"Program name: {entry['program_name']}, DSN filename: {entry['dsn_filename']}, DISP value: {entry['disp_value']}")


Program name: PROGRAM1, DSN filename: filename1, DISP value: SHR
Program name: PROGRAM1, DSN filename: filename4, DISP value: MOD
Program name: PROGRAM2, DSN filename: filename2, DISP value: NEW
Program name: PROGRAM2, DSN filename: filename5, DISP value: SHR
Program name: PROGRAM3, DSN filename: filename3, DISP value: MOD
Program name: PROGRAM3, DSN filename: filename6, DISP value: NEW


In [8]:
def extract_job_and_proc_name(filename):
    job_names = []
    filename = filename.strip("'")
    pattern = re.compile(r'\(([^()]+)\)')
    matches = pattern.findall(filename)
    job_names.extend(matches)
    return job_names

In [9]:
import os
import re
import pandas as pd

# Define the regex pattern for checking lines starting with //
pattern = re.compile(r'^(?!\/\/\*{1,2})\/\/.*')

def traverse_jcl_directory(jcl_directory):
    all_jcl_results = []

    # Traverse the directory with JCL files
    for root, dirs, files in os.walk(jcl_directory):
        for file in files:
            job_name = extract_job_and_proc_name(file)[0]  # Assuming first match is the job name
            file_path = os.path.join(root, file)

            with open(file_path, 'r') as jcl_file:
                # Read content and filter lines
                content_lines = [line for line in jcl_file if pattern.match(line)]
                content = "\n".join(content_lines)

                # Parse the content for program names and DSN/DISP values
                jcl_results = parse_jcl_content(content)

                # Append results with job name
                for result in jcl_results:
                    all_jcl_results.append({
                        'job_name': job_name,
                        'program_name': result['program_name'],
                        'dsn_filename': result['dsn_filename'],
                        'disp_value': result['disp_value']
                    })

    # Convert the collected data into a DataFrame for better presentation
    df = pd.DataFrame(all_jcl_results)
    return df

# Example usage
jcl_directory = "/path/to/jcl/files"
jcl_df = traverse_jcl_directory(jcl_directory)

# Print the DataFrame
print(jcl_df)


Empty DataFrame
Columns: []
Index: []
