In [10]:
from bs4 import BeautifulSoup as soup
import glob
import os
import pandas as pd
from openpyxl import load_workbook

In [11]:
# Configuration: the HTML reports are looked up in the current working directory.
folder_path = os.getcwd()
# glob returns matches in file-system-dependent order; sorting keeps the file
# indices and the order of the extracted steps reproducible between runs.
html_files = sorted(glob.glob(os.path.join(folder_path, '*.html')))
# Parsed documents, filled by the loading cell.
lxmls = []
# Workbook that receives the extracted results.
input_file = 'input.xlsx'

In [12]:
# Parse every HTML report into a BeautifulSoup document.
# A file that cannot be read or parsed is recorded in error.log and skipped.
lxmls.clear()  # start from an empty list so re-running this cell does not duplicate documents
for file_path in html_files:
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            html_content = file.read()

        lxmls.append(soup(html_content, 'lxml'))
    except Exception as exc:
        with open('error.log', 'a', encoding='utf-8') as error_log:
            error_log.write(f"Error processing file {file_path}: {exc}\n")

In [13]:
_ABORT_MESSAGE = 'Test aborted due to BreakOnFail behavior.'


def _log_extraction_error(exc):
    """Append one extraction error to error.log."""
    with open('error.log', 'a') as error_log:
        error_log.write(f'Error extracting information: {exc}\n')


def _parse_step_heading(text):
    """Split a Heading4 caption into (step name, status, expected output)."""
    if 'None' in text:
        # "<name>: None" - the step carries no verdict and no expectation.
        return text.strip().rsplit(': ', 1)[0], '', ''
    if 'Expected' not in text:
        # "<name>: <status>"
        parts = text.rsplit(': ', 1)
        return parts[0].strip(), parts[1].strip(), ''
    # "<name> Expected: <expected output>: <status>"
    parts = text.rsplit(':', 1)
    parts[0:1] = parts[0].split('Expected:', 2)
    return parts[0].strip(), parts[2].strip(), parts[1].strip()


def _failed_step_details(row, step_name):
    """Return (actual output, break-on-fail flag) for the row of a failed step."""
    output = row.find_next('td', class_="DefaultCell").get_text(strip=True)
    if 'MaskSymbolOp' in output:
        # The value is taken from the third cell of the last row of the next info table.
        info_table = row.find_next('table', class_='InfoTableExpand')
        output = info_table.find_all('tr')[-1].find_all('td')[2].get_text(strip=True)

    aborted = False
    abort_cell = row.find_next('td', class_='DefaultCell', string=_ABORT_MESSAGE)
    if abort_cell is not None:
        # The abort counts for this step only when the table holding the abort
        # message also lists a heading containing the step name.
        headings = abort_cell.parent.parent.find_all('big', class_='Heading4')
        aborted = any(step_name in item.get_text(strip=True) for item in headings)
    return output, aborted


def _collect_test_case_steps(main, progress, steps):
    """Append one record per step heading of a single test case to ``steps``."""
    test_case = main.find_previous(
        'td', class_=['TestcaseHeadingPositiveResult', 'TestcaseHeadingNegativeResult']
    ).get_text(strip=True)
    div = main.find_next('div', class_='Indentation')
    table = div.find('table', class_='ResultTable') if div is not None else None
    if table is None:
        return

    rows = table.find_all('tr')  # looked up once instead of once per heading
    last_row = len(rows) - 1
    for row_num, row in enumerate(rows):
        heading = row.find('big', class_='Heading4')
        if heading is None:
            continue

        text = heading.get_text(strip=True)
        print(f'{progress}Row:{row_num}/{last_row}\n\n')

        step_name, step_status, expected_output = _parse_step_heading(text)
        step_output = expected_output
        break_ = False
        if 'Failed' in step_status:
            step_output, break_ = _failed_step_details(row, step_name)

        steps.append({
            'Test Case': test_case,
            'Step Name': step_name,
            'Status': step_status,
            'Expected Output': expected_output,
            'Output': step_output,
            'BreakOnFail': break_
        })


def extract_test_steps(data):
    """Collect the test steps listed in parsed HTML test reports.

    Parameters
    ----------
    data : parsed document or iterable of parsed documents
        Either one object exposing ``find_all`` (a single parsed report) or a
        sequence of such objects. Each report is processed exactly once,
        whatever the number of reports.

    Returns
    -------
    list of dict
        One record per step heading with the keys 'Test Case', 'Step Name',
        'Status', 'Expected Output', 'Output' and 'BreakOnFail'.

    Notes
    -----
    Progress is printed for every step heading. An error inside one test case
    is appended to error.log and only the rest of that test case is skipped;
    steps collected before the error are kept.
    """
    # A single parsed document offers find_all itself, a list of documents does not.
    documents = [data] if hasattr(data, 'find_all') else list(data)
    last_document = len(documents) - 1
    steps = []

    for doc_idx, document in enumerate(documents):
        try:
            mains = document.find_all('big', class_='Heading3', string='Main Part of Test Case')
        except Exception as exc:
            _log_extraction_error(exc)
            continue

        last_test = len(mains) - 1
        for test_idx, main in enumerate(mains):
            progress = f'File: {doc_idx}/{last_document}\nTest:{test_idx}/{last_test}\n'
            try:
                _collect_test_case_steps(main, progress, steps)
            except Exception as exc:
                _log_extraction_error(exc)

    return steps

In [14]:
def excel(steps, excel_file_path, new_file_name='autogenerated_report.xlsx'):
    """Write extracted step results into a copy of an Excel workbook.

    Rows of the active sheet are read from row 3 onward. Column 2 holds the
    test case name (an empty cell means "same test case as the row above"),
    column 6 the step name, column 8 receives the output and column 9 the
    status. A row matches a step when the sheet's test case and step name are
    substrings of the step's 'Test Case' and 'Step Name'. Matched steps are
    consumed so they cannot match a later row.

    Parameters
    ----------
    steps : list of dict or DataFrame
        Records with the keys 'Test Case', 'Step Name', 'Status', 'Output'
        and 'BreakOnFail'.
    excel_file_path : str
        Workbook to read.
    new_file_name : str
        File the updated workbook is saved to.

    Returns
    -------
    tuple
        ``(result of Workbook.save, path of the unmatched-rows log)``.

    Raises
    ------
    Exception
        Whatever ``load_workbook`` or ``pd.DataFrame`` raises; the error is
        appended to error.log first. Errors while filling rows are logged to
        error.log and the rows processed so far are still saved.
    """
    unmatched_log_path = 'unmatched.log'

    # 1-based worksheet columns.
    excel_test_case_column = 2
    excel_step_name_column = 6
    excel_output_column = 8
    excel_status_column = 9

    # 0-based positions inside a row tuple.
    test_case_idx = excel_test_case_column - 1
    step_name_idx = excel_step_name_column - 1
    output_idx = excel_output_column - 1
    status_idx = excel_status_column - 1
    # Cell directly left of the output column; copied into the output when a
    # step passed (presumably the expected output - confirm against the template).
    reference_idx = excel_output_column - 2

    try:
        df = pd.DataFrame(steps)
        wb = load_workbook(excel_file_path)
    except Exception as exc:
        # Nothing can be saved without a workbook: record the cause and propagate it.
        with open('error.log', 'a') as error_log:
            error_log.write(f'Error updating excel: {exc}\n')
        raise

    sheet = wb.active
    failed = False           # True once the current test case hit a break-on-fail
    excel_test_case = 'xd'   # placeholder used until the first test-case cell is seen
    unmatched = []
    current_row = None       # worksheet row being processed, for error reporting

    try:
        for n_row, row in enumerate(sheet.iter_rows(min_row=3)):
            current_row = row[0].row
            print(f'Processing excel row: {n_row}/{sheet.max_row}')

            if row[test_case_idx].value is not None:
                # A new test case starts: forget the previous break-on-fail state.
                excel_test_case = row[test_case_idx].value
                failed = False
            excel_step_name = row[step_name_idx].value
            if not (isinstance(excel_test_case, str) and isinstance(excel_step_name, str)):
                continue

            output_cell = row[output_idx]
            status_cell = row[status_idx]
            reference_value = row[reference_idx].value

            consumed = []
            for index, df_row in df.iterrows():
                if excel_test_case not in df_row['Test Case']:
                    continue
                if excel_step_name not in df_row['Step Name']:
                    continue
                consumed.append(index)

                if df_row['BreakOnFail'] or failed:
                    status_cell.value = 'FAILED'
                    output_cell.value = 'Break On Fail'
                    failed = True
                elif (
                    df_row['Status'] == 'Passed'
                    or (df_row['Status'] == '' and reference_value is None)
                    or 'Passed' in df_row['Test Case']
                ):
                    # `failed` is always False here, the branch above handles it.
                    status_cell.value = 'PASSED'
                    output_cell.value = reference_value
                else:
                    output_cell.value = df_row['Output']
                    status_cell.value = 'FAILED'

            if consumed:
                # Drop all consumed steps at once so they cannot match a later row.
                df = df.drop(consumed)
            else:
                previous_output = sheet.cell(row=row[0].row - 1, column=excel_output_column).value
                if previous_output == 'Break On Fail':
                    output_cell.value = 'Break On Fail'
                status_cell.value = 'FAILED'
                # NOTE(review): the logged number is the worksheet row + 1 - confirm this offset is intended.
                unmatched.append(f'Row:{row[0].row+1},Test Case:{excel_test_case}, Step Name: {excel_step_name}\n')
    except Exception as exc:
        with open('error.log', 'a') as error_log:
            error_log.write(f'Error updating excel: {exc}\nExcel row: {current_row}\n')

    # Written even after an error so the log always describes this run.
    with open(unmatched_log_path, 'w') as unmatched_log:
        unmatched_log.writelines(unmatched)

    return wb.save(new_file_name), unmatched_log_path

In [15]:
# Full pipeline: extract the steps from every parsed report and write them
# into a copy of the input workbook. The cell displays the function's return value.
excel(steps=extract_test_steps(lxmls), excel_file_path=input_file)

# Alternative inputs kept from development (steps previously dumped to CSV):
#excel(pd.read_csv('stepz.csv'), input_file)
#a = extract_test_steps(lxmls)
#excel(pd.read_csv('output.csv'), input_file)

#print('Output saved to file -> autogenerated_report.xlsx \nLogs saved to file -> unmatched.log')

File: 0/4
Test:0/2
Row:2/33


File: 0/4
Test:0/2
Row:14/33


File: 0/4
Test:1/2
Row:2/33


File: 0/4
Test:1/2
Row:14/33


File: 0/4
Test:2/2
Row:2/132


File: 0/4
Test:2/2
Row:4/132


File: 0/4
Test:2/2
Row:7/132


File: 0/4
Test:2/2
Row:10/132


File: 0/4
Test:2/2
Row:16/132


File: 0/4
Test:2/2
Row:18/132


File: 0/4
Test:2/2
Row:21/132


File: 0/4
Test:2/2
Row:24/132


File: 0/4
Test:2/2
Row:30/132


File: 0/4
Test:2/2
Row:33/132


File: 0/4
Test:2/2
Row:39/132


File: 0/4
Test:2/2
Row:42/132


File: 0/4
Test:2/2
Row:48/132


File: 0/4
Test:2/2
Row:51/132


File: 0/4
Test:2/2
Row:57/132


File: 0/4
Test:2/2
Row:60/132


File: 0/4
Test:2/2
Row:66/132


File: 0/4
Test:2/2
Row:69/132


File: 0/4
Test:2/2
Row:75/132


File: 0/4
Test:2/2
Row:77/132


File: 0/4
Test:2/2
Row:80/132


File: 0/4
Test:2/2
Row:83/132


File: 0/4
Test:2/2
Row:89/132


File: 0/4
Test:2/2
Row:92/132


File: 0/4
Test:2/2
Row:98/132


File: 0/4
Test:2/2
Row:101/132


File: 0/4
Test:2/2
Row:107/132


File: 0/4
Test:

  warn("Workbook contains no default style, apply openpyxl's default")


Processing excel row: 0/964
Processing excel row: 1/964
Processing excel row: 2/964
Processing excel row: 3/964
Processing excel row: 4/964
Processing excel row: 5/964
Processing excel row: 6/964
Processing excel row: 7/964
Processing excel row: 8/964
Processing excel row: 9/964
Processing excel row: 10/964
Processing excel row: 11/964
Processing excel row: 12/964
Processing excel row: 13/964
Processing excel row: 14/964
Processing excel row: 15/964
Processing excel row: 16/964
Processing excel row: 17/964
Processing excel row: 18/964
Processing excel row: 19/964
Processing excel row: 20/964
Processing excel row: 21/964
Processing excel row: 22/964
Processing excel row: 23/964
Processing excel row: 24/964
Processing excel row: 25/964
Processing excel row: 26/964
Processing excel row: 27/964
Processing excel row: 28/964
Processing excel row: 29/964
Processing excel row: 30/964
Processing excel row: 31/964
Processing excel row: 32/964
Processing excel row: 33/964
Processing excel row: 34

(None, 'unmatched.log')

In [91]:
# Exploration: for every test case, keep the grandparent element of the first
# "BreakOnFail" abort cell found in its result table, then list the step
# headings of the first hit.
a = []
for document in lxmls:
    for main in document.find_all('big', class_='Heading3', string='Main Part of Test Case'):
        div = main.find_next('div', class_='Indentation')
        table = div.find('table', class_='ResultTable') if div is not None else None
        if table is None:
            continue

        for row in table.find_all('tr'):
            abort_cell = row.find('td', class_='DefaultCell', string='Test aborted due to BreakOnFail behavior.')
            if abort_cell is not None:
                a.append(abort_cell.parent.parent)
                break  # only the first abort marker of a test case is of interest

if a:
    print(a[0].find_all('big', class_='Heading4'))
else:
    print('No BreakOnFail abort marker found in the loaded reports.')

[<big class="Heading4">1. Send diagnostic request "Clear Diagnostic Information" Expected: Positive response: Passed</big>, <big class="Heading4">2. Wait for TRC EN: None</big>, <big class="Heading4">3. Send diagnostic request "ReportDTCExtendedDataRecordByDTCNumber 4" Expected: Positive response: Passed</big>, <big class="Heading4">4. Check response length Expected: Response length of 6 bytes (data is not available): Passed</big>, <big class="Heading4">5. Wait until DTC reached TestPass Limit: None</big>, <big class="Heading4">6. Send diagnostic request "ECU hard reset" Expected: Positive response: Passed</big>, <big class="Heading4">7. Wait for start diagnostics: None</big>, <big class="Heading4">8. Send diagnostic request "ReportDTCExtendedDataRecordByDTCNumber 4" Expected: Positive response: Passed</big>, <big class="Heading4">9. Check response length Expected: Response length of 6 bytes (data is not available): Passed</big>, <big class="Heading4">10. Wait for TRC EN: None</big>, <

In [103]:
# Exploration: report which step headings of the first aborted table mention "Check OCC4".
for heading in a[0].find_all('big', class_='Heading4'):
    heading_text = heading.get_text(strip=True)
    if 'Check OCC4' not in heading_text:
        print('not found')
        continue
    print('found', heading)

not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
found <big class="Heading4">17. Check OCC4 value Expected: OCC4 equal expected value: Passed</big>
not found
not found
not found
not found
not found
not found
found <big class="Heading4">24. Check OCC4 value Expected: OCC4 equal expected value: Failed</big>
