# In [2]:  (Jupyter notebook cell prompt, kept as a comment so the file is valid Python)
#Name: PM-reader v12
#Creation date: 22/04/2022
#
# Purpose:
# Read in all the PM data saved from Pronto and create
# a readable CSV file that can be easily manipulated.
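# Inputs:
# Pronto_TB.txt - text file containing all PM data (path is hard-coded below)
# Outputs:
# pandas.txt - intermediate copy of the dump with lines containing "LIVE" removed
# Prontotext-TB.txt - '|'-delimited TXT file to be read into Excel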
#
import os.path
import pandas as pd
import sys
import re
import collections
import more_itertools

#Function definitions
def get_data(group):
    """Build the summary fields shared by every output row of one PM block.

    Parameters:
        group: list of text lines of one block, as yielded by ``get_groups``.
            The list is modified IN PLACE (stray column-header lines are
            removed, line 1 has a leading quote removed and a placeholder
            "PM Task No" line may be inserted). The caller indexes ``group``
            again after this call, so the mutation is part of the contract.

    Returns:
        A flat list of strings: PM task number, PM description, the
        fixed-width fields of line 2, the non-empty space-separated tokens
        of line 3 and the plant-item fields (or 'No Plant' defaults).

    Raises:
        IndexError: if the block has fewer than four lines once the stray
            header lines have been removed.
    """
    # Remove stray header lines. Filtering in one pass avoids deleting from
    # the list while indexing into it, which could run past the end of the
    # list or skip the line that moved into the deleted slot.
    header_markers = ("Frequency  Freq-UOM", "Duration   Downtime")
    group[1:] = [
        line for line in group[1:]
        if not any(marker in line for marker in header_markers)
    ]

    # Remove the quotes from some lines
    group[1] = group[1].replace('"PM', 'PM')

    PMNo1, PMDesc1, PMlbl1 = grab_PM(group[1])  # Split PM task line

    # If PM task number is missing, add 0 and description
    if PMlbl1[:2] != "PM":
        PMNo1 = "0"
        PMDesc1 = " No Description"
        group.insert(1, "PM Task No: " + PMNo1 + PMDesc1 + '\n')

    first_line = PMNo1.split('|') + PMDesc1.split('|')

    # Plant item fields; the length check keeps short blocks on the
    # 'No Plant' defaults instead of failing on group[4] / group[5].
    if len(group) > 5 and "Plant Item" in group[4]:
        plant_chunks = make_chunks(group[5], [12, 44, 25, 15, 10]).split('|')
        # Drop the last character of every chunk, then strip surrounding
        # double quotes and spaces (and nothing else).
        plant_fields = [chunk[:-1].strip('" ') for chunk in plant_chunks]
    else:
        plant_fields = ['No Plant', 'No Plant', '01-Jan-2000', '0.0000', '']

    # This section splits the second line into the various components
    widths = [9, 7, 9, 13, 20, 20, 5, 16, 13, 13, 14]
    line2_fields = [
        chunk[:-1].strip(' ')
        for chunk in make_chunks(group[2], widths).split('|')
    ]

    # Every part is already a flat list of strings, so plain concatenation
    # gives the flattened result.
    return first_line + line2_fields + remove_dup1(group[3]) + plant_fields

def grab_PM(text):
    """Split a 'PM Task No: <number> <description>' line into its parts.

    Parameters:
        text: one line of the dump.

    Returns:
        A tuple ``(number, description, label)``:
        - ``label`` is the whitespace-normalised text before the first ':'
          (the whole normalised line when there is no ':').
        - ``number`` and ``description`` are taken from the text after the
          ':' only when the fourth whitespace-separated token of the line is
          numeric; otherwise both are the placeholder ``" "``.
        - ``description`` is ``" "`` when nothing follows the number.

    Lines with fewer than four tokens or without a ':' return the
    placeholders instead of raising IndexError.
    """
    normalized = " ".join(text.split())  # Remove extra whitespaces
    tokens = normalized.split()
    label, _, remainder = normalized.partition(":")  # Get 'PM Task No' label

    number = " "
    description = " "
    if len(tokens) > 3 and tokens[3].isnumeric():
        rest = remainder.strip()
        if rest:
            # First word after the colon is the task number; the remainder
            # (possibly empty) is the description.
            number, _, tail = rest.partition(" ")
            if tail:
                description = tail

    return number, description, label

def get_groups(seq, group_by):
    """Yield consecutive lines of ``seq`` as lists, one list per group.

    A new group starts at every line that starts with ``group_by``; that
    line is the first element of its group. Lines seen before the first
    such line form a group of their own. Nothing is yielded for an empty
    ``seq``.

    Parameters:
        seq: iterable of strings (for example an open text file).
        group_by: prefix (or tuple of prefixes) accepted by
            ``str.startswith`` that marks the first line of a group.
    """
    data = []
    for line in seq:
        # Here the `startswith()` logic can be replaced with other
        # condition(s) depending on the requirement.
        if line.startswith(group_by) and data:
            yield data
            data = []
        data.append(line)
    if data:
        yield data

def remove_dup1(line):
    """Return the pieces produced by ``clean_line1(line)`` without the empty strings.

    The order of the remaining pieces is kept. Despite the name, repeated
    non-empty values are NOT removed; only '' entries are dropped.
    """
    return [piece for piece in clean_line1(line) if piece != '']

def clean_line1(lines):
    """Strip leading/trailing newlines from ``lines`` and split it on single spaces.

    Runs of several spaces therefore produce empty strings in the returned
    list (one per extra space).
    """
    without_newlines = lines.strip('\n')
    return without_newlines.split(" ")

def make_chunks(s,n):
    """Cut ``s`` into consecutive pieces and return them joined with '|'.

    Parameters:
        s: the string to cut.
        n: iterable of piece lengths, applied from the start of ``s``.

    Returns:
        One piece per length in ``n`` (pieces are shorter or empty when
        ``s`` runs out), followed by whatever is left of ``s`` if it is
        non-empty, all joined with '|'.
    """
    pieces = []
    remainder = s
    for width in n:
        head, remainder = remainder[:width], remainder[width:]
        pieces.append(head)
    if remainder:
        pieces.append(remainder)
    return '|'.join(pieces)

def make_chunks2(s,n):
    """Cut ``s`` into consecutive pieces of the lengths in ``n`` and glue them back together.

    Unlike ``make_chunks`` no separator is inserted and any text left over
    after the last length is discarded.
    """
    glued = ''
    remainder = s
    for width in n:
        head, remainder = remainder[:width], remainder[width:]
        glued = glued + head
    return glued

#End of function definitions################################################

# Import Pronto dump file
# The dump is read as a tab-delimited table with no header row.
# NOTE(review): presumably every text line ends up in column 0 because the
# dump contains no tab characters - confirm against a real dump.
df=pd.read_table('C:/Users/victor.odman/OneDrive - Hyne Timber/Documents/Reports/Pronto_TB.txt',header=None,delimiter="\t", engine='python')

# Move File title to last position
# The first row is saved, every row is shifted up by one position and the
# saved row is written into the (now empty) last row.
target_row = df.iloc[[0],:]
df = df.shift(-1)
df.iloc[-1] = target_row.squeeze()

# NOTE(review): "LIVE" is treated as a regular expression by str.contains,
# and rows where column 0 is not a string (NaN) do not compare equal to
# False, so they are dropped as well - confirm this is intended.
df = df[df[0].str.contains("LIVE") == False] # Remove lines containing "LIVE"

# Write the filtered lines to 'pandas.txt' (tab-separated, no header, no index);
# this file is re-read as plain text further down.
# NOTE(review): to_csv uses its default quoting here, which may wrap some
# lines in double quotes - presumably the origin of the stray quotes that
# the parsing code strips; confirm.
df.to_csv('pandas.txt', header=None, index=None, sep='\t', mode='w')

# Column header written as the first line of the output file
OUTPUT_HEADER = "WO ID|PM Task No|PM Task Description|Frequency|Freq-UOM|Work Type|Resp Code|KIT|Blank|Tools Required|Lab1|Lab2|Lab3|Lab4|Duration|Downtime|Start Time|Status|Priority|Reset|Qty|Qty2|Qty3|Qty4|Plant Item|Description|Last Done|Next Due Value|Monitor|Task No.|Task Description|Item Code|Material Description|Mat Qty|Cost Centre|UOM\n"


def _first_match(pattern, text):
    """Return the first match of ``pattern`` in ``text``, or '' when there is none.

    Always producing exactly one value keeps the number of output columns
    fixed when a line has no match or more than one match.
    """
    found = re.findall(pattern, text)
    return found[0] if found else ''


def _section_starts(group):
    """Return the start indexes (description, safety, materials) of one block.

    Each index is the position of the FIRST line containing the section
    marker. A missing section starts where the following section starts
    (``len(group)`` for a missing materials section), so its slice is empty
    and the preceding section stops at the right line.
    """
    def first_index(marker, default):
        return next((pos for pos, text in enumerate(group) if marker in text), default)

    materials_start = first_index('Materials:    Item Code ', len(group))
    safety_start = first_index('Safety: ', materials_start)
    desc_start = first_index('Description: ', safety_start)
    return desc_start, safety_start, materials_start


def _block_rows(group, summary):
    """Return the output rows (lists of field strings) for one block.

    Every row starts with the ``summary`` fields and is followed by the
    task number / task description columns (work and safety lines) or by
    two empty columns plus five material columns (material lines).
    """
    desc_start, safety_start, materials_start = _section_starts(group)
    rows = []

    # Work task numbers and descriptions
    for line in group[desc_start:safety_start]:
        if "Description: " in line:
            # First line of task descriptions
            text = line.replace("Description:", "")
            task_no = _first_match(r'^.+\d+\.\d\d', text)
            task_desc = _first_match(r'(?<=.\d\d ).+\w+', text)
        else:
            # All other task descriptions
            chunks = make_chunks(line, [22, 80, 100]).split('|')
            task_no = chunks[0].rstrip("\n").lstrip("\"")
            # The line's newline ends up in this chunk for short lines;
            # left in place it would split the output row in two.
            task_desc = chunks[1].rstrip("\n")
        rows.append(summary + [task_no, task_desc])

    # Safety task numbers and descriptions
    for line in group[safety_start:materials_start]:
        if "Safety: " in line:
            # First line of safety tasks
            text = line.replace("Safety:", "")
            task_no = _first_match(r'\d.\d\d', text)
        else:
            text = line
            task_no = _first_match(r'(^.+\d+\.\d\d)', text)
        rows.append(summary + [task_no, _first_match(r'(?<=.\d\d ).+\w+', text)])

    # Materials
    for line in group[materials_start:]:
        if "Materials:    Item Code " in line:
            continue  # header line of the materials table
        fields = make_chunks(line, [21, 53, 14, 15, 10, 5]).strip().split('|')
        # Only the first five chunks are output; slicing (instead of
        # unpacking exactly six values) tolerates lines longer than the
        # column widths.
        material_fields = [field.rstrip('\n') for field in fields[:5]]
        rows.append(summary + ['', ''] + material_fields)

    return rows


with open ('Prontotext-TB.txt','w', encoding='utf-8') as outfile:                  # Open Prontotext-TB.txt for writing text data.
    with open ('pandas.txt', 'rt', encoding='utf8', errors='ignore') as infile: # Open pandas.txt for reading text data.
        outfile.write(OUTPUT_HEADER)
        for group in get_groups(infile, "==="):
            # Summary fields of this block; get_data also cleans up `group`
            # in place, so it must run before the rows are extracted.
            summary = get_data(group)

            for position, row in enumerate(_block_rows(group, summary)):
                # Every row carries an empty leading column (under "WO ID")
                # and an empty trailing column. The first row of a block
                # has a single space in the leading column.
                lead = ' ' if position == 0 else ''
                outfile.write('|'.join([lead] + row + ['']) + '\n')
# Both files are closed by the `with` statements above.