## 1. Padronização da VDRL de Automação

In [11]:
import pandas as pd
import numpy as np
from pathlib import Path
import warnings
import re

warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl.worksheet._reader")

# Diretório onde estão os arquivos
directory = Path('C:/Users/elxy/Documents/Codigos/Python/P84_85/LDs/padroniza')

In [None]:
# Read the Excel files
base_file = directory / "comments.xlsx"

# Read base and old comments files
df_comments = pd.read_excel(base_file, sheet_name="VDRL")
df_comments.dropna(subset=['CLIENT DOCUMENT NUMBER'], inplace=True)

display(df_comments.head())

Unnamed: 0,SDLR Code,Idiom Document Version Type,Document Type,Installation Area Code Number,System No/Area of Activity,Discipline/Class of Service,Originator Code,Sequential Number,CLIENT DOCUMENT NUMBER,VENDOR DOCUMENT NUMBER,...,PLANNED DATE.1,ACTUAL DATE.1,RETURNED DATE.1,RETURNED CODE.1,PLANNED DATE.2,ACTUAL DATE.2,RETURNED DATE.2,RETURNED CODE.2,Topside/Hull/Common,REMARK
0,,I,LI,3010.2S,5520.0,800.0,AK1,522.0,I-LI-3010.2S-5520-800-AK1-522,CIMC.01-00-AT-LIS-522,...,2025-02-19,,,,TBD,,,,H,
1,,I,CR,3010.2S,5520.0,800.0,AK1,2.0,I-CR-3010.2S-5520-800-AK1-002,SEAT.00-01-AT-CRN-002,...,2025-04-04,,,,,,,,TS,Canceled in the VDRL of the Topside. It is a c...
2,,I,DE,3010.2S,5520.0,800.0,AK1,1.0,I-DE-3010.2S-5520-800-AK1-001,SEAT.00-01-AT-DAQ-001,...,2025-04-15,,,,,,,,TS,Changed title as it is no longer a common docu...
3,,I,DE,3010.2S,5520.0,800.0,AK1,501.0,I-DE-3010.2S-5520-800-AK1-501,CIMC.01-00-AT-DAQ-501,...,2025-02-03,,,,TBD,,,,H,The title has been changed as it will no longe...
4,,I,RL,3010.2S,5520.0,800.0,AK1,526.0,I-RL-3010.2S-5520-800-AK1-526,CIMC.01-00-AT-RIN-515,...,2025-06-04,,,,TBD,,,,C,This document is common for Topside and Hull. ...


In [3]:
discipline_cols = [
    "Automation (Design)", "Operation", "Maintenance / Aut", "Telecom",
    "Comissining", "Subsea", "Electrical", "HVAC"
]

def join_disciplines(row):
    disciplines = [col for col in discipline_cols if str(row.get(col)).strip().lower() == "yes"]
    return ", ".join(disciplines) if disciplines else row.get("Discipline", "")

df_comments["Discipline"] = df_comments.apply(join_disciplines, axis=1)

In [4]:
display(df_comments[['CLIENT DOCUMENT NUMBER', 'DOCUMENT TITLE', 'Comments', 'Discipline', 'Automation (Design)', 'Operation', 'Maintenance / Aut', 'Telecom', 'Comissining', 'Subsea', 'Electrical', 'HVAC']])

Unnamed: 0,CLIENT DOCUMENT NUMBER,DOCUMENT TITLE,Comments,Discipline,Automation (Design),Operation,Maintenance / Aut,Telecom,Comissining,Subsea,Electrical,HVAC
0,I-LI-3010.2S-5520-800-AK1-522,HULL MASTER TAG REGISTER - HCSS,No,,,,,,,,,
1,I-CR-3010.2S-5520-800-AK1-002,AUTOMATION INTEGRATION - MASTER SCHEDULE AND WBS,,,,,,,,,,
2,I-DE-3010.2S-5520-800-AK1-001,ARCHITECTURE DRAWING - CSS,Yes,"Automation (Design), Maintenance / Aut",Yes,,Yes,,,,,
3,I-DE-3010.2S-5520-800-AK1-501,ARCHITECTURE DRAWING - HCSS,Yes,"Automation (Design), Maintenance / Aut",Yes,,Yes,,,,,
4,I-RL-3010.2S-5520-800-AK1-526,AUTOMATION INTEGRATION - CONSISTENCY REPORT TE...,No,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
394,I-RL-3010.2S-5520-970-AK1-521,HULL FACTORY ACCEPTANCE TEST REPORT (PN-552050...,Yes,Maintenance / Aut,,,Yes,,,,,
395,I-RL-3010.2S-5520-970-AK1-522,HULL FACTORY ACCEPTANCE TEST REPORT (PN-552050...,Yes,Maintenance / Aut,,,Yes,,,,,
396,I-ET-3010.2S-5520-800-AK1-023,TOR CYBERSECURITY RISK ASSESSMENT - CSS,Yes,Automation (Design),Yes,,,,,,,
397,I-DE-3010.2S-5520-710-AK1-520,HULL INVERTER PANEL MODBUS COMMUNICATION BLOCK...,Yes,"Automation (Design), Electrical",Yes,,,,,,Yes,


In [5]:
# Read the Excel files
base_file = directory / "I-LD-3010.2S-5520-800-AK1-001_A.xlsx"
comments_file = directory / "I-LD-3010.2S-5520-800-AK1-001_A_comments.xlsx"

# Read base and old comments files
df_base = pd.read_excel(base_file, sheet_name="VDRL")

# Preprocess dataframes
df_base.columns = df_base.iloc[6]
df_base = df_base.iloc[7:]
df_base.dropna(subset=['CLIENT DOCUMENT NUMBER'], inplace=True)

# Create new dataframe based on base file
df_new = df_base.copy()

# Track unmatched documents
unmatched_docs = []

# Add Have_comment and Discipline columns
df_new['Comments'] = None
df_new['Discipline'] = None

# Update Have_comment and Discipline based on CLIENT DOCUMENT NUMBER matches
for idx, row in df_new.iterrows():
    match = df_comments[df_comments['CLIENT DOCUMENT NUMBER'] == row['CLIENT DOCUMENT NUMBER']]
    if not match.empty:
        df_new.at[idx, 'Comments'] = match['Comments'].values[0]
        df_new.at[idx, 'Discipline'] = match['Discipline'].values[0]

# Find documents in old comments that don't exist in new file
for idx, row in df_comments.iterrows():
    if row['CLIENT DOCUMENT NUMBER'] not in df_new['CLIENT DOCUMENT NUMBER'].values:
        unmatched_docs.append({
            'CLIENT_DOCUMENT': row['CLIENT DOCUMENT NUMBER'],
            'DOCUMENT_TITLE': row['DOCUMENT TITLE'],
            'Comments': row['Comments'],
            'Discipline': row['Discipline']
        })

# Save the new comments file
print(f"Novos documentos com os comentários gravados em {comments_file}:")
df_new.to_excel(comments_file, index=False, sheet_name="VDRL")

if unmatched_docs:
    # Print unmatched documents
    print("\nDocuments from old comments file that couldn't be matched:")
    df_unmatched = pd.DataFrame(unmatched_docs)
    # Save unmatched documents to Excel with descriptive filename
    output_file = f'unmatched_docs.xlsx'
    df_unmatched.to_excel(output_file, index=False)
    print(f"\nUnmatched documents saved to: {output_file}")
else:
    print("All documents were successfully matched.")
    # Criar um DataFrame vazio quando não há documentos não correspondentes
    df_unmatched = pd.DataFrame(columns=['CLIENT_DOCUMENT', 'DOCUMENT_TITLE', 'Comments', 'Discipline'])

Novos documentos com os comentários gravados em C:\Users\elxy\Documents\Codigos\Python\P84_85\LDs\automation\I-LD-3010.2S-5520-800-AK1-001_A_comments.xlsx:

Documents from old comments file that couldn't be matched:

Unmatched documents saved to: unmatched_docs.xlsx


## 2. Padronização da VDRL da EHOUSE

In [15]:
# Read the Excel files
base_file = directory / "I-LD-3010.2S-1400-170-AIW-001_D.xlsx"
comments_file = directory / "I-LD-3010.2S-1400-170-AIW-001_0_comments.xlsx"
comments_new_file = directory / "I-LD-3010.2S-1400-170-AIW-001_new_comments.xlsx"

# Read base and old comments files
df_comments_1 = pd.read_excel(comments_file, sheet_name="P84 M17 & M13R GENERAL VDRL")
df_comments_1.columns = df_comments_1.iloc[6]
df_comments_1 = df_comments_1.iloc[7:]
df_comments_1.dropna(subset=['CLIENT DOCUMENT NUMBER'], inplace=True)

df_comments_2 = pd.read_excel(comments_file, sheet_name="P84 M17 VDRL")
df_comments_2.columns = df_comments_2.iloc[6]
df_comments_2 = df_comments_2.iloc[7:]
df_comments_2.dropna(subset=['CLIENT DOCUMENT NUMBER'], inplace=True)

df_comments_3 = pd.read_excel(comments_file, sheet_name="P84 M13_Rooms VDRL")
df_comments_3.columns = df_comments_3.iloc[6]
df_comments_3 = df_comments_3.iloc[7:]
df_comments_3.dropna(subset=['CLIENT DOCUMENT NUMBER'], inplace=True)

# Combine the dataframes
df_comments = pd.concat([df_comments_1, df_comments_2, df_comments_3], ignore_index=True)

In [None]:
# Count the number of comments in each discipline
discipline_counts = df_comments['Comments'].value_counts()
print("\nNumber of comments in each discipline:")
print(discipline_counts)



Number of comments in each discipline:
Comments
YES    419
Name: count, dtype: int64


In [14]:
# Read base and comments files
df_base = pd.read_excel(base_file, sheet_name="VDRL")

# Preprocess dataframes
df_base.columns = df_base.iloc[6]
df_base = df_base.iloc[7:]
df_base.dropna(subset=['CLIENT DOCUMENT NUMBER'], inplace=True)

# Create new dataframe based on base file
df_new = df_base.copy()

# Track unmatched documents
unmatched_docs = []

# Add Have_comment and Discipline columns
df_new['Comments'] = None
df_new['Discipline'] = None

# Update Have_comment and Discipline based on CLIENT DOCUMENT NUMBER matches
for idx, row in df_new.iterrows():
    match = df_comments[df_comments['CLIENT DOCUMENT NUMBER'] == row['CLIENT DOCUMENT NUMBER']]
    if not match.empty:
        df_new.at[idx, 'Comments'] = match['Comments'].values[0]
        df_new.at[idx, 'Discipline'] = match['Discipline'].values[0]

# Find documents in old comments that don't exist in new file
for idx, row in df_comments.iterrows():
    if row['CLIENT DOCUMENT NUMBER'] not in df_new['CLIENT DOCUMENT NUMBER'].values:
        unmatched_docs.append({
            'CLIENT_DOCUMENT': row['CLIENT DOCUMENT NUMBER'],
            'DOCUMENT_TITLE': row['DOCUMENT TITLE'],
            'Comments': row['Comments'],
            'Discipline': row['Discipline']
        })

# Save the new comments file
print(f"Novos documentos com os comentários gravados em {comments_new_file}:")
df_new.to_excel(comments_new_file, index=False, sheet_name="VDRL")

if unmatched_docs:
    # Print unmatched documents
    print("\nDocuments from old comments file that couldn't be matched:")
    df_unmatched = pd.DataFrame(unmatched_docs)
    # Save unmatched documents to Excel with descriptive filename
    output_file = f'unmatched_docs.xlsx'
    df_unmatched.to_excel(output_file, index=False)
    print(f"\nUnmatched documents saved to: {output_file}")
else:
    print("All documents were successfully matched.")
    # Criar um DataFrame vazio quando não há documentos não correspondentes
    df_unmatched = pd.DataFrame(columns=['CLIENT_DOCUMENT', 'DOCUMENT_TITLE', 'Comments', 'Discipline'])

Novos documentos com os comentários gravados em C:\Users\elxy\Documents\Codigos\Python\P84_85\LDs\padroniza\I-LD-3010.2S-1400-170-AIW-001_new_comments.xlsx:

Documents from old comments file that couldn't be matched:

Unmatched documents saved to: unmatched_docs.xlsx
