In [26]:
import pandas as pd

master_df = pd.read_csv(r"C:\Users\ngonh\UnisaLoDPlugin\ExtractData\sample3.csv",index_col=False)
unique_cats = master_df['Element.Category'].unique()
cat_dfs = {cat:master_df[master_df['Element.Category']==cat] for cat in unique_cats}
print(unique_cats)

['Ceilings' 'Floors' 'Gutters' 'Roofs' 'Walls']


In [51]:
def process_roof(gutters_df: pd.DataFrame, roofs_df: pd.DataFrame) -> pd.DataFrame:
    """
    Processes roof and gutter data by cleaning column names, determining whether each roof
    has a corresponding gutter, and assigning the appropriate Level of Detail (LOD).

    Args:
        gutters_df (pd.DataFrame): The DataFrame containing gutter data.
        roofs_df (pd.DataFrame): The DataFrame containing roof data.

    Returns:
        pd.DataFrame: A new DataFrame with LOD levels assigned and a column indicating
                      whether each roof has a corresponding gutter. Returns None if
                      any of the input DataFrames is None.
    """
    # If roofs_df or gutters_df is None, skip this dataset
    if roofs_df is None or gutters_df is None:
        return pd.DataFrame()

    # Create copies of the DataFrames to avoid modifying the originals
    gutters_df = clean_column_names(gutters_df)
    roofs_df = clean_column_names(roofs_df)

    # Check if each roof has a corresponding gutter by matching the Document Title
    roofs_df["Has_Gutter"] = roofs_df["Document.Title"].isin(
        gutters_df["Document.Title"]
    )

    # Define the properties for LOD 200 and LOD 300
    lod_300_properties = [
        "Element.Slope",
        "Has_Gutter",
    ]  # Properties required for LOD 300
    lod_200_properties = ["Element.Thickness"]  # Properties required for LOD 200

    # Run the LOD assignment process and return the modified DataFrame
    return assign_lod(roofs_df, lod_300_properties, lod_200_properties)
def is_missing(value):
    return pd.isna(value) or str(value).strip() == ""
def clean_column_names(df: pd.DataFrame) -> pd.DataFrame:
    """
    Creates a new DataFrame with cleaned column names by removing unwanted characters
    such as line breaks, carriage returns, and leading/trailing spaces.

    Args:
        df (pd.DataFrame): The input DataFrame with potentially dirty column names.

    Returns:
        pd.DataFrame: A new DataFrame with cleaned column names.
    """
    # Create a copy of the DataFrame to avoid modifying the original one

    # Clean up column names by removing newlines, carriage returns, and trimming spaces
    df.columns = df.columns.str.replace(
        r"[\r\n]+", " ", regex=True
    )  # Remove newlines
    df.columns = df.columns.str.strip()  # Remove leading/trailing spaces

    return df
def assign_lod(
    df: pd.DataFrame, lod_300_properties: list, lod_200_properties: list = None
) -> pd.DataFrame:
    """
    Creates a new DataFrame and assigns Level of Detail (LOD) levels to each row based on the presence
    of certain properties. It also identifies missing properties for each row.

    Args:
        df (pd.DataFrame): The input DataFrame containing the properties for each item.
        lod_300_properties (list): A list of property names required for LOD 300.
        lod_200_properties (list, optional): A list of property names required for LOD 200. Defaults to None.

    Returns:
        pd.DataFrame: A new DataFrame with the LOD levels assigned and missing properties identified.
    """


    # Initialize columns
    df["LOD"] = 100  # Default to LOD 100
    df["Missing_Properties"] = ""

    # Iterate through the DataFrame rows
    for i, row in df.iterrows():
        missing_properties = []
        lod_200_all_present = True

        # Check for LOD 200 properties (if any are provided)
        if lod_200_properties:
            for prop in lod_200_properties:
                if is_missing(row.get(prop)):  # Check if the property is missing
                    missing_properties.append(prop)
                    lod_200_all_present = False

        # If all LOD 200 properties are present, assign LOD 200
        if lod_200_all_present:
            df.at[i, "LOD"] = 200

            # Check for LOD 300 properties (Slope + Gutter)
            lod_300_all_present = True
            for prop in lod_300_properties:
                # Special case: Gutter check (boolean column)
                if prop == "Has_Gutter" and not row.get("Has_Gutter"):
                    missing_properties.append("Gutter")
                    lod_300_all_present = False
                elif prop != "Has_Gutter" and is_missing(row.get(prop)):
                    missing_properties.append(prop)
                    lod_300_all_present = False

            # If all LOD 300 properties are present, assign LOD 300
            if lod_300_all_present:
                df.at[i, "LOD"] = 300

        # Flag missing properties
        df.at[i, "Missing_Properties"] = (
            ", ".join(missing_properties) if missing_properties else ""
        )

    # Add LOD level indicator columns
    df["LOD_100"] = (df["LOD"] == 100).astype(int)
    df["LOD_200"] = (df["LOD"] == 200).astype(int)
    df["LOD_300"] = (df["LOD"] == 300).astype(int)

    return df

In [None]:
print(clean_column_names(cat_dfs['Roofs']))

In [63]:
print(process_roof(cat_dfs['Gutters'],cat_dfs['Roofs']))

                               Item.Guid                   Document.Title  \
13  83821176-0f08-4614-9cde-a6f875156919  JH-MOD-A-01-0001_ADMIN BUILDING   
14  ef55d2bc-23b7-47e9-b979-a6e89057b741  JH-MOD-A-01-0001_ADMIN BUILDING   

   Element.Category                                  Element.Name  \
13            Roofs  Steel Bar Joist - Steel Deck - EPDM Membrane   
14            Roofs  Steel Bar Joist - Steel Deck - EPDM Membrane   

    Element.Area  Element.Element Thickness  Element.Elevation at Bottom  \
13           NaN                        NaN                          NaN   
14           NaN                        NaN                          NaN   

    Element.Elevation at Top  Element.Host  Element.Id  ...  Item.Material  \
13                       NaN           NaN         NaN  ...            NaN   
14                       NaN           NaN         NaN  ...            NaN   

   Revit Type.AUR_MATERIAL_TYPE  Revit Type.Structural Material  \
13                          N

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  roofs_df["Has_Gutter"] = roofs_df["Document.Title"].isin(
