In [1]:
import pandas as pd
import numpy as np

from scripts.convert import find_unit


In [2]:
tdict = {
    "one": [1, 2, 3],
    "two": [4, 5, 6],
    "three": [7, 8, 9]
}

tdf = pd.DataFrame(data=tdict)


In [3]:
tdf

Unnamed: 0,one,two,three
0,1,4,7
1,2,5,8
2,3,6,9


In [4]:
tdf.drop(columns=["one", "four"], errors="ignore")


Unnamed: 0,two,three
0,4,7
1,5,8
2,6,9


In [5]:
def rename_unit(var_name: str, unit: str):
    """
    - Specific to Seattle buildings dataset : Two types of var synthax :
        - abcUNITdef : Unit inside the name
        - abcd(UNIT) : Unit in parentheses
    - Reformat the variable, returning result

    Args:
    - var_name : original variable name
    - unit : the converted name

    Returns:
    - New column name
    """

    if "GFA" in var_name:
        var_name.replace("GFA", "Area(SquareMetre)")
        return var_name
    elif ("("and ")") in var_name:
        start, end = var_name.find("("), var_name.find(")")
        before = var_name[:start + 1]
        after = var_name[end:]
        var_name = f"{before}{unit}{after}"
        return var_name


In [6]:
test_list = [
    "PropertyGFATotal", "PropertyGFAParking", "PropertyGFABuilding(s)", "LargestPropertyUseTypeGFA",
    "SecondLargestPropertyUseTypeGFA", "ThirdLargestPropertyUseTypeGFA", "SiteEUI(kBtu/sf)", "SiteEUIWN(kBtu/sf)",
    "SourceEUI(kBtu/sf)", "SourceEUIWN(kBtu/sf)", "SiteEnergyUse(kBtu)", "SiteEnergyUseWN(kBtu)", "SteamUse(kBtu)",
    "Electricity(kBtu)", "NaturalGas(therms)", "NaturalGas(kBtu)", "OtherFuelUse(kBtu)",
    ]


In [7]:
print(rename_unit(var_name=[test_list[-1]], unit="kWh"))
print(rename_unit(var_name=[test_list[0]], unit="SquareMetre"))

None
None


In [8]:
work_idiot = "abcde"
tdf.assign(work_idiot=np.nan)

Unnamed: 0,one,two,three,work_idiot
0,1,4,7,
1,2,5,8,
2,3,6,9,


In [9]:
tdf

Unnamed: 0,one,two,three
0,1,4,7
1,2,5,8
2,3,6,9


In [14]:
for col in test_list:
    unit = find_unit(var_name=col, convert=True)["converted_unit"]

    if "GFA" in col:
        new_name = col.replace("GFA", "Area(SquareMetre)")
    elif ("("and ")") in col:
        start, end = col.find("("), col.find(")")
        before = col[:start + 1]
        after = col[end:]
        new_name = f"{before}{unit}{after}"
    
    print(new_name)


PropertyArea(SquareMetre)Total
PropertyArea(SquareMetre)Parking
PropertyArea(SquareMetre)Building(s)
LargestPropertyUseTypeArea(SquareMetre)
SecondLargestPropertyUseTypeArea(SquareMetre)
ThirdLargestPropertyUseTypeArea(SquareMetre)
SiteEUI(kWh/m2)
SiteEUIWN(kWh/m2)
SourceEUI(kWh/m2)
SourceEUIWN(kWh/m2)
SiteEnergyUse(kWh)
SiteEnergyUseWN(kWh)
SteamUse(kWh)
Electricity(kWh)
NaturalGas(kWh)
NaturalGas(kWh)
OtherFuelUse(kWh)


In [12]:
test_list

['PropertyGFATotal',
 'PropertyGFAParking',
 'PropertyGFABuilding(s)',
 'LargestPropertyUseTypeGFA',
 'SecondLargestPropertyUseTypeGFA',
 'ThirdLargestPropertyUseTypeGFA',
 'SiteEUI(kBtu/sf)',
 'SiteEUIWN(kBtu/sf)',
 'SourceEUI(kBtu/sf)',
 'SourceEUIWN(kBtu/sf)',
 'SiteEnergyUse(kBtu)',
 'SiteEnergyUseWN(kBtu)',
 'SteamUse(kBtu)',
 'Electricity(kBtu)',
 'NaturalGas(therms)',
 'NaturalGas(kBtu)',
 'OtherFuelUse(kBtu)']

In [15]:
if __name__ == "__main__":
    print("main")
else:
    print(__name__)

main
