In [7]:
# import libraries
from glob import glob
import os
import pandas as pd
import numpy as np

# ---------------------------------------------------------------------------
# Excel workbook written for each asset table
# ---------------------------------------------------------------------------
OH_SWITCHES_TABLE = 'IN_OH_SW.xlsx'
# NOTE(review): this is the only '.xls' name in the group - confirm whether
# the legacy extension is intentional.
UG_SWITCHES_TABLE = 'IN_UG_SW.xls'
OH_TX_TABLE = 'IN_OH_TX.xlsx'
UG_TX_TABLE = 'IN_UG_TX.xlsx'
POLES_TABLE = 'IN_POLES.xlsx'
UG_PRI_CABLE_TABLE = 'IN_CABLES.xlsx'
NTWK_TX_TABLE = 'IN_NTWK_TX.xlsx'

# ---------------------------------------------------------------------------
# Values written into the asset_class_code (ACC) column
# ---------------------------------------------------------------------------
OH_SWITCHES_ASSET_CLASS = 'OH_SWITCH'
OH_TX_ASSET_CLASS = 'OH_TX'
UG_TX_ASSET_CLASS = 'UG_TX'
POLES_ASSET_CLASS = 'POLE'
UG_PRI_CABLE_ASSET_CLASS = 'UG_CABLE'
NTWK_TX_ASSET_CLASS = 'NTWK_TX'

# Column names shared by every output table
ASSET_CLASS = 'asset_class_code'
ASSET_SUBCLASS = 'asset_subclass_code'

# Folder holding the per-asset template workbooks
ASSET_TEMPLATE_FOLDER = 'AssetDataTemplates'

# #  First method to read col names: read from the entire folder
# # define filepath and sort the file list
# filesList = glob(os.path.join(ASSET_TEMPLATE_FOLDER, '*.xlsx'))
# numFiles = len(filesList)
# print('Number of Files: ', numFiles)
# sortedFileList = sorted(filesList)

# # read each template workbook found in ASSET_TEMPLATE_FOLDER
# for f in sortedFileList:
#     fileName = os.path.basename(f).split('.')
#     print(fileName[0])
    
#     if ('$' not in fileName):
#         # Read CYME Feeder xlsx file into dataframes
#         with pd.ExcelFile(f) as xlsx:
#             dfTemp = pd.read_excel(xlsx, 'Sheet2') # 280 rows
            
#             # # Strip '\n' from column headers
#             dfTemp.rename(columns=lambda x: x.strip(), inplace=True)
    
#     #fileName[0]= dfTemp
#     #print(fileName[0].columns)
#     print(dfTemp.columns)

#******************************************************
# Declare column names
#******************************************************
# Column layout for the UG switch table
UG_SWITCHES_COLS = [
    'asset_id', 'id', 'asset_subclass_code', 'asset_class_code',
    'install_year', 'hi', 'phasing', 'prid', 'circuit', 'tx_phase',
    'in_valley', 'tie_feeder',
]

# Column layout for the overhead transformer table
OH_TX_COLS = [
    'asset_id', 'asset_class_code', 'id', 'circuit', 'install_year',
    'asset_subclass_code', 'hi', 'phasing', 'primary_voltage', 'kva',
    'tx_residential', 'tx_commercial', 'tx_industrial',
    'device_residential', 'device_commercial', 'device_industrial',
    'upstream_device', 'prid', 'in_valley', 'pcb', 'banking',
]

# Column layout for the underground transformer table
UG_TX_COLS = [
    'asset_id', 'asset_subclass_code', 'asset_class_code', 'install_year',
    'hi', 'phasing', 'prid', 'circuit', 'primary_voltage', 'kva',
    'in_valley', 'tx_residential', 'tx_commercial', 'tx_industrial',
    'device_residential', 'device_commercial', 'device_industrial',
    'upstream_device', 'pcb', 'pedestal', 'switchable', 'switch_type', 'id',
]

# Column layout for the underground primary cable table
UG_PRI_CABLE_COLS = [
    'asset_id', 'id', 'install_year', 'hi', 'asset_subclass_code',
    'asset_class_code', 'phasing', 'prid', 'circuit', 'arrangement',
    'installation', 'material', 'cable_size', 'config', 'length',
    'num_splices', 'num_cables', 'prid_residential', 'prid_commercial',
    'prid_industrial', 'nominal_voltage', 'wc_prid_catastrophic_res',
    'wc_prid_catastrophic_comm', 'wc_prid_catastrophic_ind', 'cable_phase',
    'wc_replacement', 'wc_switching_res', 'wc_switching_comm',
    'wc_switching_ind', 'wc_switching_duration',
]

# Column layout for the pole table
# NOTE(review): 'hi character', 'phasing character' and 'prid character' carry
# a ' character' suffix that the other tables do not - confirm against the
# template whether these are real header names.
POLES_COLS = [
    'asset_id', 'asset_class_code', 'asset_subclass_code', 'install_year',
    'hi character', 'phasing character', 'prid character', 'pole_class',
    'tx', 'tx_type', 'circuit1', 'circuit2', 'circuit3', 'circuit4',
    'in_valley', 'tx_residential', 'tx_commercial', 'tx_industrial',
    'height', 'num_circuits', 'device', 'tx_kva', 'id', 'prid2', 'prid3',
    'prid4', 'tx_pcb',
]

# Column layout for the network transformer table
# NOTE(review): 'kva character', 'load double' and 'id character' likewise
# look like "name type" pairs - confirm against the template.
NTWK_TX_COLS = [
    'asset_id', 'asset_subclass_code', 'asset_class_code', 'install_year',
    'hi', 'phasing', 'prid', 'circuit', 'primary_voltage', 'kva character',
    'load double', 'id character', 'network_type', 'tx1', 'tx2', 'tx3', 'tx4',
]

#******************************************************
# Source workbook: one worksheet per asset class
fileName = 'Original_FiveAssetClasses.xlsx'
#fileNameOtherDevices = 'Other Device Numbers.xls'

# Load each asset worksheet into its own raw ("V1") dataframe.
# NOTE(review): the row-count remarks that used to sit beside these reads
# (280 / 239 / 44 ...) did not match the shapes printed by this cell, so they
# were dropped rather than kept as misleading documentation.
with pd.ExcelFile(fileName) as xlsx:
    #dfTopology = pd.read_excel(xlsx, 'Topology', index_col=None, na_values=['NA']) # IGNORE for now
    dfTransformersV1 = pd.read_excel(xlsx, sheet_name='Transformers')
    dfSwitchesV1 = pd.read_excel(xlsx, sheet_name='Switches')
    dfPolesV1 = pd.read_excel(xlsx, sheet_name='Poles')
    dfCablesV1 = pd.read_excel(xlsx, sheet_name='UGPrimaryCables')
    dfFusesV1 = pd.read_excel(xlsx, sheet_name='Fuses')
    dfUGStructuresV1 = pd.read_excel(xlsx, sheet_name='UGStructures')

# (rows, columns) of every raw frame, one summary column per asset class
Summary = {'Transformers:': dfTransformersV1.shape, 'Switches:': dfSwitchesV1.shape, 'Poles:': dfPolesV1.shape,
           'Cables:': dfCablesV1.shape, 'Fuses:': dfFusesV1.shape, 'UGStructures:': dfUGStructuresV1.shape}
dfSummary = pd.DataFrame(Summary)

# Make one copy.
# A plain assignment only binds a second name to the same object, so the V2
# frames are real copies: in-place edits on V2 can never alter the V1 frames.
dfTransformersV2 = dfTransformersV1.copy()
dfSwitchesV2 = dfSwitchesV1.copy()
dfPolesV2 = dfPolesV1.copy()
dfCablesV2 = dfCablesV1.copy()
dfFusesV2 = dfFusesV1.copy()
dfUGStructuresV2 = dfUGStructuresV1.copy()

# 17 columns dropped from every asset sheet
dropCommonColumns = ['OBJECTID', 'WORKORDERID', 'FIELDVERIFY', 'COMMENTS', 'CREATIONUSER', 'DATECREATED', 'LASTUSER',
                     'DATEMODIFIED', 'WORKREQUESTID', 'DESIGNID', 'WORKLOCATIONID', 'WMSID', 'WORKFLOWSTATUS',
                     'WORKFUNCTION', 'GISONUMBER', 'GISOTYPENBR', 'OWNERSHIP']

#******************************************************
# FUNCTIONS
#******************************************************
def drop_columns(dfAssetClass, dropColumns):
    """Return a new frame equal to ``dfAssetClass`` without ``dropColumns``.

    ``dfAssetClass`` itself is not modified. pandas raises ``KeyError`` if
    any requested column is absent.
    """
    return dfAssetClass.drop(columns=dropColumns)

def new_columns(dfAssetClass, numAssetRows, columnID):
    """Add (or reset) an all-NaN column on ``dfAssetClass`` and return it.

    The earlier version first built a throwaway frame from uninitialised
    ``np.empty`` memory, ran ``cumsum`` over that garbage and relied on index
    alignment, only to overwrite everything with NaN. Assigning NaN directly
    gives the same float column without that work.

    Parameters:
        dfAssetClass: frame to modify in place.
        numAssetRows: no longer needed (the column always matches the length
            of the frame); kept so existing calls keep working.
        columnID: name of the column to create or reset.

    Returns:
        The new all-NaN column of ``dfAssetClass``.
    """
    dfAssetClass[columnID] = np.nan
    return dfAssetClass[columnID]

#******************************************************
# Drop all common columns
#******************************************************
# The V2 frames are always derived from the untouched V1 frames. Dropping
# from V2 itself made this cell fail with KeyError when run a second time,
# because the columns were already gone.
dfSwitchesV2 = drop_columns(dfSwitchesV1, dropCommonColumns)
dfTransformersV2 = drop_columns(dfTransformersV1, dropCommonColumns)
dfFusesV2 = drop_columns(dfFusesV1, dropCommonColumns)
dfCablesV2 = drop_columns(dfCablesV1, dropCommonColumns)
dfUGStructuresV2 = drop_columns(dfUGStructuresV1, dropCommonColumns)
dfPolesV2 = drop_columns(dfPolesV1, dropCommonColumns)

# Make one copy.
# Real copies (not aliases), so later in-place edits on the working frames
# leave the V2 frames available for a clean restart.
dfTransformers = dfTransformersV2.copy()
dfSwitches = dfSwitchesV2.copy()
dfPoles = dfPolesV2.copy()
dfCables = dfCablesV2.copy()
dfFuses = dfFusesV2.copy()
dfUGStructures = dfUGStructuresV2.copy()

# (rows, columns) after the common columns were removed
SummaryV2 = {'Transformers:': dfTransformers.shape, 'Switches:': dfSwitches.shape, 'Poles:': dfPoles.shape,
             'Cables:': dfCables.shape, 'Fuses:': dfFuses.shape, 'UGStructures:': dfUGStructures.shape}
dfSummaryV2 = pd.DataFrame(SummaryV2)

print(dfSummary)
print(dfSummaryV2)

   Cables:  Fuses:  Poles:  Switches:  Transformers:  UGStructures:
0     3865     736   18961        537           3618          16883
1       39      46      31         52             57             40
   Cables:  Fuses:  Poles:  Switches:  Transformers:  UGStructures:
0     3865     736   18961        537           3618          16883
1       22      29      14         35             40             23


In [21]:
# Re-bind the working frames to the V2 frames so this cell can start over
# without reading the source workbook again.
dfTransformers, dfSwitches, dfPoles = dfTransformersV2, dfSwitchesV2, dfPolesV2
dfCables, dfFuses, dfUGStructures = dfCablesV2, dfFusesV2, dfUGStructuresV2

#******************************************************
# Switches - Reading SwitchGears
#******************************************************
fileNameOtherDevices = 'Other Device Numbers.xlsx'
# Load the switchgear worksheet of the "other devices" workbook
with pd.ExcelFile(fileNameOtherDevices) as xls:
    dfSwitchGears = pd.read_excel(xls, sheet_name='SWITCHGEARS')

# Switchgear columns that are removed straight after loading
dropSGcols = [
    'Switch Gear', 'Adrs #', 'Location', 'City', 'Notes', 'To Type',
    'Inst. Date', 'Mftr.', 'Catalog#', 'Serial#', 'DOM', 'Comments',
]

dfSwitchGears = drop_columns(dfSwitchGears, dropSGcols)
#dfSwitchGears = dfSwitchGears.dropna() # drop all rows with NaN values

#******************************************************
# ASSET CLASS SPECIFIC DICTIONARIES
#******************************************************
# Operating-voltage code -> volts (as text):
# 190 = 8 kV, 250 = 13.8 kV, 1267 = 0 kV, 1237 = 138 kV
# Assets: Transformers
operatingVoltageDict = {
    '190': '8000',
    '250': '13800',
    '1267': '0',
    '1237': '138000',
}

# Phase-designation code -> phase-count label. Keys are strings because the
# columns are cast to str before the lookup.
# Assets: UG Switches, Transformers
#phasingDict = {'1.0': '1Ph', '2.0':'1Ph','4.0':'1Ph','3.0':'2Ph','5.0':'2Ph','6.0':'2Ph','7.0':'3Ph'}
phasingDict = {
    **dict.fromkeys(('1', '2', '4'), '1Ph'),
    **dict.fromkeys(('3', '5', '6'), '2Ph'),
    '7': '3Ph',
}

# UG Switches: switchgear 'Type' value -> asset subclass
dictSGassetSubclass = {
    **dict.fromkeys(('PMH-3', 'PMH-5', 'PMH-9', 'PMH-11'), 'AIR_INSULATED_LIVEFRONT'),
    **dict.fromkeys(('PME-9', 'PME-10', 'PME-11'), 'AIR_INSULATED_DEADFRONT'),
    **dict.fromkeys(('VISTA-321', 'VISTA-422', 'VISTA-431'), 'SF6_INSULATED_SWITCH'),
    **dict.fromkeys(('422', '431', '321'), 'SC_ELEC'),
    'G&W': 'GW',
    'NET': 'CARTE_ELEC_LTD',
}

# Transformers: SUBTYPECD (as text) -> asset subclass
# dictOHTxSubclass = {'1':'Standard 1Ph','9':'Standard 3Ph','10':'Standard 2Ph'}
# dictUGTxSubclass = {'2':'Padmount 1Ph','3':'Network Submersible','5':'Submersible', '7':'Padmount 3Ph'}
dictOHTxSubclass = dict.fromkeys(('1', '9', '10'), 'Standard')
dictUGTxSubclass = {
    '2': 'Padmount',
    '3': 'Network Submersible',
    '5': 'Submersible',
    '7': 'Padmount',
}

#******************************************************
# Normalise the switchgear lookup table
#******************************************************

# 'Type' -> 'PMH'
# 'Loc_No' -> '149-S'
# Blank 'Type' cells take the value of the nearest filled row above.
# Series.ffill() replaces the deprecated fillna(method='ffill') spelling.
dfSwitchGears['Type'] = dfSwitchGears['Type'].ffill()
numSGrows = len(dfSwitchGears['Loc_No'])
# Everything becomes text so 'Loc_No' can be matched against string IDs.
dfSwitchGears = dfSwitchGears.astype(str)
#dfSwitchGears['Loc_No'] = dfSwitchGears.iloc[:,'Loc_No'].apply[s.lstrip("0") for s in listOfNum]
# Strip leading zeros from the location number.
dfSwitchGears['Loc_No'] = dfSwitchGears['Loc_No'].str.lstrip("0")
# Translate the switchgear type into its asset subclass. The column is
# created here directly; the NaN placeholder that used to be added first was
# overwritten immediately and has been removed. An unknown type still raises
# KeyError, so unmapped equipment is not silently ignored.
dfSwitchGears['ASSET_SUBCLASS'] = dfSwitchGears['Type'].apply(lambda sgType: dictSGassetSubclass[sgType])

#******************************************************
# Asset-specific columns to drop
#******************************************************
# Switch columns that are removed from the working frame
dropSwitchesCols = [
    'ANCILLARYROLE', 'ENABLED', 'FEEDERINFO', 'ELECTRICTRACEWEIGHT',
    'LOCATIONID', 'GPSDATE', 'LABELTEXT', 'OPERATINGVOLTAGE',
    'NOMINALVOLTAGE', 'MAXOPERATINGVOLTAGE', 'MAXCONTINUOUSCURRENT',
    'PRESENTPOSITION_R', 'PRESENTPOSITION_Y', 'PRESENTPOSITION_B',
    'NORMALPOSITION_R', 'NORMALPOSITION_Y', 'NORMALPOSITION_B',
    'SCADACONTROLID', 'SCADAMONITORID', 'PREFERREDCIRCUITSOURCE',
    'TIESWITCHINDICATOR', 'GANGOPERATED', 'MANUALLYOPERATED',
    'FEATURE_STATUS', 'HYPERLINK', 'HYPERLINK_PGDB', 'SYMBOLROTATION',
    'INSULATOR_MATERIAL',
]

# Transformer columns that are removed from the working frame.
# 'KVA' is dropped here; 'RATEDKVA' is renamed to 'KVA' further down.
dropTxCols = [
    'ANCILLARYROLE', 'ENABLED', 'FEEDERID2', 'FEEDERINFO',
    'ELECTRICTRACEWEIGHT', 'LOCATIONID', 'SYMBOLROTATION', 'GPSDATE',
    'LABELTEXT', 'NOMINALVOLTAGE', 'GROUNDREACTANCE', 'GROUNDRESISTANCE',
    'MAGNETIZINGREACTANCE', 'MAGNETIZINGRESISTANCE',
    'HIGHSIDEGROUNDREACTANCE', 'HIGHSIDEGROUNDRESISTANCE',
    'HIGHSIDEPROTECTION', 'LOCATIONTYPE', 'COOLINGTYPE', 'FEATURE_STATUS',
    'KVA', 'DEMAND_KVA', 'DEMAND_DATE_MM_DD_YYYY', 'STREET_LIGHT_FACILITY',
    'HIGHSIDECONFIGURATION', 'LOWSIDECONFIGURATION',
    'LOWSIDEGROUNDRESISTANCE', 'LOWSIDEVOLTAGE', 'LATITUDE', 'LONGITUDE',
]

#******************************************************
# Apply the drops
#******************************************************
dfSwitches = drop_columns(dfSwitches, dropSwitchesCols)
dfTransformers = drop_columns(dfTransformers, dropTxCols)

#******************************************************
# KEEP ONLY THE UG SWITCH ROWS, THEN COUNT ROWS
#******************************************************
# SUBTYPECD 6 marks the UG switch rows
ugSwitchMask = dfSwitches.SUBTYPECD == 6
dfSwitches = dfSwitches[ugSwitchMask]

# Number of rows/observations, taken before DEVICENUMBER is renamed to ID
numSwitchRows = len(dfSwitches['DEVICENUMBER'])
numTxRows = len(dfTransformers['DEVICENUMBER'])

#******************************************************
# RENAME ASSET COLUMNS
#******************************************************
# Source column -> output column for switches
switchColumnNames = {
    'SUBTYPECD': ASSET_CLASS,
    'DEVICENUMBER': 'ID',
    'COMPATIBLEUNITID': ASSET_SUBCLASS,
    'PHASEDESIGNATION': 'PHASING',
    'FEEDERID': 'CIRCUIT',
    'FEEDERID2': 'TIE_FEEDER',
    'INSTALLATIONDATE': 'INSTALL_YEAR',
}
dfSwitches = dfSwitches.rename(columns=switchColumnNames)

# Source column -> output column for transformers
txColumnNames = {
    'DEVICENUMBER': 'ID',
    'PHASEDESIGNATION': 'Type',
    'INSTALLATIONDATE': 'INSTALL_YEAR',
    'FEEDERID': 'CIRCUIT',
    'RATEDKVA': 'KVA',
}
dfTransformers = dfTransformers.rename(columns=txColumnNames)

# Keep only the year of each installation date
dfSwitches['INSTALL_YEAR'] = dfSwitches['INSTALL_YEAR'].map(lambda stamp: stamp.year)
dfTransformers['INSTALL_YEAR'] = dfTransformers['INSTALL_YEAR'].map(lambda stamp: stamp.year)

#******************************************************
# ADD ADDITIONAL COLUMNS AND FILL WITH NaNs
#******************************************************
# UG Switches
for switchCol in ('HI', 'TX_PHASE', 'IN_VALLEY', 'PRID'):
    dfSwitches[switchCol] = new_columns(dfSwitches, numSwitchRows, switchCol)

# Transformers - added in this order so the column layout is unchanged
txPlaceholderCols = (
    ASSET_CLASS, 'HI', 'PRID', 'IN_VALLEY',
    'TX_RESIDENTIAL', 'TX_COMMERCIAL', 'TX_INDUSTRIAL',
    'DEVICE_RESIDENTIAL', 'DEVICE_COMMERCIAL', 'DEVICE_INDUSTRIAL',
    'UPSTREAM_DEVICE', 'PCB',
)
for txCol in txPlaceholderCols:
    dfTransformers[txCol] = new_columns(dfTransformers, numTxRows, txCol)

#******************************************************
# SPLIT TRANSFORMERS INTO UG AND OH BY SUBTYPE
#******************************************************
# Each selection is wrapped in pd.DataFrame(...) - the workaround found for
# the "index vs copy" warning.

# UG Switches rows
#dfSwitches = dfSwitches[dfSwitches.SUBTYPECD == 6]

# switch filtering has to go before renaming columns
# UG Tx subtypes: 2 = Padmount, 3 = Network Submersible, 5 = Submersible, 7 = Padmount
ugTxSubtypes = [2, 3, 5, 7]
dfUGTransformers = pd.DataFrame(dfTransformers[dfTransformers.SUBTYPECD.isin(ugTxSubtypes)])

# OH Tx subtypes: 1, 9, 10 (all mapped to 'Standard')
ohTxSubtypes = [1, 9, 10]
dfOHTransformers = pd.DataFrame(dfTransformers[dfTransformers.SUBTYPECD.isin(ohTxSubtypes)])

#******************************************************
# Replace Asset class and 'SUBTYPECD' with actual tx types
#******************************************************
numOHTxRows = len(dfOHTransformers['ID'])
numUGTxRows = len(dfUGTransformers['ID'])

#******************************************************
# TRANSFORMERS
#******************************************************
# Cast through int before str. If the worksheet column was read as float, a
# plain astype(str) yields '2.0' and the lookup below fails with KeyError
# because the dictionary keys are '2', '3', ... The switch PHASING column is
# already handled the same way. The rows were selected by equality with
# integer codes, so no missing values can reach the int cast.
ohSubtypeCodes = dfOHTransformers['SUBTYPECD'].astype(int).astype(str)
ugSubtypeCodes = dfUGTransformers['SUBTYPECD'].astype(int).astype(str)

# Subtype code -> subclass name (an unmapped code still raises KeyError)
dfOHTransformers['SUBTYPECD'] = ohSubtypeCodes.apply(lambda code: dictOHTxSubclass[code])
dfUGTransformers['SUBTYPECD'] = ugSubtypeCodes.apply(lambda code: dictUGTxSubclass[code])

# The translated column becomes the asset subclass column
dfOHTransformers = dfOHTransformers.rename(columns={'SUBTYPECD': ASSET_SUBCLASS})
dfUGTransformers = dfUGTransformers.rename(columns={'SUBTYPECD': ASSET_SUBCLASS})

# Remaining OH Tx and UG Tx specific columns (all NaN)
dfOHTransformers['BANKING'] = new_columns(dfOHTransformers, numOHTxRows, 'BANKING')
dfUGTransformers['PEDESTAL'] = new_columns(dfUGTransformers, numUGTxRows, 'PEDESTAL')
dfUGTransformers['SWITCHABLE'] = new_columns(dfUGTransformers, numUGTxRows, 'SWITCHABLE')
dfUGTransformers['SWITCH_TYPE'] = new_columns(dfUGTransformers, numUGTxRows, 'SWITCH_TYPE')
#print(numOHTxRows, numUGTxRows)
#print(dfOHTransformers.columns)

# Transformer domain-code tables
fileNameDomainCodes_Tx = 'DomainCodes_Tx.xlsx'
# Load the UG and OH transformer domain-code worksheets
with pd.ExcelFile(fileNameDomainCodes_Tx) as xls:
    dfUGTxDomainCodes = pd.read_excel(xls, sheet_name='UGTransformers')
    dfOHTxDomainCodes = pd.read_excel(xls, sheet_name='OHTransformers')

# The join key must have the same (string) type on both sides of each merge
for keyedFrame in (dfOHTransformers, dfUGTransformers, dfOHTxDomainCodes, dfUGTxDomainCodes):
    keyedFrame['COMPATIBLEUNITID'] = keyedFrame['COMPATIBLEUNITID'].astype(str)
#print(dfUGTxDomainCodes.head())

# Left joins keep every transformer row, matched or not
dfOHTransformers = pd.merge(dfOHTransformers, dfOHTxDomainCodes, how='left', on='COMPATIBLEUNITID')
dfUGTransformers = pd.merge(dfUGTransformers, dfUGTxDomainCodes, how='left', on='COMPATIBLEUNITID')
#print(dfUGTransformers.head(2))

# Columns removed after the merge (the same set for OH and UG)
dropOHTxCols = [
    'COMPATIBLEUNITID', 'Description', 'PRIMARY_VOLTAGE', 'NAMEPLATE',
    'PHASING', 'Fused', 'UNITS', 'FAULTINDICATOR', 'Tx_type_counts',
]
dropUGTxCols = [
    'COMPATIBLEUNITID', 'Description', 'PRIMARY_VOLTAGE', 'NAMEPLATE',
    'PHASING', 'Fused', 'UNITS', 'FAULTINDICATOR', 'Tx_type_counts',
]

dfOHTransformers = drop_columns(dfOHTransformers, dropOHTxCols)
dfUGTransformers = drop_columns(dfUGTransformers, dropUGTxCols)
#print(dfSwitches.shape) # (537, 11) with all switches, (260,11) with subtype=6 ('Switch Switchgear')
#print('dfSG Shape:', dfSwitchGears.shape) #(111, 3)

#******************************************************
# Attach the switchgear subclass to each switch
#******************************************************
# Match the switch ID (as text) against the switchgear location number;
# a left join keeps switches that have no switchgear entry.
dfSwitches['ID'] = dfSwitches['ID'].astype(str)
dfSwitches = dfSwitches.merge(dfSwitchGears, how='left', left_on='ID', right_on='Loc_No')
#dfSwitches['ID'] = dfSwitchGears['Loc_No'].apply(lambda x: )
#df.merge(df1, on='sku', how='left')
# print(len(pd.unique(dfSwitchGears['Loc_No'].values.ravel()))) # 111

#******************************************************
# Drop the subclass column that came from COMPATIBLEUNITID together with the
# join key, then promote the switchgear-derived 'ASSET_SUBCLASS' column to
# the official subclass column name.
switchesDropMoreCols = [ASSET_SUBCLASS, 'Loc_No']
dfSwitches = dfSwitches.drop(columns=switchesDropMoreCols)
dfSwitches = dfSwitches.rename(columns={'ASSET_SUBCLASS': ASSET_SUBCLASS})

#******************************************************
# Rename proper asset nomenclature
# NOTE(review): these switches are filtered as UG switches and written to
# UG_SWITCHES_TABLE, yet they are labelled with the OH switch class - confirm
# that this is intended.
dfSwitches[ASSET_CLASS] = OH_SWITCHES_ASSET_CLASS
dfOHTransformers[ASSET_CLASS] = OH_TX_ASSET_CLASS
dfUGTransformers[ASSET_CLASS] = UG_TX_ASSET_CLASS

#******************************************************
# Translate coded values into readable labels
#******************************************************
#phasingDict = {'1.0': '1Ph', '2.0':'1Ph','4.0':'1Ph','3.0':'2Ph','5.0':'2Ph','6.0':'2Ph','7.0':'3Ph'}
# UG Switches - 'phasing' col: int first so the text has no '.0' suffix
switchPhaseCodes = dfSwitches['PHASING'].astype(int).astype(str)
dfSwitches['PHASING'] = switchPhaseCodes.apply(lambda code: phasingDict[code])

# OH and UG Tx - operating voltage code and phase designation ('Type')
#operatingVoltageDict = {'190':'8000','250':'13800','1267':'0','1237':'138000'}
for txFrame in (dfOHTransformers, dfUGTransformers):
    voltageCodes = txFrame['OPERATINGVOLTAGE'].astype(str)
    txFrame['OPERATINGVOLTAGE'] = voltageCodes.apply(lambda code: operatingVoltageDict[code])

    txPhaseCodes = txFrame['Type'].astype(str)
    txFrame['Type'] = txPhaseCodes.apply(lambda code: phasingDict[code])

#******************************************************
# The operating voltage becomes the primary voltage column
voltageRename = {'OPERATINGVOLTAGE': 'primary_voltage'}
dfOHTransformers = dfOHTransformers.rename(columns=voltageRename)
dfUGTransformers = dfUGTransformers.rename(columns=voltageRename)

#******************************************************
# Lower-case every column name
dfSwitches.columns = [str.lower(label) for label in dfSwitches.columns]
dfOHTransformers.columns = [str.lower(label) for label in dfOHTransformers.columns]
dfUGTransformers.columns = [str.lower(label) for label in dfUGTransformers.columns]

#******************************************************
# REARRANGE COLUMNS
#******************************************************
# *All tables need 'asset_id' - rename index
# UG Switches: this order follows UG_SWITCHES_COLS, except that 'asset_id'
# is not included and an extra 'type' column is kept at the end.
switchOutputOrder = [
    'id', 'asset_subclass_code', 'asset_class_code', 'install_year', 'hi',
    'phasing', 'prid', 'circuit', 'tx_phase', 'in_valley', 'tie_feeder',
    'type',
]
dfSwitches = dfSwitches[switchOutputOrder]

# Transformers
# OH_TX_COLS = ['asset_id','asset_class_code','id','circuit','install_year','asset_subclass_code','hi',
#               'phasing','primary_voltage','kva','tx_residential','tx_commercial','tx_industrial','device_residential',
#               'device_commercial','device_industrial','upstream_device','prid','in_valley','pcb','banking']

# OH_TX_COLS = [,'','','','',,
#               'tx_residential','tx_commercial','tx_industrial','device_residential',
#               'device_commercial','device_industrial','upstream_device','prid',','pcb']

#'faultindicator','type','units','tx_type_counts','sec_voltage','fused'

# UG_TX_COLS = ['asset_id','asset_subclass_code','asset_class_code','install_year','hi','phasing','prid','circuit',
#               'primary_voltage','kva','in_valley','tx_residential','tx_commercial','tx_industrial','device_residential',
#               'device_commercial','device_industrial','upstream_device','pcb','pedestal','switchable','switch_type','id']
# UG_TX_COLS = ['asset_id','asset_subclass_code','asset_class_code','install_year','hi','phasing','prid','circuit',
#               'primary_voltage','kva','in_valley','tx_residential','tx_commercial','tx_industrial','device_residential',
#               'device_commercial','device_industrial','upstream_device','pcb','pedestal','switchable','switch_type','id']


print(dfOHTransformers.columns)
print(dfUGTransformers.columns)
#******************************************************
# OUTPUT DATAFRAMES TO EXCEL FILES
#******************************************************
# ExcelWriter.save() was deprecated and then removed from pandas. The
# context manager closes the writer on exit (which writes the workbook),
# also when to_excel raises. sheet_name is passed by keyword because
# positional use is deprecated.
# NOTE(review): UG_SWITCHES_TABLE ends in '.xls'; recent pandas releases may
# have no writer engine for that extension - confirm, or switch to '.xlsx'.
outputTables = (
    (UG_SWITCHES_TABLE, dfSwitches),       # UG SWITCH
    (OH_TX_TABLE, dfOHTransformers),       # OH TX
    (UG_TX_TABLE, dfUGTransformers),       # UG TX
)
for outputPath, outputFrame in outputTables:
    with pd.ExcelWriter(outputPath) as MasterFile:
        outputFrame.to_excel(MasterFile, sheet_name='Sheet1')


Index(['INSTALL_YEAR', 'CIRCUIT', 'asset_subclass_code', 'COMPATIBLEUNITID',
       'Type', 'OPERATINGVOLTAGE', 'ID', 'FAULTINDICATOR', 'UNITS', 'KVA',
       'asset_class_code', 'HI', 'PRID', 'IN_VALLEY', 'TX_RESIDENTIAL',
       'TX_COMMERCIAL', 'TX_INDUSTRIAL', 'DEVICE_RESIDENTIAL',
       'DEVICE_COMMERCIAL', 'DEVICE_INDUSTRIAL', 'UPSTREAM_DEVICE', 'PCB',
       'PEDESTAL', 'SWITCHABLE', 'SWITCH_TYPE', 'Tx_type_counts',
       'Description', 'NAMEPLATE', 'PRIMARY_VOLTAGE', 'SEC_VOLTAGE', 'PHASING',
       'Fused'],
      dtype='object')
Index(['INSTALL_YEAR', 'CIRCUIT', 'asset_subclass_code', 'COMPATIBLEUNITID',
       'Type', 'OPERATINGVOLTAGE', 'ID', 'FAULTINDICATOR', 'UNITS', 'KVA',
       'asset_class_code', 'HI', 'PRID', 'IN_VALLEY', 'TX_RESIDENTIAL',
       'TX_COMMERCIAL', 'TX_INDUSTRIAL', 'DEVICE_RESIDENTIAL',
       'DEVICE_COMMERCIAL', 'DEVICE_INDUSTRIAL', 'UPSTREAM_DEVICE', 'PCB',
       'BANKING', 'Tx_type_counts', 'Description', 'NAMEPLATE',
       'PRIMARY_VOLTAGE', '

In [37]:
# Switch columns that are removed from the working frame
dropSwitchesCols = [
    'ANCILLARYROLE', 'ENABLED', 'FEEDERINFO', 'ELECTRICTRACEWEIGHT',
    'LOCATIONID', 'GPSDATE', 'LABELTEXT', 'OPERATINGVOLTAGE',
    'NOMINALVOLTAGE', 'MAXOPERATINGVOLTAGE', 'MAXCONTINUOUSCURRENT',
    'PRESENTPOSITION_R', 'PRESENTPOSITION_Y', 'PRESENTPOSITION_B',
    'NORMALPOSITION_R', 'NORMALPOSITION_Y', 'NORMALPOSITION_B',
    'SCADACONTROLID', 'SCADAMONITORID', 'PREFERREDCIRCUITSOURCE',
    'TIESWITCHINDICATOR', 'GANGOPERATED', 'MANUALLYOPERATED',
    'FEATURE_STATUS', 'HYPERLINK', 'HYPERLINK_PGDB', 'SYMBOLROTATION',
    'INSULATOR_MATERIAL',
]

# Remove those columns, then keep only the UG switch rows (SUBTYPECD 6)
dfSwitches = drop_columns(dfSwitches, dropSwitchesCols)
ugSwitchRows = dfSwitches.SUBTYPECD == 6
dfSwitches = dfSwitches[ugSwitchRows]
# df.query('line_race != 0')
# df = df[df.line_race != 0]


# Source column -> output column for switches
switchColumnNames = {
    'SUBTYPECD': 'Asset_Class_Code',
    'DEVICENUMBER': 'ID',
    'COMPATIBLEUNITID': 'Asset_Subclass_Code',
    'PHASEDESIGNATION': 'PHASING',
    'FEEDERID': 'CIRCUIT',
    'FEEDERID2': 'TIE_FEEDER',
    'INSTALLATIONDATE': 'INSTALL_YEAR',
}
dfSwitches = dfSwitches.rename(columns=switchColumnNames)
# Keep only the year of each installation date
dfSwitches['INSTALL_YEAR'] = dfSwitches['INSTALL_YEAR'].map(lambda stamp: stamp.year)

# Add additional columns and fill with NaNs
numSwitchRows = len(dfSwitches['ID'])
for switchCol in ('HI', 'TX_PHASE', 'IN_VALLEY', 'PRID'):
    dfSwitches[switchCol] = new_columns(dfSwitches, numSwitchRows, switchCol)

print(dfSwitches.shape) # (537, 11) with all switches, (260,11) with subtype=6 ('Switch Switchgear')
print('dfSG Shape:', dfSwitchGears.shape) #(111, 3)



(260, 11)
dfSG Shape: (451, 3)


In [None]:
print(dfSwitches.shape)
print('dfSG Shape:', dfSwitchGears.shape)
# DataFrame has no `uniquevalues` method, so the old call raised
# AttributeError. pd.unique over the flattened values lists every distinct
# value in the switchgear table.
print(pd.unique(dfSwitchGears.values.ravel()))
# print(dfSwitches.head(5))
# print(dfSwitchGears.head(10))

In [60]:
# colNames = {'Transformers:': list(dfTransformers.columns), 'Switches:': list(dfSwitches.columns),
#            'Poles:': list(dfPoles.columns), 'Cables:': list(dfCables.columns), 'Fuses:':list(dfFuses.columns),
#            'UGStructures:':list(dfUGStructures.columns)}
# dfColNames = pd.Series(colNames)
# print(dfColNames['Transformers:'])

# Cables
# Cable columns that are not carried into the output table
dropCablesCols = [
    'ENABLED', 'INSTALLATIONDATE', 'FEEDERID', 'FEEDERID2', 'FEEDERINFO',
    'ELECTRICTRACEWEIGHT', 'LOCATIONID', 'LENGTHSOURCE', 'MEASUREDLENGTH',
    'LENGTHUOMCODE', 'WIRECOUNT', 'SUBTYPECD', 'LABELTEXT',
    'COMPATIBLEUNITID', 'PHASEDESIGNATION', 'OPERATINGVOLTAGE',
    'NOMINALVOLTAGE', 'ISFEEDERTRUNK', 'NEUTRALUSECD', 'FEATURE_STATUS',
    'CONDUCTOR_REJUVENATION', 'SHAPE_Length',
]



#print(dfSwitches.head(5))


# Rename OH Tx columns

# Rename UG Tx columns

# Rename Distribution Poles columns

# Rename UG Cable columns



  INSTALL_YEAR CIRCUIT TIE_FEEDER  Asset Class  Asset Subclass      ID
0   1981-09-01   3S2-1        NaN            6            1233    92-S
1   1900-01-01   5S1-3        NaN            6            1233   336-S
2   2008-06-01   3S2-1        NaN            6            1233   360-S
3   2015-06-12   5S2-3        NaN            6            1233  380-MS
4   1987-10-01   5S2-3      6S2-1            3             193    6-MS


In [61]:
# Poles
# Pole columns that are not carried into the output table
dropPolesCols = [
    'INSTALLATIONDATE', 'SYMBOLROTATION', 'GPSDATE', 'SUBTYPECD',
    'LABELTEXT', 'COMPATIBLEUNITID', 'STRUCTURENUMBER', 'FEATURE_STATUS',
    'STREETLIGHT_FACILITY', 'REPLACED_DATE_MM_DD_YYYY', 'DEVICENUMBER',
    'CONDITION', 'CONDITION_STATUS', 'CONDITION_DATE',
]



  INSTALL_YEAR CIRCUIT TIE_FEEDER  Asset Class  Asset Subclass      ID  HI  \
0   1981-09-01   3S2-1        NaN            6            1233    92-S NaN   
1   1900-01-01   5S1-3        NaN            6            1233   336-S NaN   
2   2008-06-01   3S2-1        NaN            6            1233   360-S NaN   
3   2015-06-12   5S2-3        NaN            6            1233  380-MS NaN   
4   1987-10-01   5S2-3      6S2-1            3             193    6-MS NaN   

   TX_PHASE IN_VALLEY  
0       NaN        No  
1       NaN        No  
2       NaN        No  
3       NaN        No  
4       NaN        No  


In [66]:
# Transformers
# TODO (from the original note): separate into UG and OH.
#
# Source column names collected for removal (presumably dropped from the
# transformer frame; the drop itself is not part of this cell).
dropTransformersCols = [
    'ANCILLARYROLE',
    'ENABLED',
    'INSTALLATIONDATE',
    'FEEDERID',
    'FEEDERID2',
    'FEEDERINFO',
    'ELECTRICTRACEWEIGHT',
    'LOCATIONID',
    'SYMBOLROTATION',
    'GPSDATE',
    'SUBTYPECD',
    'LABELTEXT',
    'COMPATIBLEUNITID',
    'PHASEDESIGNATION',
    'OPERATINGVOLTAGE',
    'NOMINALVOLTAGE',
    'GROUNDREACTANCE',
    'GROUNDRESISTANCE',
    'MAGNETIZINGREACTANCE',
    'MAGNETIZINGRESISTANCE',
    'HIGHSIDEGROUNDREACTANCE',
    'HIGHSIDEGROUNDRESISTANCE',
    'HIGHSIDEPROTECTION',
    'LOCATIONTYPE',
    'DEVICENUMBER',
    'FAULTINDICATOR',
    'COOLINGTYPE',
    'FEATURE_STATUS',
    'KVA',
    'UNITS',
    'DEMAND_KVA',
    'DEMAND_DATE_MM_DD_YYYY',
    'STREET_LIGHT_FACILITY',
    'HIGHSIDECONFIGURATION',
    'LOWSIDECONFIGURATION',
    'LOWSIDEGROUNDRESISTANCE',
    'LOWSIDEVOLTAGE',
    'LATITUDE',
    'LONGITUDE',
    'RATEDKVA',
]


In [67]:
# Fuses: source column names collected for removal (presumably dropped from
# the fuses frame; the drop itself is not part of this cell).
dropFusesCols = [
    'ANCILLARYROLE',
    'ENABLED',
    'INSTALLATIONDATE',
    'FEEDERID',
    'FEEDERID2',
    'FEEDERINFO',
    'ELECTRICTRACEWEIGHT',
    'LOCATIONID',
    'GPSDATE',
    'SUBTYPECD',
    'LABELTEXT',
    'COMPATIBLEUNITID',
    'PHASEDESIGNATION',
    'OPERATINGVOLTAGE',
    'NOMINALVOLTAGE',
    'MAXCONTINUOUSCURRENT',
    'MAXINTERRUPTINGCURRENT',
    'MAXOPERATINGVOLTAGE',
    'PRESENTPOSITION_R',
    'PRESENTPOSITION_Y',
    'PRESENTPOSITION_B',
    'NORMALPOSITION_R',
    'NORMALPOSITION_Y',
    'NORMALPOSITION_B',
    'DEVICENUMBER',
    'FUSELINKSIZE',
    'FEATURE_STATUS',
    'SYMBOLROTATION',
    'INSULATOR_MATERIAL',
]


   INSTALL_YEAR CIRCUIT TIE_FEEDER  Asset Class  Asset Subclass      ID  HI  \
0          1981   3S2-1        NaN            6            1233    92-S NaN   
1          1900   5S1-3        NaN            6            1233   336-S NaN   
2          2008   3S2-1        NaN            6            1233   360-S NaN   
3          2015   5S2-3        NaN            6            1233  380-MS NaN   

   TX_PHASE IN_VALLEY  
0       NaN        No  
1       NaN        No  
2       NaN        No  
3       NaN        No  


In [None]:
# Drop_Switch_Columns = ['OBJECTID', 'ANCILLARYROLE', 'ENABLED', 'WORKORDERID', 'FIELDVERIFY', 'COMMENTS','CREATIONUSER', 
#                        'DATECREATED', 'LASTUSER', 'DATEMODIFIED', 'WORKREQUESTID', 'DESIGNID','WORKLOCATIONID', 'WMSID', 
#                        'WORKFLOWSTATUS', 'WORKFUNCTION', 'FEEDERINFO','ELECTRICTRACEWEIGHT', 'LOCATIONID', 'GPSDATE', 
#                        'GISONUMBER', 'GISOTYPENBR', 'LABELTEXT','OWNERSHIP', 'OPERATINGVOLTAGE', 'NOMINALVOLTAGE',
#                        'MAXOPERATINGVOLTAGE', 'MAXCONTINUOUSCURRENT', 'PRESENTPOSITION_R', 'PRESENTPOSITION_Y', 
#                        'PRESENTPOSITION_B', 'NORMALPOSITION_R', 'NORMALPOSITION_Y', 'NORMALPOSITION_B', 'SCADACONTROLID', 
#                        'SCADAMONITORID', 'PREFERREDCIRCUITSOURCE', 'TIESWITCHINDICATOR', 'GANGOPERATED', 'MANUALLYOPERATED',
#                         'FEATURE_STATUS', 'HYPERLINK', 'HYPERLINK_PGDB', 'SYMBOLROTATION', 'INSULATOR_MATERIAL']


# %timeit
# Worksheet frames paired with the prefix put in front of their column names.
# A prefix of None means the headers are cleaned but not prefixed (SpotLoads).
_sheet_frames = (
    (dfTopology, 'Topology_'),
    (dfSpotLoads, None),
    (dfLoads, 'Loads_'),
    (dfCables, 'Cables_'),
    (dfSwitches, 'Switches_'),
    (dfNodes, 'Nodes_'),
    (dfOHlines, 'OHlines_'),
    (dfFuses, 'Fuses_'),
)

# Pass 1: remove embedded '\n' characters from every column header (in place).
for _frame, _prefix in _sheet_frames:
    _frame.rename(columns=lambda header: header.replace('\n', ''), inplace=True)

# Pass 2: prepend the worksheet prefix to every column header (in place).
for _frame, _prefix in _sheet_frames:
    if _prefix is not None:
        _frame.rename(columns=lambda header, _p=_prefix: _p + header, inplace=True)

# Outer-merge each asset worksheet onto the Nodes worksheet, matching the
# asset's 'From Node' column against 'Nodes_Node Id'.
dfNodesMaster = dfNodes
for _asset_frame, _from_node_col in (
    (dfSwitches, 'Switches_From Node'),
    (dfLoads, 'Loads_From Node'),
    (dfFuses, 'Fuses_From Node'),
    # (dfOHlines, 'OHlines_From Node'),  # OH lines merge is disabled
    (dfCables, 'Cables_From Node'),
):
    dfNodesMaster = pd.merge(
        dfNodesMaster,
        _asset_frame,
        how='outer',
        left_on='Nodes_Node Id',
        right_on=_from_node_col,
    )
# print(dfNodesMaster.head(3))
print(dfNodesMaster.shape[1])

_load_column_names = {
    'Loads_Total CkVA(kVA)': 'Nameplate',
    'Loads_Spot Number': 'TransformerID',
}
dfNodesMaster = dfNodesMaster.rename(columns=_load_column_names)

# NOTE: this binds a second name to the same DataFrame; it is not a copy, so
# the column changes below are visible through dfNodesMaster as well.
dfNodesCopy = dfNodesMaster
#print(dfNodesCopy.dtypes)

# Cast the cable node ids to str (missing values become the text 'nan').
for _node_col in ('Cables_From Node', 'Cables_To Node'):
    dfNodesCopy[_node_col] = dfNodesCopy[_node_col].astype(str)

In [5]:
def _split_node_ids(node_ids):
    """Split each node id on '_' and return (first_parts, second_parts).

    Both results are lists with one entry per input value:
      * a string containing '_' contributes its first two '_'-separated parts
        (any further parts are ignored);
      * every other value (a string without '_', NaN, a number, ...) is passed
        through unchanged as the first part, with NaN as the second part.

    Unlike the previous ``zip(*series.apply(...))`` form this does not raise
    on non-string values (e.g. NaN keys produced by the outer merges), on an
    empty input, or when ids have more than two parts.
    """
    first_parts, second_parts = [], []
    for node_id in node_ids:
        if isinstance(node_id, str) and '_' in node_id:
            first, second = node_id.split('_')[:2]
        else:
            first, second = node_id, np.nan
        first_parts.append(first)
        second_parts.append(second)
    return first_parts, second_parts


# Split 'Nodes_Node Id' to 'NodeID_1' and 'NodeID_2' for 'SwitchRegion'
dfNodesCopy['NodeID_1'], dfNodesCopy['NodeID_2'] = _split_node_ids(dfNodesCopy['Nodes_Node Id'])

#******************************#
#***DIFFERENT FROM V5 BEGINS***#
#******************************#
dfNodesCopy['Cables_FromNodeID_1'], dfNodesCopy['Cables_FromNodeID_2'] = _split_node_ids(
    dfNodesCopy['Cables_From Node'])
dfNodesCopy['Cables_ToNodeID_1'], dfNodesCopy['Cables_ToNodeID_2'] = _split_node_ids(
    dfNodesCopy['Cables_To Node'])
# Columns 'Cables_FromNodeID_2' and 'Cables_ToNodeID_2' dropped in Cables_drop_cols
#******************************#
#***DIFFERENT FROM V5 ENDS*****#
#******************************#

In [6]:
# Switch region columns.
# Row count of the merged node table.
Num_rows = len(dfNodesCopy['Nodes_Network Id'])


def _dashed_id_or_nan(value):
    """Return ``value`` if it is a string containing '-', otherwise NaN.

    Non-string values (e.g. NaN) yield NaN instead of raising TypeError on
    the ``in`` test.
    """
    if isinstance(value, str) and '-' in value:
        return value
    return np.nan


# V6 changes: regions are also derived for the cable From/To node ids.
# Each column keeps only the ids that contain '-' and then forward-fills the
# gaps, so every row carries the most recent dashed id seen above it.
# The columns are assigned directly (no placeholder initialisation needed) and
# in this order so the resulting column order stays
# SwitchRegion, CablesSwitchRegionFrom, CablesSwitchRegionEnd.
dfNodesCopy['SwitchRegion'] = dfNodesCopy['NodeID_1'].apply(_dashed_id_or_nan).ffill()
dfNodesCopy['CablesSwitchRegionFrom'] = (
    dfNodesCopy['Cables_FromNodeID_1'].apply(_dashed_id_or_nan).ffill())
dfNodesCopy['CablesSwitchRegionEnd'] = (
    dfNodesCopy['Cables_ToNodeID_1'].apply(_dashed_id_or_nan).ffill())

#V5
#dfNodesCopy['SwitchRegion'] = dfNodesCopy['NodeID_1'].apply(lambda x: x if '-' in x else np.nan)

#http://stackoverflow.com/questions/27905295/how-to-replace-nans-by-preceding-values-in-pandas-dataframe
# df.fillna(method='ffill')
# http://stackoverflow.com/questions/11497206/regular-expression-for-letters-dash-underscore-numbers-and-space

# Columns to remove from the merged node table, grouped by source worksheet
# (temporary list for now).

# 'Nodes_Network Id' is not in this list.
Nodes_drop_cols = [
    'Nodes_Phase', 'Nodes_Node Id',
    'NodeID_1', 'NodeID_2',
]

# Not in this list: 'Switches_Section Id', 'Switches_State', 'Switches_Rating(A)'
Switches_drop_cols = [
    'Switches_Network Id', 'Switches_Equipment Id', 'Switches_Device Type',
    'Switches_Status', 'Switches_Phase', 'Switches_From Node',
    'Switches_Voltage(kV)',
]

# Not in this list: 'Loads_TransformerID', 'Loads_Phase', 'Loads_Nameplate'
Tx_drop_cols = [
    'Loads_Network Id', 'Loads_Section Id', 'Loads_Status', 'Loads_From Node',
    'Loads_Spot Type', 'Loads_Dist Number', 'Loads_Dist Type',
    'Loads_Total kVA(kVA)', 'Loads_Total kW(kW)', 'Loads_Total kvar',
    'Loads_Aver. PF(%)', 'Loads_Total kWh(kWh)', 'Loads_Total Cust',
    'Loads_Phase Type', 'Loads_Config', 'Loads_Locked', 'Loads_Load Model',
]

# Not in this list: 'Fuses_Section Id', 'Fuses_Equipment Id'
Fuses_drop_cols = [
    'Fuses_Network Id', 'Fuses_Status', 'Fuses_State', 'Fuses_Phase',
    'Fuses_Manufacturer', 'Fuses_Model', 'Fuses_Voltage(kV)',
    'Fuses_Voltage Class', 'Fuses_Standard', 'Fuses_Rating(A)',
    'Fuses_Rating', 'Fuses_Interrupting Rating(A)',
    'Fuses_From Node', 'Fuses_To Node',
]

# OH lines are not merged into the node table, so nothing is listed here.
# Earlier candidate list: 'OHlines_Network Id', 'OHlines_Phase',
# 'OHlines_Cond R', 'OHlines_Cond Y', 'OHlines_Cond B', 'OHlines_Neutral 1',
# 'OHlines_Neutral 2', 'OHlines_Spacing'
# (not in that list: 'OHlines_Section Id', 'OHlines_Length(m)').
OHlines_drop_cols = []

# V6 changes: the split cable node-id helper columns and the raw From/To node
# columns are dropped as well.
# Not in this list: 'Cables_Length(m)', 'Cables_Size', 'Cables_insulation'
Cables_drop_cols = [
    'Cables_Network Id', 'Cables_Equipment Id', 'Cables_Line Id',
    'Cables_Status', 'Cables_Phase', 'Cables_# parallel',
    'Cables_Manufacturer', 'Cables_Standard', 'Cables_Rated Voltage(kV)',
    'Cables_Ampacity(A)', 'Cables_Withstand(A)', 'Cables_Cable Type',
    'Cables_Conductor Material', 'Cables_Sheathed',
    'Cables_Concentric Neutrals', 'Cables_Line R1(ohms)',
    'Cables_Line X1(ohms)', 'Cables_Line B1(µS)', 'Cables_Line R0(ohms)',
    'Cables_Line X0(ohms)', 'Cables_Line B0(µS)', 'Cables_Harmonic Model',
    'Cables_FromNodeID_1', 'Cables_ToNodeID_1',
    'Cables_FromNodeID_2', 'Cables_ToNodeID_2',
    'Cables_From Node', 'Cables_To Node',
]

# Poles_deleted_cols=[]

# Concatenate the per-worksheet lists, keeping the worksheet order.
combined_drop_cols = []
for _sheet_drop_cols in (Nodes_drop_cols, Switches_drop_cols, Tx_drop_cols,
                         Fuses_drop_cols, OHlines_drop_cols, Cables_drop_cols):
    combined_drop_cols.extend(_sheet_drop_cols)

_row_count = len(dfNodesCopy['Nodes_Network Id'])
print('Number of columns deleted: ', len(combined_drop_cols))
print('Number of rows: ', _row_count)

# Drop the listed columns; raises KeyError if any of them is missing.
dfNodesCopy = dfNodesCopy.drop(labels=combined_drop_cols, axis=1)

_remaining_count = len(dfNodesCopy.columns)
print('Number of remaining columnns: ', _remaining_count)
#print(dfNodesCopy.head(7))

Number of columns deleted:  70
Number of rows:  735
Number of remaining columnns:  16


In [7]:
# Write the merged node table to sheet 'Sheet1' of 'V7_NodeIDs.xlsx'.
# The writer is used as a context manager so the workbook is saved and closed
# on exit, even if to_excel raises; ExcelWriter.save() is no longer available
# in current pandas releases.
with pd.ExcelWriter('V7_NodeIDs.xlsx') as MasterFile:
    dfNodesCopy.to_excel(MasterFile, sheet_name='Sheet1')

In [26]:
#SwitchGrouped = dfNodesCopy.groupby('SwitchRegion')
#print(SwitchGrouped.head(3))

In [41]:
# #  Ctrl + A
# # Ctrl + / to uncomment

# # ****************************
# # A. NODES sheet
# # ****************************
# # 1. Split 'Node Id' to 'NodeID_1' and 'NodeID_2'
# dfNodes['NodeID_1'], dfNodes['NodeID_2'] = zip(*dfNodes['Node Id'].apply(lambda x: x.split('_') if '_' in x else (x, np.nan)))
# # 2. Create a 'Copy' dataframe
# dfNodesCopy = pd.DataFrame(dfNodes)
# # 3. Rename all column headers to 'Nodes_' + x
# dfNodesCopy.rename(columns=lambda x: 'Nodes_'+x, inplace=True)
# #print(dfNodes_Copy.count())

# # ****************************
# # B. MASTER SPREADSHEET
# # ****************************
# # Copy dfNodesCopy into dfMaster
# dfMaster = pd.DataFrame(dfNodesCopy)
# # print(dfMaster.count())
# # Nodes_NodeID_1 and Nodes_NodeID_2 are keys

# # ****************************
# # C. Topology sheet
# # ****************************
# # 1. No renaming here, so create a 'copy' dataframe
# dfTopologyCopy = pd.DataFrame(dfTopology)
# # 2. Rename all column headers to 'Topology_' + x
# dfTopologyCopy.rename(columns=lambda x: 'Topology_'+x, inplace=True)
# #print(dfTopologyCopy.count())

# # 3. Combine topology sheet
# # pd.merge(frame_1, frame_2, left_on = 'county_ID', right_on = 'countyid')
# # dfFinal = 
# # Topology - more match with 'Topology_Coord. Y' over 'Topology_Coord. X'
# dfMaster = pd.merge(dfMaster, dfTopologyCopy, how='outer', left_on='Nodes_NodeID_2', right_on ='Topology_Coord. Y')
# #print(dfMaster.count())


# # ****************************
# # D. Fuses sheet 
# # ****************************
# # 1. Split 'From Node' to 'FromNode_xCoord' and 'FromNode_yCoord'
# dfFuses['FromNode_xCoord'], dfFuses['FromNode_yCoord'] = zip(*dfFuses['From Node'].apply(lambda x: x.split('_') if '_' in x else (x, np.nan)))
# # 2. Split 'To Node' to 'ToNode_FuseID' and 'ToNode_FdrID'
# dfFuses['ToNode_FuseID'], dfFuses['ToNode_FdrID'] = zip(*dfFuses['To Node'].apply(lambda x: x.split('_') if '_' in x else (x, np.nan)))
# # 3. Create a 'Copy' dataframe
# dfFusesCopy = pd.DataFrame(dfFuses)
# # 4. Rename all column headers to 'Fuses_' + x
# dfFusesCopy.rename(columns=lambda x:'Fuses_'+x, inplace=True)
# # 5. Combine Fuses sheet with Master
# dfMaster = pd.merge(dfMaster, dfFusesCopy, how='outer', left_on='Nodes_NodeID_1', right_on ='Fuses_FromNode_xCoord')
# #print(dfMaster.count())

# # ****************************
# # D1. Output excel file - For VERIFICATION purposes
# # ****************************
# # Verify the excel file 
# # http://pandas.pydata.org/pandas-docs/version/0.17.0/generated/pandas.DataFrame.to_excel.html
# # http://stackoverflow.com/questions/29974672/writing-pandas-dataframe-to-excel-with-different-formats-for-different-columns
# # MasterFile = pd.ExcelWriter('master.xlsx')
# # dfMaster.to_excel(MasterFile, 'Sheet1')
# # MasterFile.save()


In [37]:
# Ctrl + / to uncomment

# # ****************************
# # E. Switch sheet 
# # ****************************
# # 1. Split 'From Node' to 'FromNode_1' and 'FromNode_2'
# dfSwitches['FromNode_1'], dfSwitches['FromNode_2'] = zip(*dfSwitches['From Node'].apply(lambda x: x.split('_') if '_' in x else (x, np.nan)))
# # 2. Create a 'Copy' dataframe
# dfSwitchesCopy = pd.DataFrame(dfSwitches)
# # 3. Rename all column headers to 'Switches_' + x
# dfSwitchesCopy.rename(columns=lambda x:'Switches_'+x, inplace=True)
# # 4. Combine Switches sheet with Master: 
# # 4.1 First with 'Switches_FromNode_1' - NodeID_1 also has '109-D'/'7-S' switch id :)
# dfMaster = pd.merge(dfMaster, dfSwitchesCopy, how='outer', left_on='Nodes_NodeID_1', right_on ='Switches_FromNode_1')
# # 4.2 Second with 'Section Id' of FusesCopy - maybe not necessary

# # ****************************
# # F. Transformer aka "Loads" in CYME
# # ****************************
# # 
# # 1. Split 'From Node' to 'FromNode_1' and 'FromNode_2'
# dfLoads['FromNode_1'], dfLoads['FromNode_2'] = zip(*dfLoads['From Node'].apply(lambda x: x.split('_') if '_' in x else (x, np.nan)))
# # 2. Create a 'Copy' dataframe
# dfLoadsCopy = pd.DataFrame(dfLoads)
# # 3. Rename all column headers to Loads_' + x
# dfLoadsCopy.rename(columns=lambda x:'Loads_'+x, inplace=True)
# # 4. Combine all Loads with 'FromNode_1'  with dfMaster
# dfMaster = pd.merge(dfMaster, dfLoadsCopy, how='outer', left_on='Nodes_NodeID_1', right_on ='Loads_FromNode_1')
# # 4.2 May need to combine dfLoadsCopy with dfSpotLoads if tx nameplate rating not same


# #Plot
# %matplotlib inline
# import matplotlib.pyplot as plt
# pd.options.display.mpl_style = 'default'
# #dfSwitches.boxplot()
# #dfFusesCopy.boxplot(column="Fuses_Rating(A)")
# #mydf['CigarNum'] = mydf['CigarNum'].convert_objects(convert_numeric=True)
# dfFusesCopy['Fuses_FromNode_xCoord'] = dfFusesCopy['Fuses_FromNode_xCord'].convert_objects(convert_numeric=True)
# dfFusesCopy['Fuses_FromNode_yCoord'] = dfFusesCopy['Fuses_FromNode_yCord'].convert_objects(convert_numeric=True)
# #dfFusesCopy.plot(kind='scatter', x='Fuses_FromNode_xCoord', y='Fuses_FromNode_yCoord')


# # ****************************
# # D1. Output excel file
# # ****************************
# # Verify the excel file 
# # http://pandas.pydata.org/pandas-docs/version/0.17.0/generated/pandas.DataFrame.to_excel.html
# # http://stackoverflow.com/questions/29974672/writing-pandas-dataframe-to-excel-with-different-formats-for-different-columns
# #MasterFile = pd.ExcelWriter('master.xlsx')
# #dfMaster.to_excel(MasterFile, 'Sheet1')
# #MasterFile.save()


In [8]:
# ****************************
# G. PRID to each region
# ****************************
# combine PRID to Tx?

# ****************************
# H. Cable 
# ****************************
# combine cable and conductors

# ****************************
# I. Conductors  
# ****************************
# combine poles

# ****************************
# J. Poles 
# ****************************
# Output excel file

