In [2]:
import pandas as pd
import numpy as np
from glob import glob
import os

# fileName - iterate through entire folder :)
#fileName = '3S3-1_Crestwood_Feeder_Details.xlsx'
#******************************************************
# FUNCTIONS
#******************************************************
def drop_columns(dfAssetClass, dropColumns):
    """Return ``dfAssetClass`` without the columns named in ``dropColumns``.

    A new frame is returned; the frame passed in is not modified.
    """
    return dfAssetClass.drop(columns=dropColumns)

# Output column names for the three load classes produced per switch region.
RES_LOAD = 'feeder_residential_load'
MED_COM_LOAD = 'feeder_small_med_commercial_load'
LARGE_LOAD = 'feeder_large_commercial_load'

# Folder holding one feeder workbook (*.xlsx) per feeder.
inputDirectory = 'Metsco_Feeder_Reports'

# Discover the workbooks and fix a deterministic (sorted) processing order.
filesList = glob(os.path.join(inputDirectory, '*.xlsx'))
sortedFileList = sorted(filesList)
numFiles = len(sortedFileList)
print('Number of Feeders: ', numFiles)

# Running state shared with the processing loop below.
count = 0  # number of feeder workbooks actually processed
dictFeeders = dict()
allNodes_list = pd.DataFrame()

# Process every feeder workbook: merge the asset sheets onto the node list,
# derive a switch region per row, bucket transformer nameplate kVA into load
# classes, and collect one result table per feeder.

# Columns removed from the merged node table. These lists do not depend on the
# feeder being processed, so they are built once instead of once per file.
Nodes_drop_cols = ['Nodes_Phase', 'Nodes_Node Id', 'NodeID_1', 'NodeID_2']
Switches_drop_cols = ['Switches_Network Id', 'Switches_Equipment Id', 'Switches_Device Type',
                      'Switches_Status', 'Switches_Phase', 'Switches_From Node',
                      'Switches_Voltage(kV)']
Tx_drop_cols = ['Loads_Network Id', 'Loads_Section Id', 'Loads_Status', 'Loads_From Node',
                'Loads_Spot Type', 'Loads_Dist Number', 'Loads_Dist Type',
                'Loads_Total kVA(kVA)', 'Loads_Total kW(kW)', 'Loads_Total kvar',
                'Loads_Aver. PF(%)', 'Loads_Total kWh(kWh)', 'Loads_Total Cust',
                'Loads_Phase Type', 'Loads_Config', 'Loads_Locked', 'Loads_Load Model']
Fuses_drop_cols = ['Fuses_Network Id', 'Fuses_Status', 'Fuses_State', 'Fuses_Phase',
                   'Fuses_Manufacturer', 'Fuses_Model', 'Fuses_Voltage(kV)',
                   'Fuses_Voltage Class', 'Fuses_Standard', 'Fuses_Rating(A)',
                   'Fuses_Rating', 'Fuses_Interrupting Rating(A)', 'Fuses_From Node',
                   'Fuses_To Node']
OHlines_drop_cols = []  # overhead lines are not merged yet, so nothing to drop
Cables_drop_cols = ['Cables_Network Id', 'Cables_Equipment Id', 'Cables_Line Id',
                    'Cables_Status', 'Cables_Phase', 'Cables_# parallel',
                    'Cables_Manufacturer', 'Cables_Standard', 'Cables_Rated Voltage(kV)',
                    'Cables_Ampacity(A)', 'Cables_Withstand(A)', 'Cables_Cable Type',
                    'Cables_Conductor Material', 'Cables_Sheathed',
                    'Cables_Concentric Neutrals', 'Cables_Line R1(ohms)',
                    'Cables_Line X1(ohms)', 'Cables_Line B1(µS)', 'Cables_Line R0(ohms)',
                    'Cables_Line X0(ohms)', 'Cables_Line B0(µS)', 'Cables_Harmonic Model',
                    'Cables_FromNodeID_1', 'Cables_ToNodeID_1', 'Cables_FromNodeID_2',
                    'Cables_ToNodeID_2', 'Cables_From Node', 'Cables_To Node']
combined_drop_cols = (Nodes_drop_cols + Switches_drop_cols + Tx_drop_cols + Fuses_drop_cols +
                      OHlines_drop_cols + Cables_drop_cols)

# Additional columns that are not needed in the per-transformer (PRID) table.
moreNodesCols = ['Switches_Section Id', 'Switches_State', 'Switches_Rating(A)',
                 'Fuses_Section Id', 'Fuses_Equipment Id', 'Cables_Section Id',
                 'Cables_Length(m)', 'Cables_Size', 'Cables_Insulation',
                 'CablesSwitchRegionFrom', 'CablesSwitchRegionEnd']


def _split_node_id(node_id):
    """Split a node id on '_'; ids without '_' yield ``(node_id, NaN)``.

    ``node_id`` must be a string (the ``in`` test fails on NaN/float values).
    """
    return node_id.split('_') if '_' in node_id else (node_id, np.nan)


def _switch_region_or_nan(id_part):
    """Return ``id_part`` when it contains '-', otherwise NaN."""
    return id_part if '-' in id_part else np.nan


# One result table per processed feeder; concatenated once after the loop
# instead of re-concatenating the growing result on every iteration.
feederFrames = []

for f in sortedFileList:
    fileName = os.path.basename(f).split('_')
    FeederKey = fileName[0]

    # Skip files whose key contains '$'
    # (presumably Excel '~$' lock/temp files - TODO confirm).
    if '$' in FeederKey:
        continue
    count += 1

    # Read the feeder sheets, then release the workbook before processing.
    # Topology, Spot Loads and OverheadLinesByPhase are read but not merged below.
    with pd.ExcelFile(f) as xlsx:
        dfTopology = pd.read_excel(xlsx, 'Topology')
        dfSpotLoads = pd.read_excel(xlsx, 'Spot Loads')
        dfLoads = pd.read_excel(xlsx, 'Loads')
        dfCables = pd.read_excel(xlsx, 'Cables')
        dfSwitches = pd.read_excel(xlsx, 'Switches')
        dfNodes = pd.read_excel(xlsx, 'Nodes')
        dfOHlines = pd.read_excel(xlsx, 'OverheadLinesByPhase')
        dfFuses = pd.read_excel(xlsx, 'Fuses')

    # Strip '\n' from the column headers and prefix each header with its sheet
    # name. Spot Loads headers are cleaned but deliberately left unprefixed.
    sheetPrefixes = (
        (dfTopology, 'Topology_'),
        (dfSpotLoads, ''),
        (dfLoads, 'Loads_'),
        (dfCables, 'Cables_'),
        (dfSwitches, 'Switches_'),
        (dfNodes, 'Nodes_'),
        (dfOHlines, 'OHlines_'),
        (dfFuses, 'Fuses_'),
    )
    for sheetFrame, prefix in sheetPrefixes:
        sheetFrame.rename(columns=lambda name, prefix=prefix: prefix + name.replace('\n', ''),
                          inplace=True)

    # Outer-merge switches, loads (transformers), fuses and cables onto the
    # node list, matching each asset's 'From Node' to the node id.
    dfNodesMaster = dfNodes
    assetJoins = (
        (dfSwitches, 'Switches_From Node'),
        (dfLoads, 'Loads_From Node'),
        (dfFuses, 'Fuses_From Node'),
        (dfCables, 'Cables_From Node'),
    )
    for assetFrame, fromNodeCol in assetJoins:
        dfNodesMaster = pd.merge(dfNodesMaster, assetFrame, how='outer',
                                 left_on='Nodes_Node Id', right_on=fromNodeCol)

    dfNodesMaster = dfNodesMaster.rename(columns={'Loads_Total CkVA(kVA)': 'Nameplate',
                                                  'Loads_Spot Number': 'TransformerID'})
    # NOTE: this is an alias, not a copy; the columns added below also land on
    # dfNodesMaster.
    dfNodesCopy = dfNodesMaster

    # Cast to str so the string tests below also work on rows without a cable
    # (missing values become the text 'nan').
    dfNodesCopy['Cables_From Node'] = dfNodesCopy['Cables_From Node'].astype(str)
    dfNodesCopy['Cables_To Node'] = dfNodesCopy['Cables_To Node'].astype(str)

    # Split each id on '_' into its first and second part.
    dfNodesCopy['NodeID_1'], dfNodesCopy['NodeID_2'] = zip(
        *dfNodesCopy['Nodes_Node Id'].apply(_split_node_id))
    dfNodesCopy['Cables_FromNodeID_1'], dfNodesCopy['Cables_FromNodeID_2'] = zip(
        *dfNodesCopy['Cables_From Node'].apply(_split_node_id))
    dfNodesCopy['Cables_ToNodeID_1'], dfNodesCopy['Cables_ToNodeID_2'] = zip(
        *dfNodesCopy['Cables_To Node'].apply(_split_node_id))

    # An id part containing '-' is taken as a switch-region label; every other
    # row inherits the most recent label above it (forward fill).
    dfNodesCopy['SwitchRegion'] = dfNodesCopy['NodeID_1'].apply(_switch_region_or_nan).ffill()
    dfNodesCopy['CablesSwitchRegionFrom'] = (
        dfNodesCopy['Cables_FromNodeID_1'].apply(_switch_region_or_nan).ffill())
    dfNodesCopy['CablesSwitchRegionEnd'] = (
        dfNodesCopy['Cables_ToNodeID_1'].apply(_switch_region_or_nan).ffill())

    # Remove the asset detail columns (raises KeyError if one is missing).
    dfNodesCopy = drop_columns(dfNodesCopy, combined_drop_cols)

    # Keep only rows that carry a transformer. The explicit copy makes the
    # Nameplate assignment below act on an independent frame, not on a slice.
    dfNodesPRID = drop_columns(dfNodesCopy, moreNodesCols)
    dfNodesPRID = dfNodesPRID[dfNodesPRID.TransformerID.notnull()].copy()
    dfNodesPRID['Nameplate'] = dfNodesPRID['Nameplate'].astype(float)
    dfNodesPRID_lookup = dfNodesPRID

    # Bucket transformers by phase and nameplate kVA:
    #   residential      : not 'RWB' and <= 150 kVA
    #   small/med comm.  : 'RWB' and <= 350 kVA
    #   large commercial : > 350 kVA (any phase)
    # NOTE(review): non-'RWB' units between 150 and 350 kVA fall into no bucket
    # and are left out of the totals - confirm this is intended.
    isThreePhase = dfNodesPRID['Loads_Phase'] == 'RWB'
    isNotThreePhase = dfNodesPRID['Loads_Phase'] != 'RWB'
    nameplate = dfNodesPRID['Nameplate']

    dfFdr_Rx = dfNodesPRID[isNotThreePhase & (nameplate <= 150)]
    dfFdr_Rx = dfFdr_Rx.rename(columns={'Nameplate': RES_LOAD})
    dfFdr_Med = dfNodesPRID[isThreePhase & (nameplate <= 350)]
    dfFdr_Med = dfFdr_Med.rename(columns={'Nameplate': MED_COM_LOAD})
    dfFdr_Large = dfNodesPRID[nameplate > 350]
    dfFdr_Large = dfFdr_Large.rename(columns={'Nameplate': LARGE_LOAD})

    dfFdrs_Loads = pd.concat([dfFdr_Rx, dfFdr_Med, dfFdr_Large])
    dfFdrs_Loads = drop_columns(dfFdrs_Loads, ['Loads_Phase'])

    # Sum the load columns per switch region. numeric_only=True keeps text
    # columns out of the totals; newer pandas no longer does this by default.
    dfFdrs_total = dfFdrs_Loads.groupby('SwitchRegion').sum(numeric_only=True).reset_index()

    # TODO: a 50%-of-nameplate scaling of the three load columns was planned
    # here but is disabled. The disabled code referred to COM_LOAD / IND_LOAD,
    # which are not defined in the visible code (the constants are
    # MED_COM_LOAD / LARGE_LOAD).

    # Attach the regional totals to every transformer of that region.
    dfFdrs_total = dfFdrs_total.merge(dfNodesPRID_lookup, how='left', on='SwitchRegion')
    dfFdrs_total['SwitchRegion'] = dfFdrs_total['SwitchRegion'].astype(str)
    dfFdrs_total['Nodes_Network Id'] = dfFdrs_total['Nodes_Network Id'].astype(str)

    # PRID = '<switch region>_<network id>'
    dfFdrs_total['PRID'] = dfFdrs_total['SwitchRegion'] + '_' + dfFdrs_total['Nodes_Network Id']
    dfFdrs_total = drop_columns(dfFdrs_total,
                                ['Nameplate', 'Loads_Phase', 'SwitchRegion', 'Nodes_Network Id'])

    feederFrames.append(dfFdrs_total)

# Combine all feeders. With no processed feeder the result is an empty frame
# rather than an undefined name.
if feederFrames:
    dfFdrs_Final = pd.concat(feederFrames)
else:
    dfFdrs_Final = pd.DataFrame()
dfFdrs_temp = dfFdrs_Final  # kept so code that still refers to this name works

            #'TransformerID', 'Loads_Phase', 'Nameplate', 'SwitchRegion'

            
            
# Write the combined table to a single-sheet workbook. The context manager
# saves and closes the file; ExcelWriter.save() is not available in current
# pandas releases.
with pd.ExcelWriter('V4_PRID_AllNodes.xlsx') as MasterFile:
    dfFdrs_Final.to_excel(MasterFile, sheet_name='Sheet1')

print(dfFdrs_Final.shape)

Number of Feeders:  35
(3585, 7)
