In [163]:
import pandas as pd
import warnings
from dateutil.parser import parse
warnings.filterwarnings('ignore')

### Load the buildings and cpq data

In [164]:
# Relative directory that holds the hackathon CSV exports.
PATH = 'webapp/data/'
cpq_csv = PATH + 'ZayoHackathonData_CPQs.csv'
buildings_csv = PATH + 'ZayoHackathonData_Buildings.csv'

# Load the quote (CPQ) records and the building records.
cpq = pd.read_csv(cpq_csv)
buildings = pd.read_csv(buildings_csv)

### Remove rows where the combination of Account ID, Product Group and Building ID is repeated, keeping only the earliest occurrence (by CreatedDate) of each combination

In [165]:
# Turn the date strings into datetimes, order the quotes chronologically,
# then keep only the first (i.e. earliest) row for every
# (Account ID, Product Group, Building ID) combination.
cpq['CreatedDate'] = cpq['CreatedDate'].apply(parse)
cpq = cpq.sort_values(by='CreatedDate')
cpq = cpq.drop_duplicates(subset=['Account ID', 'Product Group', 'Building ID'])

### Get the count of records by the attribute 'On Zayo Network Status'

In [166]:
# Tally the deduplicated CPQ rows per 'On Zayo Network Status' value.
cpq['On Zayo Network Status'].value_counts()

On Zayo Network        5624
Not on Zayo Network    1214
Build in Progress        82
Name: On Zayo Network Status, dtype: int64

In [167]:
# Show the CPQ column labels (note: the price columns carry leading/trailing spaces).
cpq.columns

Index([u'CPQ ID', u'Account ID', u'CreatedDate', u'Product Group',
       u' X36 MRC List ', u' X36 NRR List ', u' X36 NPV List ', u'Building ID',
       u'Market', u'Street Address', u'City', u'State', u'Postal Code',
       u'Network Proximity', u'On Zayo Network Status'],
      dtype='object')

### Merge the cpq and buildings by doing an inner join

In [168]:
# Inner join of quotes and buildings. No `on=` is given, so pandas joins on
# every column name the two frames have in common.
cpq_business = cpq.merge(buildings, how='inner')

In [169]:
# Preview the first rows of the merged quotes/buildings frame.
cpq_business.head()

Unnamed: 0,CPQ ID,Account ID,CreatedDate,Product Group,X36 MRC List,X36 NRR List,X36 NPV List,Building ID,Market,Street Address,City,State,Postal Code,Network Proximity,On Zayo Network Status,Latitude,Longitude,Net Classification,Type,Estimated Build Cost
0,CPQ-009438,Acct-000469,2016-07-01,Wavelengths - Metro,"$2,320.52","$3,227.00","$37,502.34",Bldg-065056,Dallas,2323 Bryan St,Dallas,TX,75201,64.67,On Zayo Network,32.787255,-96.794118,Fiber Only,Carrier Hotel,"$15,880.20"
1,CPQ-009494,Acct-000700,2016-07-05,Wavelengths - Long Haul,"$2,791.26","$2,766.00",$-,Bldg-065056,Dallas,2323 Bryan St,Dallas,TX,75201,64.67,On Zayo Network,32.787255,-96.794118,Fiber Only,Carrier Hotel,"$15,880.20"
2,CPQ-009517,Acct-000043,2016-07-06,Ethernet,"$1,078.38",$922.00,"$21,228.45",Bldg-065056,Dallas,2323 Bryan St,Dallas,TX,75201,64.67,On Zayo Network,32.787255,-96.794118,Fiber Only,Carrier Hotel,"$15,880.20"
3,CPQ-009482,Acct-000069,2016-07-06,Ethernet,$910.98,$922.00,"$16,035.62",Bldg-065056,Dallas,2323 Bryan St,Dallas,TX,75201,64.67,On Zayo Network,32.787255,-96.794118,Fiber Only,Carrier Hotel,"$15,880.20"
4,CPQ-009474,Acct-000019,2016-07-06,Wavelengths - Metro,"$1,423.57","$2,766.00","$24,683.18",Bldg-065056,Dallas,2323 Bryan St,Dallas,TX,75201,64.67,On Zayo Network,32.787255,-96.794118,Fiber Only,Carrier Hotel,"$15,880.20"


In [170]:
# Sanity check: row count for one building before and after the merge.
# Equal numbers mean the inner join neither dropped nor duplicated its quotes.
len(cpq.loc[cpq['Building ID'] == 'Bldg-108671']), len(cpq_business.loc[cpq_business['Building ID'] == 'Bldg-108671'])

(59, 59)

In [171]:
# Force every Building ID to a plain string, then list the distinct IDs.
cpq_business['Building ID'] = cpq_business['Building ID'].map(str)
pd.unique(cpq_business['Building ID'])

array(['Bldg-065056', 'Bldg-012582', 'Bldg-038069', ..., 'Bldg-122081',
       'Bldg-115638', 'Bldg-525138'], dtype=object)

### Select only those records that are not on Zayo Network

In [172]:
# Keep only quotes for buildings that are not yet on the Zayo network.
# An explicit copy is taken because the following cells rename and overwrite
# columns of this frame; without it those writes target a slice of
# cpq_business and raise SettingWithCopyWarning (silenced in this notebook by
# the global warnings filter).
cpq_status = cpq_business.loc[cpq_business['On Zayo Network Status'] == 'Not on Zayo Network'].copy()

In [173]:
# Give the three price columns short names without the surrounding spaces.
price_column_names = {
    ' X36 MRC List ': 'X36 MRC',
    ' X36 NRR List ': 'X36 NRR',
    ' X36 NPV List ': 'X36 NPV',
}
cpq_status = cpq_status.rename(columns=price_column_names)

### Get price in numbers

In [174]:
def _currency_to_float(series):
    """Convert a column of currency-formatted strings to floats.

    Steps, applied to string cells only (non-string cells such as NaN or
    already-numeric values pass through untouched, so the cell can be re-run):

    * strip '$', thousands separators, whitespace and ')'  ("$2,320.52" -> "2320.52")
    * turn a leading '(' into a minus sign, so an accounting-style
      "(12.00)" would become "-12.00" should such values occur
    * treat a bare dash -- the "$-" placeholder in the export -- as zero

    Only a value that is *entirely* a dash becomes 0; the sign of a genuine
    negative amount such as "$-1,234.00" is preserved.

    Series.replace(..., regex=True) is used instead of Series.str.replace
    because the latter's default switched to literal matching in pandas 2.0.
    """
    cleaned = series.replace(r'[$,\s)]', '', regex=True)
    cleaned = cleaned.replace(r'^\(', '-', regex=True)
    cleaned = cleaned.replace(r'^-$', '0', regex=True)
    return pd.to_numeric(cleaned).astype(float)


# Apply the same parsing to every monetary column.
for _price_column in ['X36 MRC', 'X36 NRR', 'X36 NPV', ' Estimated Build Cost ']:
    cpq_status[_price_column] = _currency_to_float(cpq_status[_price_column])
cpq_status.head()

Unnamed: 0,CPQ ID,Account ID,CreatedDate,Product Group,X36 MRC,X36 NRR,X36 NPV,Building ID,Market,Street Address,City,State,Postal Code,Network Proximity,On Zayo Network Status,Latitude,Longitude,Net Classification,Type,Estimated Build Cost
497,CPQ-009426,Acct-000024,2016-07-01,IP Services,3469.99,922.0,28433.59,Bldg-449138,Dallas,15050 Trinity Blvd,Fort Worth,TX,76155,1795.08,Not on Zayo Network,32.820624,-97.040988,Fiber Only,Office - Multi Tenant,56877.0
1501,CPQ-009421,Acct-000039,2016-07-01,Ethernet,1543.49,461.0,10914.5,Bldg-133508,Dallas,5101 Statesman Dr,Irving,TX,75063,607.02,Not on Zayo Network,32.935867,-97.017868,On-Net,Office - Single Tenant,27175.5
1502,CPQ-009604,Acct-000074,2016-07-06,Ethernet,1543.49,461.0,10914.5,Bldg-133508,Dallas,5101 Statesman Dr,Irving,TX,75063,607.02,Not on Zayo Network,32.935867,-97.017868,On-Net,Office - Single Tenant,27175.5
1503,CPQ-010600,Acct-000114,2016-08-02,Ethernet,1543.49,461.0,10914.5,Bldg-133508,Dallas,5101 Statesman Dr,Irving,TX,75063,607.02,Not on Zayo Network,32.935867,-97.017868,On-Net,Office - Single Tenant,27175.5
1504,CPQ-001441,Acct-000034,2016-11-03,Ethernet,1543.49,461.0,10914.5,Bldg-133508,Dallas,5101 Statesman Dr,Irving,TX,75063,607.02,Not on Zayo Network,32.935867,-97.017868,On-Net,Office - Single Tenant,27175.5


In [175]:
# Number of distinct buildings among the off-network quotes.
len(pd.unique(cpq_status['Building ID']))

794

### Filter out the records based on the state

In [176]:
# One frame per state of interest, in the order CO, TX, GA.
cpq_CO, cpq_TX, cpq_GA = (
    cpq_status.loc[cpq_status['State'] == state_code]
    for state_code in ('CO', 'TX', 'GA')
)

### For each individual state, calculate the profit incurred by each building

In [177]:
def _npv_by_building(state_quotes):
    """Return one row per 'Building ID' with the summed 'X36 NPV', ordered by building."""
    npv_totals = state_quotes.groupby('Building ID')['X36 NPV'].sum().reset_index()
    return npv_totals.sort_values(by='Building ID')


CO_profit = _npv_by_building(cpq_CO)
TX_profit = _npv_by_building(cpq_TX)
GA_profit = _npv_by_building(cpq_GA)

### For each building, get the total number of accounts associated with the building

In [178]:
def _accounts_by_building(state_quotes):
    """Return one row per 'Building ID' with the number of distinct accounts.

    nunique() is used rather than count(): the quotes are deduplicated on
    (Account ID, Product Group, Building ID), so an account that quoted
    several product groups for the same building appears on several rows and
    count() would report it more than once.
    """
    account_totals = state_quotes.groupby('Building ID')['Account ID'].nunique().reset_index()
    return account_totals.sort_values(by='Building ID')


CO_accounts = _accounts_by_building(cpq_CO)
TX_accounts = _accounts_by_building(cpq_TX)
GA_accounts = _accounts_by_building(cpq_GA)

In [179]:
# Preview the per-building account figures for Colorado.
CO_accounts.head()

Unnamed: 0,Building ID,Account ID
0,Bldg-059940,2
1,Bldg-068378,1
2,Bldg-074834,1
3,Bldg-081662,4
4,Bldg-082112,3


### Get the estimated build cost for each state

In [180]:
# One row per distinct (building, build cost) pair, with the number of quote
# rows sharing that pair in 'count'.
cost_group_keys = ['Building ID', ' Estimated Build Cost ']
build_cost_CO = cpq_CO.groupby(cost_group_keys).size().reset_index(name='count')
build_cost_TX = cpq_TX.groupby(cost_group_keys).size().reset_index(name='count')
build_cost_GA = cpq_GA.groupby(cost_group_keys).size().reset_index(name='count')

### Total profit = Profit incurred by each building - Estimated build cost

In [181]:
# Attach build cost and account count by Building ID rather than by row
# position. build_cost_CO comes from a separate groupby on
# (Building ID, build cost): that groupby drops rows whose cost is NaN and
# emits one row per distinct cost, so its rows are not guaranteed to line up
# one-to-one with CO_profit. A positional assignment would then silently
# attach costs to the wrong buildings.
# If a building has several costs, the first listed one is used; a building
# without a cost gets NaN.
_co_cost_by_building = (
    build_cost_CO.drop_duplicates('Building ID')
                 .set_index('Building ID')[' Estimated Build Cost ']
)
_co_accounts_by_building = CO_accounts.set_index('Building ID')['Account ID']

CO_profit['Estimated Build Cost'] = CO_profit['Building ID'].map(_co_cost_by_building)
CO_profit['Profit Including Build Cost'] = CO_profit['X36 NPV'] - CO_profit['Estimated Build Cost']
CO_profit['Number of Accounts'] = CO_profit['Building ID'].map(_co_accounts_by_building)

In [182]:
# Attach build cost and account count by Building ID rather than by row
# position. build_cost_TX comes from a separate groupby on
# (Building ID, build cost): that groupby drops rows whose cost is NaN and
# emits one row per distinct cost, so its rows are not guaranteed to line up
# one-to-one with TX_profit. A positional assignment would then silently
# attach costs to the wrong buildings.
# If a building has several costs, the first listed one is used; a building
# without a cost gets NaN.
_tx_cost_by_building = (
    build_cost_TX.drop_duplicates('Building ID')
                 .set_index('Building ID')[' Estimated Build Cost ']
)
_tx_accounts_by_building = TX_accounts.set_index('Building ID')['Account ID']

TX_profit['Estimated Build Cost'] = TX_profit['Building ID'].map(_tx_cost_by_building)
TX_profit['Profit Including Build Cost'] = TX_profit['X36 NPV'] - TX_profit['Estimated Build Cost']
TX_profit['Number of Accounts'] = TX_profit['Building ID'].map(_tx_accounts_by_building)

In [183]:
# Attach build cost and account count by Building ID rather than by row
# position. build_cost_GA comes from a separate groupby on
# (Building ID, build cost): that groupby drops rows whose cost is NaN and
# emits one row per distinct cost, so its rows are not guaranteed to line up
# one-to-one with GA_profit. A positional assignment would then silently
# attach costs to the wrong buildings.
# If a building has several costs, the first listed one is used; a building
# without a cost gets NaN.
_ga_cost_by_building = (
    build_cost_GA.drop_duplicates('Building ID')
                 .set_index('Building ID')[' Estimated Build Cost ']
)
_ga_accounts_by_building = GA_accounts.set_index('Building ID')['Account ID']

GA_profit['Estimated Build Cost'] = GA_profit['Building ID'].map(_ga_cost_by_building)
GA_profit['Profit Including Build Cost'] = GA_profit['X36 NPV'] - GA_profit['Estimated Build Cost']
GA_profit['Number of Accounts'] = GA_profit['Building ID'].map(_ga_accounts_by_building)

In [184]:
# Rank the buildings of each state from most to least profitable.
profit_column = 'Profit Including Build Cost'
CO_profit = CO_profit.sort_values(by=profit_column, ascending=False)
TX_profit = TX_profit.sort_values(by=profit_column, ascending=False)
GA_profit = GA_profit.sort_values(by=profit_column, ascending=False)

In [185]:
# Top Colorado buildings by profit after build cost.
CO_profit.head()

Unnamed: 0,Building ID,X36 NPV,Estimated Build Cost,Profit Including Build Cost,Number of Accounts
160,Bldg-430143,480159.37,35981.5,444177.87,4
253,Bldg-519433,357294.76,14366.5,342928.26,1
86,Bldg-158872,319506.62,22971.4,296535.22,2
81,Bldg-158866,334458.16,49844.5,284613.66,3
128,Bldg-266860,242771.91,18408.4,224363.51,5


In [186]:
# Top Texas buildings by profit after build cost.
TX_profit.head()

Unnamed: 0,Building ID,X36 NPV,Estimated Build Cost,Profit Including Build Cost,Number of Accounts
114,Bldg-386004,392308.73,62026.25,330282.48,7
21,Bldg-118398,277511.02,25200.6,252310.42,1
94,Bldg-293930,254597.79,19360.2,235237.59,3
58,Bldg-139825,262124.91,29809.8,232315.11,16
105,Bldg-366026,260224.38,37675.0,222549.38,1


In [187]:
# Top Georgia buildings by profit after build cost.
GA_profit.head()

Unnamed: 0,Building ID,X36 NPV,Estimated Build Cost,Profit Including Build Cost,Number of Accounts
57,Bldg-115684,632439.0,24570.7,607868.3,18
100,Bldg-233683,508875.91,69739.12,439136.79,9
33,Bldg-070188,426020.76,67006.72,359014.04,3
32,Bldg-068834,386640.77,65354.08,321286.69,9
54,Bldg-115607,330542.8,18107.8,312435.0,1


### Merge each state's profits dataframe with the original dataframe

In [188]:
# Join the per-building profit figures back onto the quote rows to pick up
# location and building attributes. Both sides carry an 'X36 NPV' column, so
# pandas suffixes them as 'X36 NPV_x' (building total) and 'X36 NPV_y' (quote).
co_buildings_latlong = CO_profit.merge(cpq_status, how='inner', on='Building ID')
tx_buildings_latlong = TX_profit.merge(cpq_status, how='inner', on='Building ID')
ga_buildings_latlong = GA_profit.merge(cpq_status, how='inner', on='Building ID')

In [189]:
# Preview the Colorado profit figures joined with the quote rows.
co_buildings_latlong.head()

Unnamed: 0,Building ID,X36 NPV_x,Estimated Build Cost,Profit Including Build Cost,Number of Accounts,CPQ ID,Account ID,CreatedDate,Product Group,X36 MRC,...,City,State,Postal Code,Network Proximity,On Zayo Network Status,Latitude,Longitude,Net Classification,Type,Estimated Build Cost.1
0,Bldg-430143,480159.37,35981.5,444177.87,4,CPQ-012244,Acct-000397,2016-09-08,Dark Fiber - Metro,7507.43,...,Denver,CO,80202,786.05,Not on Zayo Network,39.753224,-105.000157,On-Net,Office - Multi Tenant,35981.5
1,Bldg-430143,480159.37,35981.5,444177.87,4,CPQ-012406,Acct-000350,2016-09-13,Dark Fiber - Metro,7507.43,...,Denver,CO,80202,786.05,Not on Zayo Network,39.753224,-105.000157,On-Net,Office - Multi Tenant,35981.5
2,Bldg-430143,480159.37,35981.5,444177.87,4,CPQ-005682,Acct-000169,2017-01-05,Dark Fiber - Metro,7507.43,...,Denver,CO,80202,786.05,Not on Zayo Network,39.753224,-105.000157,On-Net,Office - Multi Tenant,35981.5
3,Bldg-430143,480159.37,35981.5,444177.87,4,CPQ-005767,Acct-000274,2017-01-06,Dark Fiber - Metro,7507.43,...,Denver,CO,80202,786.05,Not on Zayo Network,39.753224,-105.000157,On-Net,Office - Multi Tenant,35981.5
4,Bldg-519433,357294.76,14366.5,342928.26,1,CPQ-006575,Acct-000397,2017-02-28,Dark Fiber - Metro,15901.9,...,Denver,CO,80223,43.7,Not on Zayo Network,39.857676,-104.583343,Fiber Only,Splice Point,14366.5


### Generate the profits csv by combining the state dataframes. This csv would be used to generate the data table for the first visualization

In [190]:
# Columns exported for the data table, in output order.
table_columns = [
    'Building ID', 'X36 NPV_x', 'Estimated Build Cost', 'Profit Including Build Cost',
    'Latitude', 'Longitude', 'State', 'Number of Accounts', 'Street Address',
    'Postal Code', 'Net Classification', 'Type',
]

# Keep just those columns and collapse each building to its first row.
co_buildings_latlong = co_buildings_latlong[table_columns].drop_duplicates(['Building ID'])
tx_buildings_latlong = tx_buildings_latlong[table_columns].drop_duplicates(['Building ID'])
ga_buildings_latlong = ga_buildings_latlong[table_columns].drop_duplicates(['Building ID'])

In [191]:
# Stack the three per-state tables (CO, then TX, then GA) into one frame.
state_tables = [co_buildings_latlong, tx_buildings_latlong, ga_buildings_latlong]
profits = pd.concat(state_tables)

In [192]:
# Write the combined table to the working directory. to_csv's default also
# writes the DataFrame index as the first, unnamed column.
profits.to_csv('profits.csv')

### Compute the total profit and the number of buildings for each state and write them to a csv

In [193]:
# Colorado: number of buildings and summed profit after build cost.
buildings_co = CO_profit.shape[0]
co_sum = CO_profit['Profit Including Build Cost'].sum()

In [194]:
# Texas: number of buildings and summed profit after build cost.
buildings_tx = TX_profit.shape[0]
tx_sum = TX_profit['Profit Including Build Cost'].sum()

In [195]:
# Georgia: number of buildings and summed profit after build cost.
buildings_ga = GA_profit.shape[0]
ga_sum = GA_profit['Profit Including Build Cost'].sum()

In [196]:
import csv

# One summary row per state: [state code, total profit, number of buildings].
vals = [['CO', co_sum, buildings_co], ['TX', tx_sum, buildings_tx], ['GA', ga_sum, buildings_ga]]

# csv.writer needs a binary-mode file on Python 2 but a text-mode file opened
# with newline='' on Python 3 (writing to a 'wb' handle there raises
# TypeError). `str is bytes` is true only on Python 2, so the original
# Python 2 output stays byte-for-byte the same.
if str is bytes:
    f = open('profit_by_state.csv', 'wb')
else:
    f = open('profit_by_state.csv', 'w', newline='')

with f:
    w = csv.writer(f)
    w.writerow(['State','Total Profit', 'Number of Buildings'])
    w.writerows(vals)