In [198]:
import pandas as pd
import numpy as np

from pandas_profiling import ProfileReport
import plotly.express as px
import re

from itertools import groupby
from datetime import datetime
import math

#import re
#from nltk.stem import WordNetLemmatizer, PorterStemmer, SnowballStemmer

#from nltk.corpus import stopwords
#from nltk.tokenize import word_tokenize

import networkx as nx

from networkx.readwrite import json_graph

import seaborn as sns

import matplotlib.pyplot as plt

import zipfile

pd.options.display.max_columns = 100
pd.options.display.max_rows = 300
pd.options.display.max_colwidth = 50

In [253]:
df_spec = pd.read_excel("../data/external/Exhibit C_Leveled Bid - Specs_RFP.xlsx", 
                   sheet_name='Serenity Leveled Bid Request',
                   header=12)

df_spec.drop(['Unnamed: 0'], axis=1, inplace=True)
df_spec.fillna('DNA', inplace=True) ## DNA: Does Not Apply
df_spec.shape

(85, 26)

In [254]:
upd_ix = [(x,y) for (x,y) in enumerate(df_spec.Area) if y != 'DNA']
##upd_ix

In [255]:
for i in range(len(upd_ix) - 1):
    df_spec.Area.iloc[upd_ix[i][0]: upd_ix[i+1][0]] = upd_ix[i][1]

i = i+1
df_spec.Area.iloc[upd_ix[i][0]:] = upd_ix[i][1]

In [256]:
df_spec = df_spec[['Area', 'Cost center', 'Item', 'Category', 'SKU/Spec', 'Color/Notes',
       'Reference']]

ix = df_spec.iloc[:,1:].apply(lambda x: np.sum(x == 'DNA') != len(x), axis=1)
df_spec = df_spec.loc[ix==True,:]

df_spec.head(2)

Unnamed: 0,Area,Cost center,Item,Category,SKU/Spec,Color/Notes,Reference
0,Kitchen,3-05-5410,Cabinets,Material,5pc all wood shaker style cabinet - door & dra...,White above / Gray below,DNA
1,Kitchen,3-05-5410,Cabinets Install,Labor,"Remove existing doors, prep+paint boxes, insta...",DNA,DNA


In [212]:
x = df_spec.Area.value_counts()
##for index, value in zip(x.index, x):
##    print (index, value)
x

Kitchen                            22
Bath                               15
Overhead                           10
Floors                              9
Paint & Other Misc Living Areas     8
Repairs                             4
General Conditions                  4
Windows                             1
Name: Area, dtype: int64

In [213]:
arr = []

for index, value in zip(x.index, x):
    arr.extend([[index, y] for y in np.linspace(1,value, value).astype(int)])
    
idx_df = pd.DataFrame(arr, columns=['Area', 'idx'])
idx_df.sort_values(by=['Area', 'idx'], inplace=True)
idx_df.reset_index(drop=True, inplace=True)
idx_df.shape

(73, 2)

In [258]:
df_spec.sort_values(by=['Area', 'Cost center', 'Item'], inplace=True)
df_spec.reset_index(drop=True, inplace=True)

df_spec['idx'] = idx_df.idx
df_spec.head(2)

Unnamed: 0,Area,Cost center,Item,Category,SKU/Spec,Color/Notes,Reference,idx
0,Bath,3-05-5260,"Interior Finish Work, Other",Labor,Bath Finish Accessories & Misc Carpentry - labor,DNA,DNA,1
1,Bath,3-05-5270,Bath / Shower,Material,Water-Saving Massage Showerhead,"Brushed Nickel, Brass Ball Joint",LINK,2


In [308]:
def prep_bid(df_in):
    df = df_in.copy()
    df.drop(['Unnamed: 0'], axis=1, inplace=True)
    df.fillna('DNA', inplace=True) ## DNA: Does Not Apply
    
    upd_ix = [(x,y) for (x,y) in enumerate(df.Area) if y != 'DNA']
    
    for i in range(len(upd_ix) - 1):
        df.Area.iloc[upd_ix[i][0]: upd_ix[i+1][0]] = upd_ix[i][1]

    i = i+1
    df.Area.iloc[upd_ix[i][0]:] = upd_ix[i][1]
    
    df = df.loc[df['Cost center'] != 'DNA',:]
    
    df.sort_values(by=['Area', 'Cost center', 'Item'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    
    x = df_spec.Area.value_counts()
    
    arr = []

    for index, value in zip(x.index, x):
        arr.extend([[index, y] for y in np.linspace(0,value-1, value-1).astype(int)])

    idx_df = pd.DataFrame(arr, columns=['Area', 'idx'])
    idx_df.sort_values(by=['Area', 'idx'], inplace=True)
    idx_df.reset_index(drop=True, inplace=True)
    
    df['idx'] = idx_df.idx
    
    cols_q = [x for x in np.where([x.startswith('Quantity') for x in df.columns])[0]]

    cols = cols_q.copy()

    cols.extend([x + 1 for x in cols_q])
    cols.extend([x + 2 for x in cols_q])
    cols.sort()
    
    k_cols = [0, 2, np.where(['idx' in x for x in df.columns])[0][0]]
    
    lst = list()
    repeat = sum(['Quantity' in x for x in df_bid_rams.columns])
    for i in range(repeat):
        c_cols = k_cols.copy()
        c_cols.extend(cols[3*i:3*i+3])
        print(c_cols)
        print(i)
        lst.append(df.iloc[:,c_cols])
        
    df = pd.concat(lst, axis=0)
    return(df)
    

In [309]:
df_bid_rams = pd.read_excel("../data/external/RFP Cost Comparison_Tracker_v9_3.0 Submission 1.6.21.xlsx", 
                   sheet_name='R.A.M.S 2.0', header=6)

df_bid_rams = prep_bid(df_bid_rams)

df_bid_rams.head(2)

[0, 2, 26, 7, 8, 9]
0
[0, 2, 26, 10, 11, 12]
1
[0, 2, 26, 13, 14, 15]
2
[0, 2, 26, 16, 17, 18]
3
[0, 2, 26, 19, 20, 21]
4


Unnamed: 0,Area,Item,idx,Quantity,Unit,Price,Quantity.1,Unit.1,Price.1,Quantity.2,Unit.2,Price.2,Quantity.3,Unit.3,Price.3,Quantity.4,Unit.4,Price.4
0,Bath,"Interior Finish Work, Other",0.0,2,Count,300.328,,,,,,,,,,,,
1,Bath,Bath / Shower,1.0,2,Count,0.0,,,,,,,,,,,,


In [261]:
cols_q = [x for x in np.where([x.startswith('Quantity') for x in df_bid_rams.columns])[0]]

cols = cols_q.copy()

cols.extend([x + 1 for x in cols_q])
cols.extend([x + 2 for x in cols_q])
cols.sort()
cols

[7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]

In [280]:
np.where(['idx' in x for x in df_bid_rams.columns])[0][0]

26

In [289]:
k_cols = [0, 2, np.where(['idx' in x for x in df_bid_rams.columns])[0][0]]
k_cols.extend(cols[12:15])
df_bid_rams.iloc[:,k_cols]


Unnamed: 0,Area,Item,idx,Quantity.4,Unit.4,Price.4
0,Bath,"Interior Finish Work, Other",0.0,1,Count,143.862
1,Bath,Bath / Shower,1.0,1,Count,24.3458
2,Bath,Vanity,2.0,0,Door/Drawer,213.746
3,Bath,Vanity,3.0,0,Door/Drawer,213.579
4,Bath,Vanity top / sink,4.0,DNA,LNIN,67.5043
5,Bath,Vanity top / sink,5.0,DNA,LNIN,191.373
6,Bath,Vanity top / sink,6.0,1,Count,71.4209
7,Bath,Mirror,7.0,1,Count,96.4095
8,Bath,"Finish Plumbing, Other",8.0,1,Count,342.903
9,Bath,Toliet,9.0,1,Count,170.421


In [297]:
rpt = sum(['Quantity' in x for x in df_bid_rams.columns])
for i in range(rpt):
    print(cols[3*i: 3*i+3])

[7, 8, 9]
[10, 11, 12]
[13, 14, 15]
[16, 17, 18]
[19, 20, 21]
