In [1]:
from datetime import date, time

import pandas as pd
import numpy as np


pd.options.display.max_columns = 100
pd.options.display.max_rows = 300
pd.options.display.max_colwidth = 50

### The following section has been developed to created a rule table for the spec metadata. This logic is tuned to the XL created by Ashish. If work continues in this drirection to capture spec metadata, this section of the code will be useful

In [42]:
df_md = pd.read_excel("../data/external/New SKUs and SKUs Meta Data v1.0.xlsx", 
                   sheet_name='SKU Tags Meta Data', header=1)

df_md.rename(columns={'Unnamed: 0':'Area', 'Unnamed: 1':'Scope'}, inplace=True)
df_md.drop([x for i,x in enumerate(df_md.columns.values) if x.startswith('Un')], inplace=True, axis=1)
df_md.fillna("UNK", inplace=True)

df_md.head()

Unnamed: 0,Area,Scope,name,possible values,name.1,possible values.1,name.2,possible values.2
0,Kitchen,Cabinets,style,"shaker, flat face",material,"all wood, mdf, thermofoil",finish,"painted, stained"
1,Kitchen,Cabinet - Paint,material,UNK,finish,UNK,UNK,UNK
2,Kitchen,Counters,material,UNK,finish,UNK,thickness,UNK
3,Kitchen,Sink,basins,UNK,holes,UNK,mount,UNK
4,Kitchen,Faucet,style,"pull down, down flex",holes,UNK,UNK,UNK


In [88]:
m_lst = []

for idx in range(2):
    temp = df_md[['Area', 'Scope']].copy()
    temp['tag'] = df_md.iloc[:,idx*2 + 2]
    temp['value'] = df_md.iloc[:,idx*2 + 3]

    lst = []

    for i in temp.index:
        for v_cnt in range(len(temp.value[i].split(','))):
            l1 = [x for x in temp.iloc[i,0:3].values]
            l2 = [temp.value[i].split(',')[v_cnt].strip()]

            l1.extend((l2))

            if np.sum([x == 'UNK' for x in l1]) < 4:
                lst.append(l1)

        temp_df = pd.DataFrame(lst)
        temp_df.columns = ['Area', 'Scope', 'Tag', 'Value']
        
    m_lst.append(temp_df)

In [93]:
f_df = pd.concat(m_lst, axis=0)
f_df.head()

Unnamed: 0,Area,Scope,Tag,Value
0,Kitchen,Cabinets,style,shaker
1,Kitchen,Cabinets,style,flat face
2,Kitchen,Cabinet - Paint,material,UNK
3,Kitchen,Counters,material,UNK
4,Kitchen,Sink,basins,UNK


### The following code has been created to load a Postgres table with a structured schema and will contain the SKU data.

In [306]:
df_ext = pd.read_excel("../data/external/lightning bid master template v3 price data.xlsm", 
                   sheet_name='Price Data', header=1)

df_ext.drop([x for x in df_ext.columns if x.startswith('Unnamed: ')], axis=1, inplace=True)
df_ext.columns = [x.replace(' ', '_') for x in df_ext.columns]
df_ext.columns = [x.replace('(', '') for x in df_ext.columns]
df_ext.columns = [x.replace(')', '') for x in df_ext.columns]
df_ext.columns = [x.replace('#', '') for x in df_ext.columns]

df_ext.drop(np.where(df_ext.Category.isnull() == True)[0], axis=0, inplace=True)

df_ext.reset_index(inplace=True, drop=True)

df_ext.RFP_Notes = df_ext.RFP_Notes.astype('object')
df_ext.Catalogue_ID_ = df_ext.Catalogue_ID_.astype('object')

key_fields = ['Category', 'Sub_Category', 'Work_Type', 'RFP_SPEC', 'RFP_SPEC_Tags']

other_required_fields = ['RFP_Notes', 'Owner', 'Property_Manager', 'Geography', 'Manufacture',
       'SKU_', 'Supplier', 'Catalogue_ID_', 'Retail_Name', 'Brandline', 'Notes', 'URL']

tag_fields = ['Color', 'Style_A', 'Style_B', 'Material', 'Finish',
       'Quality', 'Defining_Size', 'Tailorbird_Catalogue', 'Takeoff_Codex', 'Unit']

numeric_fields = ['Price']

info_fields = ['Price_Notes', 'Price_Source_Notes', 'Archive_Name']

df_ext.loc[:,key_fields] = df_ext[key_fields].fillna('ALL', axis=1)
df_ext.loc[:,other_required_fields] = df_ext[other_required_fields].fillna('ANY',  axis=1)
df_ext.loc[:,tag_fields] = df_ext[tag_fields].fillna('DNA', axis=1)
df_ext.loc[:,numeric_fields] = df_ext[numeric_fields].fillna(0, axis=1)
df_ext.loc[:,info_fields] = df_ext[info_fields].fillna('MI', axis=1)

df_ext = df_ext.apply(lambda x: x.str.upper(), axis=1)

df_ext.head(2)

Unnamed: 0,Category,Sub_Category,Work_Type,RFP_SPEC,RFP_SPEC_Tags,RFP_Notes,Owner,Property_Manager,Geography,Manufacture,SKU_,Supplier,Catalogue_ID_,Retail_Name,Brandline,Notes,URL,Color,Style_A,Style_B,Material,Finish,Quality,Defining_Size,Tailorbird_Catalogue,Takeoff_Codex,Unit,Price,Price_Notes,Price_Source_Notes,Archive_Name
0,KITCHEN,CABINET - BOX SETS,MATERIAL,"CABINET - BOX SETS, WOOD, SHAKER","SUB CATEGORY, MATERIAL, STYLE A",ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,INCLUDES UPPERS AND LOWERS,ANY,WHITE,SHAKER,DNA,WOOD,PAINTED,DNA,DNA,DNA,A,LNIN,,SHIPPING INCLUDED,MI,H1 KITCHEN CABINETS WOOD SHAKER - LNIN
1,KITCHEN,"CABINET - FRONTS, DOORS",MATERIAL,"CABINET - FRONTS, DOORS, WOOD, SHAKER","SUB CATEGORY, MATERIAL, STYLE A",ANY,CATALYST HOUSING GROUP,GREYSTAR NCA,NORTHERN CALIFORNIA,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,DNA,SHAKER,DNA,WOOD,DNA,DNA,DNA,DNA,B+C,COUNT,,MI,SERENITY RFP,H2 KITCHEN CABINETS WOOD SHAKER - NEW FRONTS


In [307]:
df_ext = df_ext[-(df_ext.Sub_Category.str.contains('Exclude'))]
df_ext.reset_index(drop=True, inplace=True)

##df_ext['spec'] = df_ext.Sub_Category.apply(lambda x: 'DNA' if len(x.split(' - ')) == 1 else x.split(' - ')[1].upper() )
##df_ext['scope'] = df_ext.Sub_Category.apply(lambda x: x.split(' - ')[0].upper() )

##df_ext.rename(columns={'Category':'area'}, inplace=True)
##df_ext.drop(['Sub_Category'], inplace=True, axis=1)

##df_ext = df_ext.reindex(['area', 'scope', 'spec',
##                'Work_Type', 'RFP_SPEC', 'RFP_SPEC_Tags', 'RFP_Notes',
##                'Owner', 'Property_Manager', 'Geography', 'Manufacture', 'SKU_',
##                'Supplier', 'Catalogue_ID_', 'Retail_Name', 'Brandline', 'Notes', 'URL',
##                'Color', 'Style_A', 'Style_B', 'Material', 'Finish', 'Quality',
##                'Defining_Size', 'Tailorbird_Catalogue', 'Takeoff_Codex', 'Unit',
##                'Price', 'Price_Notes', 'Price_Source_Notes', 'Archive_Name' ], axis=1)

In [308]:
df_ext.columns = [x.lower() for x in df_ext.columns]

df_ext

Unnamed: 0,area,scope,spec,work_type,rfp_spec,rfp_spec_tags,rfp_notes,owner,property_manager,geography,manufacture,sku_,supplier,catalogue_id_,retail_name,brandline,notes,url,color,style_a,style_b,material,finish,quality,defining_size,tailorbird_catalogue,takeoff_codex,unit,price,price_notes,price_source_notes,archive_name
0,KITCHEN,CABINET,BOX SETS,MATERIAL,"CABINET - BOX SETS, WOOD, SHAKER","SUB CATEGORY, MATERIAL, STYLE A",ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,INCLUDES UPPERS AND LOWERS,ANY,WHITE,SHAKER,DNA,WOOD,PAINTED,DNA,DNA,DNA,A,LNIN,,SHIPPING INCLUDED,MI,H1 KITCHEN CABINETS WOOD SHAKER - LNIN
1,KITCHEN,CABINET,"FRONTS, DOORS",MATERIAL,"CABINET - FRONTS, DOORS, WOOD, SHAKER","SUB CATEGORY, MATERIAL, STYLE A",ANY,CATALYST HOUSING GROUP,GREYSTAR NCA,NORTHERN CALIFORNIA,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,DNA,SHAKER,DNA,WOOD,DNA,DNA,DNA,DNA,B+C,COUNT,,MI,SERENITY RFP,H2 KITCHEN CABINETS WOOD SHAKER - NEW FRONTS
2,KITCHEN,CABINET,"FRONTS, DOORS",MATERIAL,"CABINET - FRONTS, DOORS, , FLAT","SUB CATEGORY, MATERIAL, STYLE A",ANY,CATALYST HOUSING GROUP,GREYSTAR NCA,NORTHERN CALIFORNIA,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,DNA,FLAT,DNA,DNA,DNA,DNA,DNA,DNA,B+C,COUNT,,MI,SERENITY RFP,H300 KITCHEN CABINETS FLAT FACE - NEW FRONTS
3,KITCHEN,CABINET,BOX SETS,MATERIAL,"CABINET - BOX SETS, WOOD, SHAKER","SUB CATEGORY, MATERIAL, STYLE A",ANY,ANY,ANY,ANY,ANY,ANY,MFS,ANY,ANY,ANY,INCLUDES UPPERS AND LOWERS,ANY,WHITE,SHAKER,DNA,WOOD,PAINTED,DNA,DNA,DNA,DNA,LNIN,,SHIPPING INCLUDED,MFS SUMMER '20,H3 KITCHEN CABINETS WOOD SHAKER
4,KITCHEN,CABINET,REFINISH AND PAINT BOXES AND FRONTS,LABOR & MATERIAL,"CABINET - REFINISH AND PAINT BOXES AND FRONTS, ,","SUB CATEGORY, MATERIAL, STYLE A",ANY,ANY,AVENUE 5,ARIZONA,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,DNA,DNA,DNA,DNA,DNA,DNA,DNA,DNA,A,LNIN,,MI,VERBAL PRICE PER AVE 5 AZ APRIL '21,H4 KITCHEN PAINT CABINETS
5,KITCHEN,CABINET,REFINISH AND PAINT BOXES,LABOR & MATERIAL,"CABINET - REFINISH AND PAINT BOXES, ,","SUB CATEGORY, MATERIAL, STYLE A",ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,DNA,DNA,DNA,DNA,DNA,DNA,DNA,DNA,B + C,COUNT,,MI,MI,H5 KITCHEN PAINT CABINETS- BOXES ONLY
6,KITCHEN,COUNTERTOPS,DNA,MATERIAL,"COUNTERTOPS, GRANITE, L1","SUB CATEGORY, MATERIAL, QUALITY",ANY,ANY,ANY,ANY,ANY,ANY,APRO,ANY,ANY,ANY,ANY,ANY,DNA,DNA,DNA,GRANITE,DNA,L1,DNA,DNA,D,SQFT,,SHIPPING INCLUDED,MI,H6 KITCHEN COUNTER TOP GRANITE L1
7,KITCHEN,COUNTERTOPS,DNA,MATERIAL,"COUNTERTOPS, QUARTZ, L1","SUB CATEGORY, MATERIAL, QUALITY",ANY,ANY,ANY,ANY,ANY,ANY,APRO,ANY,ANY,ANY,ANY,ANY,DNA,DNA,DNA,QUARTZ,DNA,L1,DNA,DNA,D,SQFT,,SHIPPING INCLUDED,MI,H7 KITCHEN COUNTER TOP QUARTZ
8,KITCHEN,COUNTERTOPS,DNA,MATERIAL,"COUNTERTOPS, QUARTZ, L1","SUB CATEGORY, MATERIAL, QUALITY",ANY,CATALYST HOUSING GROUP,GREYSTAR NCA,NORTHERN CALIFORNIA,ANY,ANY,ANY,ANY,ANY,ANY,ANY,ANY,DNA,DNA,DNA,QUARTZ,DNA,L1,2 CM,DNA,D,SQFT,,MI,RAMS AT 14/SF - OTHER HIGH 20S,H133 KITCHEN COUNTER TOP QUARTZ 2CM
9,KITCHEN,COUNTERTOPS,DNA,MATERIAL,"COUNTERTOPS, GRANITE, L2","SUB CATEGORY, MATERIAL, QUALITY",ANY,ANY,ANY,ANY,ANY,ANY,APRO,ANY,ANY,ANY,ANY,ANY,DNA,DNA,DNA,GRANITE,DNA,L2,DNA,DNA,D,SQFT,,SHIPPING INCLUDED,MI,H8 KITCHEN COUNTER TOP GRANITE L2


In [309]:
from sqlalchemy import create_engine
engine = create_engine('postgresql://localhost/TB')

In [310]:
df_ext.to_sql('sku_data', con=engine, index=False, if_exists='replace')

### Bulding the Spec Metadata

In [245]:
pd.read_json("../data/external/sample.json")

Unnamed: 0,category_name,items
0,Kitchen,"[{'name': 'Cabinets', 'options': ['None', 'New..."
1,Appliances,"[{'name': 'Refrigerator Style', 'options': ['N..."
2,Bathroom,"[{'name': 'Cabinets', 'options': ['None', 'New..."
3,Flooring,"[{'name': 'Bedroom', 'options': ['None', 'New ..."
4,Paint & living fixtures,"[{'name': 'Paint', 'options': ['None', 'Fresh ..."
5,Repair & windows,"[{'name': 'Replace windows ?', 'options': ['No..."


In [243]:
import json

with open("../data/external/sample.json", encoding='utf-8') as f:
    data = json.load(f)


In [202]:
items = pd.json_normalize(data, record_path=['items'], meta=['category_name'])
items.head()

Unnamed: 0,name,options,category_name
0,Cabinets,"[None, New Boxsets, New Doors/Drawers, Paint D...",Kitchen
1,Cabinet Style,"[Any, Shaker, Flat]",Kitchen
2,Material,"[Any, MDF, All Wood]",Kitchen
3,Finish,"[Any, Painted, Thermofoil, Melamine]",Kitchen
4,Handle hardware,"[Any, Bars, Fingers]",Kitchen


In [203]:
items.shape

(77, 3)

## Read the spec object and match it with the SKU table

In [231]:
import json
from bson import ObjectId
from bson import json_util

class JSONEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, ObjectId):
            return str(o)
        return json.JSONEncoder.default(self, o)

##JSONEncoder().encode(analytics)

In [246]:
#with open("../data/external/project_josh.json", encoding='utf-8') as f:
#    p_data = json.load(f)

In [247]:
##json.loads(json_util.dumps(p_txt))

In [248]:
with open("../data/external/pinecone.json") as f:
    p_data = json.loads(json_util.dumps(f.read()))

In [249]:
##p_data

In [250]:
##pd.read_json(p_data)

## Develop code to load data into MongoDB

In [138]:
from pymongo import MongoClient
from pprint import pprint

In [5]:
client = MongoClient('mongodb://127.0.0.1:27017/')

In [8]:
db=client.admin
# Issue the serverStatus command and print the results
serverStatusResult=db.command("serverStatus")
##pprint(serverStatusResult)

In [7]:
db = client.tb