### Product Recomender Engine

**Developer:** Mayana Mohsin Khan <br/>

**Date Created:** 30th December 2020 <br/>
**Date Modified:** 1st January 2021

### Packages

In [49]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import openpyxl
import sqlalchemy 
import mysql.connector

### Load Dataset

In [50]:
data = pd.read_excel('specifications.xlsx')
data.drop([col for col in data.columns if "Unnamed" in col], axis=1, inplace=True)
data['partner'] = data['partner'].fillna('intel')
data.head()

Unnamed: 0,boms_id,partner,desc,product_id,name,chassis,motherboard,form_factor,led_indicators,on_off_switch,...,Intel® Build Assurance Technology,Intel® Efficient Power Technology,Intel® Quiet Thermal Technology,intel_virtual_technology_for_directed_i/o,Intel® Rapid Storage Technology enterprise,Intel® Quiet System Technology,Intel® Fast Memory Access,Intel® Flex Memory Access,TPM Version,Intel® Transparent Supply Chain
0,26,supermicro,826BE1C4-R1K23LPB + X11DPi-N,,D2123-DS228-MDR,CSE-826BE1C4-R1K23LPB,MBD-X11DPi-N,2U,,,...,,,,,,,,,,
1,28,supermicro,847BE1C4-R1K23LPB + X11DPi-N,,D4363-DS228-MDR,CSE-847BE1C4-R1K23LPB,MBD-X11DPi-N,4U,,,...,,,,,,,,,,
2,31,supermicro,825TQC-R1K03LPB + X11DPi-N,,D283-DS228-MDR,CSE-825TQC-R1K03LPB,MBD-X11DPi-N,2U,,,...,,,,,,,,,,
3,32,supermicro,836BE1C-R1K23B + X11DPi-N,,D3163-DS228-MDR,CSE-836BE1C-R1K23B,MBD-X11DPi-N,3U,,,...,,,,,,,,,,
4,33,supermicro,846BE1C-R1K23B + X11DPi-N,,D4243-DS228-MDR,CSE-846BE1C-R1K23B,MBD-X11DPi-N,4U,,,...,,,,,,,,,,


In [51]:
def connection():
    database_username = 'root'  # database username
    database_password = '123456789'  # database password
    database_ip = 'localhost'  # database server
    database_name = 'digicor_partner'  # database name
    # connection object to database
    database_connection = sqlalchemy.create_engine('mysql+mysqlconnector://{0}:{1}@{2}/{3}'.
                                                   format(database_username, database_password,
                                                          database_ip, database_name))
    return database_connection, database_name

In [52]:
connection, db_name = connection()

In [53]:
# function to extact boms table from database
def get_boms(connection):
    sql = 'SELECT * FROM digicor_partner.boms'
    boms = pd.read_sql(sql, connection)
    return boms

In [54]:
boms = get_boms(connection)
boms.head()

Unnamed: 0,id,name,deliver_option,desc,vendor,mpn,schema_sku,image_url,second_image_url,third_image_url,...,cpuf_id,boot_drive_exist,boot_drive_type,boot_drive_option,m2_boot_drive_exist,m2_boot_drive_type,m2_boot_drive_option,deleted_at,created_at,updated_at
0,25,Bom-Test-1,1.0,123123,,,,/storage/bom/Bom/bomxkmrhf6t.jpeg,,,...,86,1,1,1,1,2,3,2019-07-03 03:13:23,2019-06-20 05:22:49,2019-07-03 03:13:23
1,26,D2123-DS228-MDR,3.0,826BE1C4-R1K23LPB + X11DPi-N,D2123-DS228-MDR,D2123-DS228-MDR,,/storage/bom/Bom/D2123-DS228-MDR.jpg,/storage/bom/Bom/ssg-6027r-e1r12t_1.jpg,/storage/bom/Bom/d2123-ds228-mdr-third_image_u...,...,86,1,1,1,1,2,4,NaT,2019-06-24 08:09:56,2020-11-18 16:15:25
2,27,R2-E314R1200-X11DPi-N-deleted,1.0,2U DP xeon server,,,,,,,...,86,1,1,1,1,2,4,2019-06-24 11:21:10,2019-06-24 08:10:15,2019-06-24 11:21:10
3,28,D4363-DS228-MDR,3.0,847BE1C4-R1K23LPB + X11DPi-N,D4363-DS228-MDR,D4363-DS228-MDR,,/storage/bom/Bom/d4363-ds228-mdr-image_url.jpg,/storage/bom/Bom/d4363-ds228-mdr-second_image_...,/storage/bom/Bom/d4363-ds228-mdr-third_image_u...,...,97,1,1,1,1,2,4,NaT,2019-06-24 11:19:49,2020-10-21 17:21:44
4,29,BareBom-Testing,1.0,12123123,,,,,,,...,1,1,1,1,1,2,3,2019-07-03 03:13:31,2019-06-25 07:44:35,2019-07-03 03:13:31


In [55]:
boms.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 30 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   id                    482 non-null    int64         
 1   name                  482 non-null    object        
 2   deliver_option        473 non-null    float64       
 3   desc                  482 non-null    object        
 4   vendor                439 non-null    object        
 5   mpn                   384 non-null    object        
 6   schema_sku            0 non-null      object        
 7   image_url             482 non-null    object        
 8   second_image_url      371 non-null    object        
 9   third_image_url       359 non-null    object        
 10  fourth_image_url      349 non-null    object        
 11  fifth_image_url       321 non-null    object        
 12  weight                482 non-null    float64       
 13  start_price         

In [56]:
boms.bom_type.unique()

array([1, 2, 4, 5, 3])

In [57]:
boms = boms[['id', 'name', 'bom_type', 'bom_category_id']]
boms.head()

Unnamed: 0,id,name,bom_type,bom_category_id
0,25,Bom-Test-1,1,1
1,26,D2123-DS228-MDR,1,17
2,27,R2-E314R1200-X11DPi-N-deleted,1,1
3,28,D4363-DS228-MDR,1,20
4,29,BareBom-Testing,2,1


In [58]:
def combine_with_boms(df, boms):
    df = df.rename(columns={'boms_id': 'id'})
    df = df.merge(boms, on='id')
    df = df.rename(columns={'name_y': 'name', 'id': 'boms_id'})
    return df

In [59]:
data_boms = combine_with_boms(data, boms)
data_boms.head()

Unnamed: 0,boms_id,partner,desc,product_id,name_x,chassis,motherboard,form_factor,led_indicators,on_off_switch,...,intel_virtual_technology_for_directed_i/o,Intel® Rapid Storage Technology enterprise,Intel® Quiet System Technology,Intel® Fast Memory Access,Intel® Flex Memory Access,TPM Version,Intel® Transparent Supply Chain,name,bom_type,bom_category_id
0,26,supermicro,826BE1C4-R1K23LPB + X11DPi-N,,D2123-DS228-MDR,CSE-826BE1C4-R1K23LPB,MBD-X11DPi-N,2U,,,...,,,,,,,,D2123-DS228-MDR,1,17
1,28,supermicro,847BE1C4-R1K23LPB + X11DPi-N,,D4363-DS228-MDR,CSE-847BE1C4-R1K23LPB,MBD-X11DPi-N,4U,,,...,,,,,,,,D4363-DS228-MDR,1,20
2,31,supermicro,825TQC-R1K03LPB + X11DPi-N,,D283-DS228-MDR,CSE-825TQC-R1K03LPB,MBD-X11DPi-N,2U,,,...,,,,,,,,D283-DS228-MDR,1,17
3,32,supermicro,836BE1C-R1K23B + X11DPi-N,,D3163-DS228-MDR,CSE-836BE1C-R1K23B,MBD-X11DPi-N,3U,,,...,,,,,,,,D3163-DS228-MDR,1,20
4,33,supermicro,846BE1C-R1K23B + X11DPi-N,,D4243-DS228-MDR,CSE-846BE1C-R1K23B,MBD-X11DPi-N,4U,,,...,,,,,,,,D4243-DS228-MDR,1,20


In [29]:
data_boms.bom_type.unique()

array([1, 2, 4])

In [60]:
df1 = data_boms.loc[data_boms.bom_type == 1].head(3)

In [61]:
df2 = data_boms.loc[data_boms.bom_type == 2].head(3)

In [62]:
df3 = data_boms.loc[data_boms.bom_type == 4].head(3)

In [63]:
reco_samples = pd.concat([df1, df2, df3])
reco_samples

Unnamed: 0,boms_id,partner,desc,product_id,name_x,chassis,motherboard,form_factor,led_indicators,on_off_switch,...,intel_virtual_technology_for_directed_i/o,Intel® Rapid Storage Technology enterprise,Intel® Quiet System Technology,Intel® Fast Memory Access,Intel® Flex Memory Access,TPM Version,Intel® Transparent Supply Chain,name,bom_type,bom_category_id
0,26,supermicro,826BE1C4-R1K23LPB + X11DPi-N,,D2123-DS228-MDR,CSE-826BE1C4-R1K23LPB,MBD-X11DPi-N,2U,,,...,,,,,,,,D2123-DS228-MDR,1,17
1,28,supermicro,847BE1C4-R1K23LPB + X11DPi-N,,D4363-DS228-MDR,CSE-847BE1C4-R1K23LPB,MBD-X11DPi-N,4U,,,...,,,,,,,,D4363-DS228-MDR,1,20
2,31,supermicro,825TQC-R1K03LPB + X11DPi-N,,D283-DS228-MDR,CSE-825TQC-R1K03LPB,MBD-X11DPi-N,2U,,,...,,,,,,,,D283-DS228-MDR,1,17
6,37,supermicro,AS + H11SSL-i,,S143-SA332-MR,AS,MBD-H11SSL-i,,,,...,,,,,,,,S143-SA332-MR,2,17
54,94,supermicro,1028R-WTR + X10DRW-i,,S1102-DE422-D10GNR,SYS-1028R-WTR,MBD-X10DRW-i,1U,Y,Y,...,,,,,,,,S1102-DE422-D10GNR,2,17
55,95,supermicro,1028R-WTRT + X10DRW-iT,,S1102-DE422-D10GR,SYS-1028R-WTRT,MBD-X10DRW-iT,1U,Y,Y,...,,,,,,,,S1102-DE422-D10GR,2,17
91,139,supermicro,216BE1C-R741JBOD + X11DPT-PS,,SJ242-AR,CSE-216BE1C-R741JBOD,MBD-X11DPT-PS,2U,,,...,,,,,,,,SJ242-AR,4,30
92,140,supermicro,826BE2C-R741JBOD + X11DPT-PS,,SJ123-AR,CSE-826BE2C-R741JBOD,MBD-X11DPT-PS,2U,,,...,,,,,,,,SJ123-AR,4,30
93,141,supermicro,847E2C-R1K28JBOD + X11DPT-PS,,SJ443-AR,CSE-847E2C-R1K28JBOD,MBD-X11DPT-PS,4U,,,...,,,,,,,,SJ443-AR,4,30


### Feature Engineering

In [64]:
def drop_nan_col(df, threshold): 
    for i in df.columns:
        if (float(df[i].isnull().sum())/df[i].shape[0]) > threshold:
            df = df.drop(i, axis=1) 
    return df

In [65]:
data2 = drop_nan_col(data, 0.9)
data2.head()

Unnamed: 0,boms_id,partner,desc,name,chassis,motherboard,form_factor,led_indicators,on_off_switch,fan_size,...,motherboard_chipset,lan_ports,onboard_connection,gbe,onboard_network_controllers,supported_storage_drive_interfaces,num_of_processors_supported,processor_socket,num_of_fasns,type
0,26,supermicro,826BE1C4-R1K23LPB + X11DPi-N,D2123-DS228-MDR,CSE-826BE1C4-R1K23LPB,MBD-X11DPi-N,2U,,,80x80x38 mm,...,Intel® C621,2.0,RJ45 Gigabit Ethernet LAN ports,1.0,Dual LAN with 1GbE LAN with Intel® X722,SATA3,2.0,"Dual Socket LGA-3647 (Socket P) supported, CPU...",3,D
1,28,supermicro,847BE1C4-R1K23LPB + X11DPi-N,D4363-DS228-MDR,CSE-847BE1C4-R1K23LPB,MBD-X11DPi-N,4U,,,80x80x38 mm,...,Intel® C621,2.0,RJ45 Gigabit Ethernet LAN ports,1.0,Dual LAN with 1GbE LAN with Intel® X722,SATA3,2.0,"Dual Socket LGA-3647 (Socket P) supported, CPU...",7,D
2,31,supermicro,825TQC-R1K03LPB + X11DPi-N,D283-DS228-MDR,CSE-825TQC-R1K03LPB,MBD-X11DPi-N,2U,,,80x80x38 mm,...,Intel® C621,2.0,RJ45 Gigabit Ethernet LAN ports,1.0,Dual LAN with 1GbE LAN with Intel® X722,SATA3,2.0,"Dual Socket LGA-3647 (Socket P) supported, CPU...",3,D
3,32,supermicro,836BE1C-R1K23B + X11DPi-N,D3163-DS228-MDR,CSE-836BE1C-R1K23B,MBD-X11DPi-N,3U,,,80x32 mm 6.7K RPM SC836 Rear Exhaust Fan w/ Ho...,...,Intel® C621,2.0,RJ45 Gigabit Ethernet LAN ports,1.0,Dual LAN with 1GbE LAN with Intel® X722,SATA3,2.0,"Dual Socket LGA-3647 (Socket P) supported, CPU...",23,D
4,33,supermicro,846BE1C-R1K23B + X11DPi-N,D4243-DS228-MDR,CSE-846BE1C-R1K23B,MBD-X11DPi-N,4U,,,80x32 mm 6.7K RPM SC836 Rear Exhaust Fan w/ Ho...,...,Intel® C621,2.0,RJ45 Gigabit Ethernet LAN ports,1.0,Dual LAN with 1GbE LAN with Intel® X722,SATA3,2.0,"Dual Socket LGA-3647 (Socket P) supported, CPU...",23,D


In [66]:
print(data.shape)
print(data2.shape)

(453, 146)
(453, 54)


In [67]:
data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 453 entries, 0 to 452
Data columns (total 54 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   boms_id                                453 non-null    int64  
 1   partner                                453 non-null    object 
 2   desc                                   449 non-null    object 
 3   name                                   453 non-null    object 
 4   chassis                                443 non-null    object 
 5   motherboard                            441 non-null    object 
 6   form_factor                            365 non-null    object 
 7   led_indicators                         145 non-null    object 
 8   on_off_switch                          145 non-null    object 
 9   fan_size                               235 non-null    object 
 10  product_colour                         412 non-null    object 
 11  rohs_c

In [68]:
# data2['name'].value_counts()[:].plot(kind='barh', figsize=[10,120], fontsize=13, color='navy').invert_yaxis()

### Bag of words

In [69]:
data2['bag of words'] = data2[data2.columns[:]].apply(lambda x: ' '.join(x.dropna().astype(str)),axis=1)
data2['bag of words'] = data2['bag of words'].apply(lambda x: x.lower())
data2['bag of words'].head()

0    26 supermicro 826be1c4-r1k23lpb + x11dpi-n d21...
1    28 supermicro 847be1c4-r1k23lpb + x11dpi-n d43...
2    31 supermicro 825tqc-r1k03lpb + x11dpi-n d283-...
3    32 supermicro 836be1c-r1k23b + x11dpi-n d3163-...
4    33 supermicro 846be1c-r1k23b + x11dpi-n d4243-...
Name: bag of words, dtype: object

### TF-IDF and Cosine Simillarity

In [70]:
tf = TfidfVectorizer(analyzer='word', 
                     ngram_range=(1, 1), 
                     min_df=0, 
                     stop_words='english')

In [71]:
tfidf_matrix = tf.fit_transform(data2['bag of words'])

In [72]:
tfidf_matrix.data

array([0.06363304, 0.0438786 , 0.06363304, ..., 0.06223386, 0.07805358,
       0.09593518])

In [73]:
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [74]:
cosine_similarities

array([[1.        , 0.63477188, 0.49245745, ..., 0.05056173, 0.03489777,
        0.03371032],
       [0.63477188, 1.        , 0.44832222, ..., 0.04801105, 0.03313729,
        0.03200974],
       [0.49245745, 0.44832222, 1.        , ..., 0.04797966, 0.03311563,
        0.03198882],
       ...,
       [0.05056173, 0.04801105, 0.04797966, ..., 1.        , 0.72567777,
        0.70098544],
       [0.03489777, 0.03313729, 0.03311563, ..., 0.72567777, 1.        ,
        0.78577134],
       [0.03371032, 0.03200974, 0.03198882, ..., 0.70098544, 0.78577134,
        1.        ]])

### Recommender functions 

In [75]:
def similarity_algorithm(): 
    results = {}
    for idx, row in data2.iterrows():
        similar_indices = cosine_similarities[idx].argsort()[:-100:-1]
        similar_items = [(cosine_similarities[idx][i], data2['boms_id'][i]) for i in similar_indices]
        results[row['boms_id']] = similar_items[1:]
    return results

In [76]:
def item(id):
    return data2.loc[data2['boms_id'] == id]['name'].tolist()[0].split(' - ')[0]

In [158]:
def recommend(boms_id, num):
    print("Recommending " + str(num) + " products similar to " + item(boms_id))
    print("-"*60)
    results = similarity_algorithm()
    recs = results[boms_id][:num]
    recomendations = []
    recomendations2 = []
    recomendations3 = []
    recomendations_dict = {}
#     recomendations_dict['status'] = 200
    for rec in recs:
#         recomendations.append((str(rec[1]), item(rec[1])))
        recomendations.append(str(rec[1]))
        recomendations2.append(item(rec[1]))
        recomendations3.append(str(rec[0]))
        print("Recommended: " + item(rec[1]) + " (simillarity score: " + str(rec[0]) + ")" )
#     recomendations_dict['data'] = [dict(recomendations)]
    return recomendations, recomendations2, recomendations3
#     return dict(recomendations), recomendations3

### API fetch

Task:
- API from frontend to backend with URL, name and **boms_id** of product.

### Prediction

In [85]:
# recomender_result = {}
# for boms in data2.boms_id:
#     recomender_result[boms] = recommend(boms_id = boms , num=5)

In [86]:
# recomender_result

In [87]:
# result = []
# for i in range(len(recomender_result)):
#     for key, index in recomender_result.items():
#         for k , value in index.items():
#             result.append([key, k, value])

In [88]:
# data3 = pd.DataFrame.from_records(result)
# data3 = data3.astype(str)
# data3

In [89]:
# data3 = data3.rename(columns={0:'boms_id', 1:'simillar_boms_id', 2:'name'})
# data3

In [90]:
# data3['boms_id'].value_counts()[:].plot(kind='barh', figsize=[10,120], fontsize=13, color='navy').invert_yaxis()

In [187]:
boms_id = 141

In [188]:
recomendations, recomendations2, recomendations3 = recommend(boms_id = boms_id, num=5)

Recommending 5 products similar to SJ443-AR
------------------------------------------------------------
Recommended: SJ123-AR (simillarity score: 0.49682408064200045)
Recommended: SJ242-AR (simillarity score: 0.48712614617459665)
Recommended: SB334N-DS228-MIR (simillarity score: 0.4610196084675132)
Recommended: SB624N-DS228-MIR (simillarity score: 0.459286919777357)
Recommended: S142-DS228-IR (simillarity score: 0.4493350749495194)


In [165]:
d1 = pd.DataFrame({'boms_id': boms_id, 'simillar_boms_id': recomendations, 'name': recomendations2, 'similarity': recomendations3 })
d1

Unnamed: 0,boms_id,simillar_boms_id,name,similarity
0,26,59,D2123-SS228-MDR,0.6576068709246723
1,26,143,DNS4363-DS228-MDR,0.6410497808757499
2,26,28,D4363-DS228-MDR,0.634771876792206
3,26,35,D2242-DS228-NMDOR,0.6316551776985586
4,26,75,D2123-DA32-MDR,0.59859521233277


In [168]:
d2 = pd.DataFrame({'boms_id': boms_id, 'simillar_boms_id': recomendations, 'name': recomendations2, 'similarity': recomendations3 })

In [171]:
d3 = pd.DataFrame({'boms_id': boms_id, 'simillar_boms_id': recomendations, 'name': recomendations2, 'similarity': recomendations3 })

In [174]:
d4 = pd.DataFrame({'boms_id': boms_id, 'simillar_boms_id': recomendations, 'name': recomendations2, 'similarity': recomendations3 })

In [177]:
d5 = pd.DataFrame({'boms_id': boms_id, 'simillar_boms_id': recomendations, 'name': recomendations2, 'similarity': recomendations3 })

In [180]:
d6 = pd.DataFrame({'boms_id': boms_id, 'simillar_boms_id': recomendations, 'name': recomendations2, 'similarity': recomendations3 })

In [183]:
d7 = pd.DataFrame({'boms_id': boms_id, 'simillar_boms_id': recomendations, 'name': recomendations2, 'similarity': recomendations3 })

In [186]:
d8 = pd.DataFrame({'boms_id': boms_id, 'simillar_boms_id': recomendations, 'name': recomendations2, 'similarity': recomendations3 })

In [189]:
d9 = pd.DataFrame({'boms_id': boms_id, 'simillar_boms_id': recomendations, 'name': recomendations2, 'similarity': recomendations3 })

In [192]:
pd.concat([d1,d2,d3,d4,d5,d6,d7,d8,d9])

Unnamed: 0,boms_id,simillar_boms_id,name,similarity
0,26,59,D2123-SS228-MDR,0.6576068709246723
1,26,143,DNS4363-DS228-MDR,0.6410497808757499
2,26,28,D4363-DS228-MDR,0.634771876792206
3,26,35,D2242-DS228-NMDOR,0.6316551776985586
4,26,75,D2123-DA32-MDR,0.59859521233277
0,28,143,DNS4363-DS228-MDR,0.9236725219671656
1,28,62,D4363-SS228-MDR,0.6899868098005703
2,28,47,D3163-DE424-R,0.6504648053672889
3,28,147,DNS3163-DE424-R,0.6499589115555965
4,28,26,D2123-DS228-MDR,0.634771876792206


In [128]:
len(boms)

9

In [143]:
boms = []
recomendations = [] 
# recomendations2 = [] 
# recomendations3 = []
for index, row in reco_samples.iterrows():
    boms.append(row.boms_id)
    recomendations.append(recommend(boms_id = row.boms_id, num=5))

Recommending 5 products similar to D2123-DS228-MDR
------------------------------------------------------------
Recommended: D2123-SS228-MDR (simillarity score: 0.6576068709246723)
Recommended: DNS4363-DS228-MDR (simillarity score: 0.6410497808757499)
Recommended: D4363-DS228-MDR (simillarity score: 0.634771876792206)
Recommended: D2242-DS228-NMDOR (simillarity score: 0.6316551776985586)
Recommended: D2123-DA32-MDR (simillarity score: 0.59859521233277)
Recommending 5 products similar to D4363-DS228-MDR
------------------------------------------------------------
Recommended: DNS4363-DS228-MDR (simillarity score: 0.9236725219671656)
Recommended: D4363-SS228-MDR (simillarity score: 0.6899868098005703)
Recommended: D3163-DE424-R (simillarity score: 0.6504648053672889)
Recommended: DNS3163-DE424-R (simillarity score: 0.6499589115555965)
Recommended: D2123-DS228-MDR (simillarity score: 0.634771876792206)
Recommending 5 products similar to D283-DS228-MDR
-------------------------------------

In [146]:
pd.DataFrame(recomendations).to_excel('Recomender_results.xlsx')

In [227]:
def get_boms_id(name, data):
    bom_id = data[data['name'] == name].boms_id.values[0]
    return bom_id 

In [231]:
name = 'D2123-DS228-MDR'

In [234]:
# data2[data2['name'] == name].boms_id.values[0]
boms_id = get_boms_id(name, data2)
boms_id

26

In [235]:
recomendations = recommend(boms_id = boms_id, num=5)

Recommending 5 products similar to D2123-DS228-MDR
------------------------------------------------------------
Recommended: D2123-SS228-MDR (simillarity score: 0.6576068709246723)
Recommended: DNS4363-DS228-MDR (simillarity score: 0.6410497808757499)
Recommended: D4363-DS228-MDR (simillarity score: 0.634771876792206)
Recommended: D2242-DS228-NMDOR (simillarity score: 0.6316551776985586)
Recommended: D2123-DA32-MDR (simillarity score: 0.59859521233277)


### API Deployment

Task:
- Send API response containing 5 simillar products names back to frontend

In [None]:
from flask import Flask, jsonify, request
from flask_cors import CORS

# Init app
app = Flask(__name__)
CORS(app)

# API
@app.route('/api')
# @app.route('/api/recomender', methods = ['GET', 'POST'])
# def send_recommenxwdatios():
#     boms_id = request.json
#     print(boms_id, request.json)
#     recomendations = {}
#     if boms_id:
#         print(boms_id)
#         recomendations = recommend(boms_id, 5)
#         return jsonify(recomendations)
#     print(recomendations)
#     return 'API Not Working'

@app.route('/api/recomender/<boms_id>', methods = ['GET', 'POST'])
def send_recommenxwdatios(boms_id):
    recomendations = {}
    print(boms_id, type(boms_id))
    if boms_id:
        recomendations = recommend(int(boms_id), 5)
        recomendations['error'] = ''
        return jsonify(recomendations)
    print(recomendations)
    return recomendations

# @app.route('/api/recomender/<name>', methods = ['GET', 'POST'])
# def send_recommenxwdatios(name):
#     print(name, type(name))
#     if name:
#         boms_id = get_boms_id(name, data2)
#         print(boms_id)
#         recomendations = recommend(int(boms_id), 5)
#         return jsonify(recomendations)
#     print(recomendations)
#     return 'API Not Working'


# Run Server
if __name__ == '__main__':
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


26 <class 'str'>
Recommending 5 products similar to D2123-DS228-MDR
------------------------------------------------------------


127.0.0.1 - - [11/Jan/2021 16:04:25] "[37mGET /api/recomender/26 HTTP/1.1[0m" 200 -


Recommended: D2123-SS228-MDR (simillarity score: 0.6576068709246723)
Recommended: DNS4363-DS228-MDR (simillarity score: 0.6410497808757499)
Recommended: D4363-DS228-MDR (simillarity score: 0.634771876792206)
Recommended: D2242-DS228-NMDOR (simillarity score: 0.6316551776985586)
Recommended: D2123-DA32-MDR (simillarity score: 0.59859521233277)


[2021-01-11 16:04:28,509] ERROR in app: Exception on /api/recomender/25 [GET]
Traceback (most recent call last):
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask_cors/extension.py", line 165, in wrapped_function
    return cors_after_request(app.make_response(f(*args, **kwargs)))
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/_compat.py", line 39, in reraise
    raise value
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 1950, in full_dispa

25 <class 'str'>


### Everything together

In [None]:
# Packages
from flask import Flask, jsonify, request
from flask_cors import CORS
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load Dataset
data = pd.read_excel('specifications.xlsx')
data.drop([col for col in data.columns if "Unnamed" in col], axis=1, inplace=True)
data['partner'] = data['partner'].fillna('intel')

# Axulliary functions
def drop_nan_col(df, threshold): 
    for i in df.columns:
        if (float(df[i].isnull().sum())/df[i].shape[0]) > threshold:
            df = df.drop(i, axis=1) 
    return df

def similarity_algorithm(): 
    results = {}
    for idx, row in data2.iterrows():
        similar_indices = cosine_similarities[idx].argsort()[:-100:-1]
        similar_items = [(cosine_similarities[idx][i], data2['boms_id'][i]) for i in similar_indices]
        results[row['boms_id']] = similar_items[1:]
    return results

def item(id):
    return data2.loc[data2['boms_id'] == id]['name'].tolist()[0].split(' - ')[0]

def recommend(boms_id, num):
    print("Recommending " + str(num) + " products similar to " + item(boms_id))
    print("-"*60)
    results = similarity_algorithm()
    recs = results[boms_id][:num]
    recomendations = []
    for rec in recs:
        recomendations.append((str(rec[1]), item(rec[1])))
        print("Recommended: " + item(rec[1]) + " (simillarity score: " + str(rec[0]) + ")" )
    return dict(recomendations)

def get_boms_id(name, data):
    bom_id = data[data['name'] == name].boms_id.values[0]
    return bom_id 

# Data pre-processing
data2 = drop_nan_col(data, 0.9)
data2['bag of words'] = data2[data2.columns[:]].apply(lambda x: ' '.join(x.dropna().astype(str)),axis=1)
data2['bag of words'] = data2['bag of words'].apply(lambda x: x.lower())

# Machine learning
tf = TfidfVectorizer(analyzer='word', 
                     ngram_range=(1, 1), 
                     min_df=0, 
                     stop_words='english')

tfidf_matrix = tf.fit_transform(data2['bag of words'])
print(tfidf_matrix.data)

cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

# API app
app = Flask(__name__)
CORS(app)

# API
@app.route('/api')
@app.route('/api/recomender/<name>', methods = ['GET', 'POST'])
def send_recommenxwdatios(name):
    print(name, type(name))
    if name:
        boms_id = get_boms_id(name, data2)
        print(boms_id)
        recomendations = recommend(int(boms_id), 5)
        return jsonify(recomendations)
    print(recomendations)
    return 'API Not Working'


# Run Server
if __name__ == '__main__':
    app.run()

[0.06363304 0.0438786  0.06363304 ... 0.06223386 0.07805358 0.09593518]
 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [08/Jan/2021 10:10:52] "[33mGET / HTTP/1.1[0m" 404 -
127.0.0.1 - - [08/Jan/2021 10:10:57] "[33mGET /api/crawler HTTP/1.1[0m" 404 -
[2021-01-08 10:11:01,305] ERROR in app: Exception on /api/recomender/26 [GET]
Traceback (most recent call last):
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask_cors/extension.py", line 165, in wrapped_function
    return cors_after_request(app.make_response(f(*args, **kwargs)))
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "/Users/mohsin

26 <class 'str'>
S143-Si376-D <class 'str'>
101
Recommending 5 products similar to S143-Si376-D
------------------------------------------------------------


127.0.0.1 - - [08/Jan/2021 10:11:21] "[37mPOST /api/recomender/S143-Si376-D HTTP/1.1[0m" 200 -


Recommended: SuperServer-5019S-M (simillarity score: 0.9012871843400873)
Recommended: S122-Si376-D (simillarity score: 0.655773602400534)
Recommended: SuperServer-5019S-L (simillarity score: 0.6450110843565203)
Recommended: S143-SE66-MDR (simillarity score: 0.6096891471894528)
Recommended: SuperServer-5019S-WR (simillarity score: 0.5854402673967348)
SuperServer-5019S-WR <class 'str'>
202
Recommending 5 products similar to SuperServer-5019S-WR
------------------------------------------------------------


127.0.0.1 - - [08/Jan/2021 10:11:29] "[37mPOST /api/recomender/SuperServer-5019S-WR HTTP/1.1[0m" 200 -


Recommended: S143-SE66-MDR (simillarity score: 0.9156223246485974)
Recommended: SuperServer-5019S-M (simillarity score: 0.638326640071691)
Recommended: S143-Si376-D (simillarity score: 0.5854402673967348)
Recommended: SuperServer-1019S-WR (simillarity score: 0.5784307788387836)
Recommended: SuperServer-1019S-WR (simillarity score: 0.5784307788387836)
S143-SE66-MDR <class 'str'>
132
Recommending 5 products similar to S143-SE66-MDR
------------------------------------------------------------


127.0.0.1 - - [08/Jan/2021 10:11:37] "[37mPOST /api/recomender/S143-SE66-MDR HTTP/1.1[0m" 200 -


Recommended: SuperServer-5019S-WR (simillarity score: 0.9156223246485974)
Recommended: SuperServer-5019S-M (simillarity score: 0.6155659511536719)
Recommended: S143-Si376-D (simillarity score: 0.6096891471894528)
Recommended: SuperServer-1019S-WR (simillarity score: 0.5695223298924028)
Recommended: SuperServer-1019S-WR (simillarity score: 0.5695223298924028)
SuperServer-5019S-M <class 'str'>
201
Recommending 5 products similar to SuperServer-5019S-M
------------------------------------------------------------


127.0.0.1 - - [08/Jan/2021 10:11:43] "[37mPOST /api/recomender/SuperServer-5019S-M HTTP/1.1[0m" 200 -


Recommended: S143-Si376-D (simillarity score: 0.9012871843400873)
Recommended: SuperServer-5019S-L (simillarity score: 0.7038623139344671)
Recommended: S122-Si376-D (simillarity score: 0.6446049219703406)
Recommended: SuperServer-5019S-WR (simillarity score: 0.638326640071691)
Recommended: S143-SE66-MDR (simillarity score: 0.6155659511536719)
