### Product Recomender Engine

**Developer:** Mayana Mohsin Khan <br/>

**Date Created:** 30th December 2020 <br/>
**Date Modified:** 1st January 2021

### Packages

In [12]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import openpyxl

### Load Dataset

In [14]:
data = pd.read_excel('specifications.xlsx')
data.drop([col for col in data.columns if "Unnamed" in col], axis=1, inplace=True)
data['partner'] = data['partner'].fillna('intel')
data.head()

Unnamed: 0,boms_id,partner,desc,product_id,name,chassis,motherboard,form_factor,led_indicators,on_off_switch,...,Intel® Build Assurance Technology,Intel® Efficient Power Technology,Intel® Quiet Thermal Technology,intel_virtual_technology_for_directed_i/o,Intel® Rapid Storage Technology enterprise,Intel® Quiet System Technology,Intel® Fast Memory Access,Intel® Flex Memory Access,TPM Version,Intel® Transparent Supply Chain
0,26,supermicro,826BE1C4-R1K23LPB + X11DPi-N,,D2123-DS228-MDR,CSE-826BE1C4-R1K23LPB,MBD-X11DPi-N,2U,,,...,,,,,,,,,,
1,28,supermicro,847BE1C4-R1K23LPB + X11DPi-N,,D4363-DS228-MDR,CSE-847BE1C4-R1K23LPB,MBD-X11DPi-N,4U,,,...,,,,,,,,,,
2,31,supermicro,825TQC-R1K03LPB + X11DPi-N,,D283-DS228-MDR,CSE-825TQC-R1K03LPB,MBD-X11DPi-N,2U,,,...,,,,,,,,,,
3,32,supermicro,836BE1C-R1K23B + X11DPi-N,,D3163-DS228-MDR,CSE-836BE1C-R1K23B,MBD-X11DPi-N,3U,,,...,,,,,,,,,,
4,33,supermicro,846BE1C-R1K23B + X11DPi-N,,D4243-DS228-MDR,CSE-846BE1C-R1K23B,MBD-X11DPi-N,4U,,,...,,,,,,,,,,


### Feature Engineering

In [15]:
def drop_nan_col(df, threshold): 
    for i in df.columns:
        if (float(df[i].isnull().sum())/df[i].shape[0]) > threshold:
            df = df.drop(i, axis=1) 
    return df

In [16]:
data2 = drop_nan_col(data, 0.9)
data2.head()

Unnamed: 0,boms_id,partner,desc,name,chassis,motherboard,form_factor,led_indicators,on_off_switch,fan_size,...,motherboard_chipset,lan_ports,onboard_connection,gbe,onboard_network_controllers,supported_storage_drive_interfaces,num_of_processors_supported,processor_socket,num_of_fasns,type
0,26,supermicro,826BE1C4-R1K23LPB + X11DPi-N,D2123-DS228-MDR,CSE-826BE1C4-R1K23LPB,MBD-X11DPi-N,2U,,,80x80x38 mm,...,Intel® C621,2.0,RJ45 Gigabit Ethernet LAN ports,1.0,Dual LAN with 1GbE LAN with Intel® X722,SATA3,2.0,"Dual Socket LGA-3647 (Socket P) supported, CPU...",3,D
1,28,supermicro,847BE1C4-R1K23LPB + X11DPi-N,D4363-DS228-MDR,CSE-847BE1C4-R1K23LPB,MBD-X11DPi-N,4U,,,80x80x38 mm,...,Intel® C621,2.0,RJ45 Gigabit Ethernet LAN ports,1.0,Dual LAN with 1GbE LAN with Intel® X722,SATA3,2.0,"Dual Socket LGA-3647 (Socket P) supported, CPU...",7,D
2,31,supermicro,825TQC-R1K03LPB + X11DPi-N,D283-DS228-MDR,CSE-825TQC-R1K03LPB,MBD-X11DPi-N,2U,,,80x80x38 mm,...,Intel® C621,2.0,RJ45 Gigabit Ethernet LAN ports,1.0,Dual LAN with 1GbE LAN with Intel® X722,SATA3,2.0,"Dual Socket LGA-3647 (Socket P) supported, CPU...",3,D
3,32,supermicro,836BE1C-R1K23B + X11DPi-N,D3163-DS228-MDR,CSE-836BE1C-R1K23B,MBD-X11DPi-N,3U,,,80x32 mm 6.7K RPM SC836 Rear Exhaust Fan w/ Ho...,...,Intel® C621,2.0,RJ45 Gigabit Ethernet LAN ports,1.0,Dual LAN with 1GbE LAN with Intel® X722,SATA3,2.0,"Dual Socket LGA-3647 (Socket P) supported, CPU...",23,D
4,33,supermicro,846BE1C-R1K23B + X11DPi-N,D4243-DS228-MDR,CSE-846BE1C-R1K23B,MBD-X11DPi-N,4U,,,80x32 mm 6.7K RPM SC836 Rear Exhaust Fan w/ Ho...,...,Intel® C621,2.0,RJ45 Gigabit Ethernet LAN ports,1.0,Dual LAN with 1GbE LAN with Intel® X722,SATA3,2.0,"Dual Socket LGA-3647 (Socket P) supported, CPU...",23,D


In [17]:
print(data.shape)
print(data2.shape)

(453, 146)
(453, 54)


In [18]:
data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 453 entries, 0 to 452
Data columns (total 54 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   boms_id                                453 non-null    int64  
 1   partner                                453 non-null    object 
 2   desc                                   449 non-null    object 
 3   name                                   453 non-null    object 
 4   chassis                                443 non-null    object 
 5   motherboard                            441 non-null    object 
 6   form_factor                            365 non-null    object 
 7   led_indicators                         145 non-null    object 
 8   on_off_switch                          145 non-null    object 
 9   fan_size                               235 non-null    object 
 10  product_colour                         412 non-null    object 
 11  rohs_c

In [20]:
# data2['name'].value_counts()[:].plot(kind='barh', figsize=[10,120], fontsize=13, color='navy').invert_yaxis()

### Bag of words

In [19]:
data2['bag of words'] = data2[data2.columns[:]].apply(lambda x: ' '.join(x.dropna().astype(str)),axis=1)
data2['bag of words'] = data2['bag of words'].apply(lambda x: x.lower())
data2['bag of words'].head()

0    26 supermicro 826be1c4-r1k23lpb + x11dpi-n d21...
1    28 supermicro 847be1c4-r1k23lpb + x11dpi-n d43...
2    31 supermicro 825tqc-r1k03lpb + x11dpi-n d283-...
3    32 supermicro 836be1c-r1k23b + x11dpi-n d3163-...
4    33 supermicro 846be1c-r1k23b + x11dpi-n d4243-...
Name: bag of words, dtype: object

### TF-IDF and Cosine Simillarity

In [20]:
tf = TfidfVectorizer(analyzer='word', 
                     ngram_range=(1, 1), 
                     min_df=0, 
                     stop_words='english')

In [21]:
tfidf_matrix = tf.fit_transform(data2['bag of words'])

In [22]:
tfidf_matrix.data

array([0.06363304, 0.0438786 , 0.06363304, ..., 0.06223386, 0.07805358,
       0.09593518])

In [23]:
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [24]:
cosine_similarities

array([[1.        , 0.63477188, 0.49245745, ..., 0.05056173, 0.03489777,
        0.03371032],
       [0.63477188, 1.        , 0.44832222, ..., 0.04801105, 0.03313729,
        0.03200974],
       [0.49245745, 0.44832222, 1.        , ..., 0.04797966, 0.03311563,
        0.03198882],
       ...,
       [0.05056173, 0.04801105, 0.04797966, ..., 1.        , 0.72567777,
        0.70098544],
       [0.03489777, 0.03313729, 0.03311563, ..., 0.72567777, 1.        ,
        0.78577134],
       [0.03371032, 0.03200974, 0.03198882, ..., 0.70098544, 0.78577134,
        1.        ]])

### Recommender functions 

In [25]:
def similarity_algorithm(): 
    results = {}
    for idx, row in data2.iterrows():
        similar_indices = cosine_similarities[idx].argsort()[:-100:-1]
        similar_items = [(cosine_similarities[idx][i], data2['boms_id'][i]) for i in similar_indices]
        results[row['boms_id']] = similar_items[1:]
    return results

In [26]:
def item(id):
    return data2.loc[data2['boms_id'] == id]['name'].tolist()[0].split(' - ')[0]

In [48]:
def recommend(boms_id, num):
    print("Recommending " + str(num) + " products similar to " + item(boms_id))
    print("-"*60)
    results = similarity_algorithm()
    recs = results[boms_id][:num]
    recomendations = []
    recomendations_dict = {}
    recomendations_dict['status'] = 200
    for rec in recs:
        recomendations.append((str(rec[1]), item(rec[1])))
        print("Recommended: " + item(rec[1]) + " (simillarity score: " + str(rec[0]) + ")" )
    recomendations_dict['data'] = [dict(recomendations)]
    return recomendations_dict

### API fetch

Task:
- API from frontend to backend with URL, name and **boms_id** of product.

### Prediction

In [208]:
# recomender_result = {}
# for boms in data2.boms_id:
#     recomender_result[boms] = recommend(boms_id = boms , num=5)

In [207]:
# recomender_result

In [206]:
# result = []
# for i in range(len(recomender_result)):
#     for key, index in recomender_result.items():
#         for k , value in index.items():
#             result.append([key, k, value])

In [205]:
# data3 = pd.DataFrame.from_records(result)
# data3 = data3.astype(str)
# data3

In [204]:
# data3 = data3.rename(columns={0:'boms_id', 1:'simillar_boms_id', 2:'name'})
# data3

In [202]:
# data3['boms_id'].value_counts()[:].plot(kind='barh', figsize=[10,120], fontsize=13, color='navy').invert_yaxis()

In [28]:
boms_id = 100

In [49]:
recomendations = recommend(boms_id = boms_id, num=5)

Recommending 5 products similar to S122-Si376-D
------------------------------------------------------------
Recommended: SuperServer-5019S-L (simillarity score: 0.916267818512312)
Recommended: S143-Si376-D (simillarity score: 0.655773602400534)
Recommended: SuperServer-5019S-M (simillarity score: 0.6446049219703406)
Recommended: SuperServer-5039D-i (simillarity score: 0.6258234083475249)
Recommended: SuperServer-5019S-WR (simillarity score: 0.5179195977913175)


In [50]:
recomendations

{'status': 200,
 'data': [{'200': 'SuperServer-5019S-L',
   '101': 'S143-Si376-D',
   '201': 'SuperServer-5019S-M',
   '294': 'SuperServer-5039D-i',
   '202': 'SuperServer-5019S-WR'}]}

In [227]:
def get_boms_id(name, data):
    bom_id = data[data['name'] == name].boms_id.values[0]
    return bom_id 

In [231]:
name = 'D2123-DS228-MDR'

In [234]:
# data2[data2['name'] == name].boms_id.values[0]
boms_id = get_boms_id(name, data2)
boms_id

26

In [235]:
recomendations = recommend(boms_id = boms_id, num=5)

Recommending 5 products similar to D2123-DS228-MDR
------------------------------------------------------------
Recommended: D2123-SS228-MDR (simillarity score: 0.6576068709246723)
Recommended: DNS4363-DS228-MDR (simillarity score: 0.6410497808757499)
Recommended: D4363-DS228-MDR (simillarity score: 0.634771876792206)
Recommended: D2242-DS228-NMDOR (simillarity score: 0.6316551776985586)
Recommended: D2123-DA32-MDR (simillarity score: 0.59859521233277)


### API Deployment

Task:
- Send API response containing 5 simillar products names back to frontend

In [None]:
from flask import Flask, jsonify, request
from flask_cors import CORS

# Init app
app = Flask(__name__)
CORS(app)

# API
@app.route('/api')
# @app.route('/api/recomender', methods = ['GET', 'POST'])
# def send_recommenxwdatios():
#     boms_id = request.json
#     print(boms_id, request.json)
#     recomendations = {}
#     if boms_id:
#         print(boms_id)
#         recomendations = recommend(boms_id, 5)
#         return jsonify(recomendations)
#     print(recomendations)
#     return 'API Not Working'

@app.route('/api/recomender/<boms_id>', methods = ['GET', 'POST'])
def send_recommenxwdatios(boms_id):
    recomendations = {}
    print(boms_id, type(boms_id))
    if boms_id:
        recomendations = recommend(int(boms_id), 5)
        recomendations['error'] = ''
        return jsonify(recomendations)
    print(recomendations)
    return recomendations

# @app.route('/api/recomender/<name>', methods = ['GET', 'POST'])
# def send_recommenxwdatios(name):
#     print(name, type(name))
#     if name:
#         boms_id = get_boms_id(name, data2)
#         print(boms_id)
#         recomendations = recommend(int(boms_id), 5)
#         return jsonify(recomendations)
#     print(recomendations)
#     return 'API Not Working'


# Run Server
if __name__ == '__main__':
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


26 <class 'str'>
Recommending 5 products similar to D2123-DS228-MDR
------------------------------------------------------------


127.0.0.1 - - [11/Jan/2021 16:04:25] "[37mGET /api/recomender/26 HTTP/1.1[0m" 200 -


Recommended: D2123-SS228-MDR (simillarity score: 0.6576068709246723)
Recommended: DNS4363-DS228-MDR (simillarity score: 0.6410497808757499)
Recommended: D4363-DS228-MDR (simillarity score: 0.634771876792206)
Recommended: D2242-DS228-NMDOR (simillarity score: 0.6316551776985586)
Recommended: D2123-DA32-MDR (simillarity score: 0.59859521233277)


[2021-01-11 16:04:28,509] ERROR in app: Exception on /api/recomender/25 [GET]
Traceback (most recent call last):
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask_cors/extension.py", line 165, in wrapped_function
    return cors_after_request(app.make_response(f(*args, **kwargs)))
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/_compat.py", line 39, in reraise
    raise value
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 1950, in full_dispa

25 <class 'str'>


### Everything together

In [None]:
# Packages
from flask import Flask, jsonify, request
from flask_cors import CORS
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load Dataset
data = pd.read_excel('specifications.xlsx')
data.drop([col for col in data.columns if "Unnamed" in col], axis=1, inplace=True)
data['partner'] = data['partner'].fillna('intel')

# Axulliary functions
def drop_nan_col(df, threshold): 
    for i in df.columns:
        if (float(df[i].isnull().sum())/df[i].shape[0]) > threshold:
            df = df.drop(i, axis=1) 
    return df

def similarity_algorithm(): 
    results = {}
    for idx, row in data2.iterrows():
        similar_indices = cosine_similarities[idx].argsort()[:-100:-1]
        similar_items = [(cosine_similarities[idx][i], data2['boms_id'][i]) for i in similar_indices]
        results[row['boms_id']] = similar_items[1:]
    return results

def item(id):
    return data2.loc[data2['boms_id'] == id]['name'].tolist()[0].split(' - ')[0]

def recommend(boms_id, num):
    print("Recommending " + str(num) + " products similar to " + item(boms_id))
    print("-"*60)
    results = similarity_algorithm()
    recs = results[boms_id][:num]
    recomendations = []
    for rec in recs:
        recomendations.append((str(rec[1]), item(rec[1])))
        print("Recommended: " + item(rec[1]) + " (simillarity score: " + str(rec[0]) + ")" )
    return dict(recomendations)

def get_boms_id(name, data):
    bom_id = data[data['name'] == name].boms_id.values[0]
    return bom_id 

# Data pre-processing
data2 = drop_nan_col(data, 0.9)
data2['bag of words'] = data2[data2.columns[:]].apply(lambda x: ' '.join(x.dropna().astype(str)),axis=1)
data2['bag of words'] = data2['bag of words'].apply(lambda x: x.lower())

# Machine learning
tf = TfidfVectorizer(analyzer='word', 
                     ngram_range=(1, 1), 
                     min_df=0, 
                     stop_words='english')

tfidf_matrix = tf.fit_transform(data2['bag of words'])
print(tfidf_matrix.data)

cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

# API app
app = Flask(__name__)
CORS(app)

# API
@app.route('/api')
@app.route('/api/recomender/<name>', methods = ['GET', 'POST'])
def send_recommenxwdatios(name):
    print(name, type(name))
    if name:
        boms_id = get_boms_id(name, data2)
        print(boms_id)
        recomendations = recommend(int(boms_id), 5)
        return jsonify(recomendations)
    print(recomendations)
    return 'API Not Working'


# Run Server
if __name__ == '__main__':
    app.run()

[0.06363304 0.0438786  0.06363304 ... 0.06223386 0.07805358 0.09593518]
 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [08/Jan/2021 10:10:52] "[33mGET / HTTP/1.1[0m" 404 -
127.0.0.1 - - [08/Jan/2021 10:10:57] "[33mGET /api/crawler HTTP/1.1[0m" 404 -
[2021-01-08 10:11:01,305] ERROR in app: Exception on /api/recomender/26 [GET]
Traceback (most recent call last):
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask_cors/extension.py", line 165, in wrapped_function
    return cors_after_request(app.make_response(f(*args, **kwargs)))
  File "/Users/mohsinkhan/opt/anaconda3/lib/python3.7/site-packages/flask/app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "/Users/mohsin

26 <class 'str'>
S143-Si376-D <class 'str'>
101
Recommending 5 products similar to S143-Si376-D
------------------------------------------------------------


127.0.0.1 - - [08/Jan/2021 10:11:21] "[37mPOST /api/recomender/S143-Si376-D HTTP/1.1[0m" 200 -


Recommended: SuperServer-5019S-M (simillarity score: 0.9012871843400873)
Recommended: S122-Si376-D (simillarity score: 0.655773602400534)
Recommended: SuperServer-5019S-L (simillarity score: 0.6450110843565203)
Recommended: S143-SE66-MDR (simillarity score: 0.6096891471894528)
Recommended: SuperServer-5019S-WR (simillarity score: 0.5854402673967348)
SuperServer-5019S-WR <class 'str'>
202
Recommending 5 products similar to SuperServer-5019S-WR
------------------------------------------------------------


127.0.0.1 - - [08/Jan/2021 10:11:29] "[37mPOST /api/recomender/SuperServer-5019S-WR HTTP/1.1[0m" 200 -


Recommended: S143-SE66-MDR (simillarity score: 0.9156223246485974)
Recommended: SuperServer-5019S-M (simillarity score: 0.638326640071691)
Recommended: S143-Si376-D (simillarity score: 0.5854402673967348)
Recommended: SuperServer-1019S-WR (simillarity score: 0.5784307788387836)
Recommended: SuperServer-1019S-WR (simillarity score: 0.5784307788387836)
S143-SE66-MDR <class 'str'>
132
Recommending 5 products similar to S143-SE66-MDR
------------------------------------------------------------


127.0.0.1 - - [08/Jan/2021 10:11:37] "[37mPOST /api/recomender/S143-SE66-MDR HTTP/1.1[0m" 200 -


Recommended: SuperServer-5019S-WR (simillarity score: 0.9156223246485974)
Recommended: SuperServer-5019S-M (simillarity score: 0.6155659511536719)
Recommended: S143-Si376-D (simillarity score: 0.6096891471894528)
Recommended: SuperServer-1019S-WR (simillarity score: 0.5695223298924028)
Recommended: SuperServer-1019S-WR (simillarity score: 0.5695223298924028)
SuperServer-5019S-M <class 'str'>
201
Recommending 5 products similar to SuperServer-5019S-M
------------------------------------------------------------


127.0.0.1 - - [08/Jan/2021 10:11:43] "[37mPOST /api/recomender/SuperServer-5019S-M HTTP/1.1[0m" 200 -


Recommended: S143-Si376-D (simillarity score: 0.9012871843400873)
Recommended: SuperServer-5019S-L (simillarity score: 0.7038623139344671)
Recommended: S122-Si376-D (simillarity score: 0.6446049219703406)
Recommended: SuperServer-5019S-WR (simillarity score: 0.638326640071691)
Recommended: S143-SE66-MDR (simillarity score: 0.6155659511536719)
