# Feature Engineering
## 1. Create new columns for scoring system

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

# Cut-offs used to turn numeric columns into boolean scoring criteria.
# The sales/favorite cut-offs are applied to the min-max scaled (0-1) values.
RATING_THRESHOLD = 4.2
DISCOUNT_THRESHOLD = 0.03
TOP_SELLING_THRESHOLD = 0.15
HIGH_INTEREST_THRESHOLD = 0.15

df_cleaned = pd.read_csv('cleaned_data.csv')

# Work on a copy so df_cleaned stays untouched for later cells.
df_eng = df_cleaned.copy()
df_eng['Discount Percent'] = df_eng['Discount Range'] - df_eng['Price Range']
# 'Total Sold' is overwritten with its min-max scaled value.
df_eng['Total Sold'] = scaler.fit_transform(df_eng[['Total Sold']])
# Temporarily holds the scaled 'Favorite' count; converted to a boolean flag below.
df_eng['High Interest'] = scaler.fit_transform(df_eng[['Favorite']])

# Conditions: vectorized comparisons produce the boolean flags directly
# (missing values compare as False, same as the element-wise checks they replace).
df_eng['Highly Rated'] = df_eng['Current Rating'].astype(float) > RATING_THRESHOLD
df_eng['Discounted'] = df_eng['Discount Percent'].astype(float) > DISCOUNT_THRESHOLD
df_eng['Top Selling'] = df_eng['Total Sold'] > TOP_SELLING_THRESHOLD
df_eng['High Interest'] = df_eng['High Interest'] > HIGH_INTEREST_THRESHOLD

# NOTE: scaling of 'Current Rating' and 'Price Range' is currently disabled,
# so both columns keep their raw values.

# New columns: a product is 'Trusted' when it is flagged 'Preferred' or 'Mall'.
df_eng['Trusted'] = df_eng['Preferred'] | df_eng['Mall']

model_features = ['Price Range']
scoring_criteria = ['Trusted', 'Highly Rated', 'Discounted', 'Top Selling', 'High Interest']

display(df_eng[model_features].head(3))
display(df_eng[scoring_criteria].head(3))

Unnamed: 0,Price Range
0,18.0
1,64.0
2,59.0


Unnamed: 0,Trusted,Highly Rated,Discounted,Top Selling,High Interest
0,True,True,True,False,False
1,False,True,True,False,False
2,False,True,True,False,False


# Scoring System
## 1. Get 20 sample products for the scoring system. Replace this with the model's output product list once available.

In [3]:
import random

# TODO: replace the dummy sample below with the model's output, e.g.:
# nearest = df_eng['Product ID'].isin(nearest['Product ID']).astype(int)

# Getting dummy products: a random sample of df_eng with a random relevance per product.
# Both the sample and the relevance values are seeded so a fresh
# "Restart & Run All" reproduces the same product list and scores.
N_SAMPLE_PRODUCTS = 20
SAMPLE_SEED = 42
sample_rng = np.random.default_rng(SAMPLE_SEED)

# Cap the sample size so a frame with fewer rows than requested does not raise.
prd_list = df_eng.sample(n=min(N_SAMPLE_PRODUCTS, len(df_eng)), random_state=SAMPLE_SEED)
prd_list['relevance'] = sample_rng.uniform(0, 1, len(prd_list))
# The score starts out equal to the relevance; the scoring cell adds bonuses on top.
prd_list['score'] = prd_list['relevance']
prd_list.head(3)

Unnamed: 0.1,Unnamed: 0,URL,Page,Preferred,Mall,Product Name,Main Category,Sub Category 1,Sub Category 2,Current Rating,...,With Media,Description,Discount Percent,High Interest,Highly Rated,Discounted,Top Selling,Trusted,relevance,score
10937,10966,https://shopee.ph/COMFORTABLE-TO-WEAR-NEOPRENE...,65.0,False,False,COMFORTABLE TO WEAR NEOPRENE WASHABLE FACE MAS...,Sports & Travel,Travel Accessories,Pollution Masks,4.8,...,43,b'So comfortable to wearWell-sewnHigh qualityW...,-22.0,False,True,False,False,False,0.429273,0.429273
5799,5814,https://shopee.ph/Rohto-Eyedrops-Japan-2023-Ex...,38.0,True,False,Rohto Eyedrops Japan 2023 Expiration,Health & Personal Care,Personal Care,Eye Care,5.0,...,226,Authenticity GuaranteedExpiration 2022📌Blue: I...,-172.0,False,True,False,False,True,0.884201,0.884201
15230,15338,https://shopee.ph/-COD-Sexy-Panty-Women-Lace-U...,37.0,False,False,[ COD ] Sexy Panty Women Lace Underwear Cotton...,Women's Apparel,Lingerie & Nightwear,Panty,4.9,...,0,b'',90.0,False,True,True,False,False,0.116233,0.116233


## 2. Score and sort products

In [4]:
# Testing scoring system on dummy products

# Products rated at or below this value are dropped before scoring.
MIN_RATING = 3.8

display_columns = ['Product Name'] + scoring_criteria + ['relevance', 'score']

# Filtering low quality products.
# .copy() makes scored_list an independent frame, so the score updates below
# do not write into a slice of prd_list (avoids SettingWithCopyWarning).
scored_list = prd_list[prd_list['Current Rating'] > MIN_RATING].copy()
display(scoring_criteria)
# Ranking before any bonus is applied (score == relevance at this point).
display(scored_list[display_columns].sort_values(by=['score'], ascending=False).head())

# Scoring System: additive bonus granted for each criterion a product satisfies.
trusted_bias = 0.05
highly_rated_bias = 0.05
discounted_bias = 0.05
top_selling_bias = 0.05
high_interest_bias = 0.05

bias_by_criterion = {
    'Trusted': trusted_bias,
    'Highly Rated': highly_rated_bias,
    'Discounted': discounted_bias,
    'Top Selling': top_selling_bias,
    'High Interest': high_interest_bias,
}

# Bonuses are added one criterion at a time, in the order listed above.
for criterion, bias in bias_by_criterion.items():
    # Only rows whose flag equals True receive the bonus.
    meets_criterion = scored_list[criterion].eq(True)
    scored_list.loc[meets_criterion, 'score'] += bias

# Ranking after the bonuses; scores may exceed 1.0 because bonuses stack on relevance.
display(scored_list[display_columns].sort_values(by=['score'], ascending=False).head())

['Trusted', 'Highly Rated', 'Discounted', 'Top Selling', 'High Interest']

Unnamed: 0,Product Name,Trusted,Highly Rated,Discounted,Top Selling,High Interest,relevance,score
4974,Luxe Organix Niacinamide (Vit B3) Miracle Repa...,True,True,False,False,False,0.943544,0.943544
7643,WISDOM 294 6Colors BALLPEN school supplies,True,True,True,False,False,0.934876,0.934876
5799,Rohto Eyedrops Japan 2023 Expiration,True,True,False,False,False,0.884201,0.884201
5792,Made in Japan Face Mask Surgical Disposable 3p...,False,True,False,False,False,0.882064,0.882064
15493,Kelly Cod Beltbag Waist Bag Fanny Pack Sidebag...,False,True,True,False,False,0.872288,0.872288


Unnamed: 0,Product Name,Trusted,Highly Rated,Discounted,Top Selling,High Interest,relevance,score
7643,WISDOM 294 6Colors BALLPEN school supplies,True,True,True,False,False,0.934876,1.084876
4974,Luxe Organix Niacinamide (Vit B3) Miracle Repa...,True,True,False,False,False,0.943544,1.043544
5799,Rohto Eyedrops Japan 2023 Expiration,True,True,False,False,False,0.884201,0.984201
15493,Kelly Cod Beltbag Waist Bag Fanny Pack Sidebag...,False,True,True,False,False,0.872288,0.972288
13508,7color Adult sleep pajama high quality cotton ...,False,True,True,False,False,0.83568,0.93568
