In [10]:
import pandas as pd
import numpy as np

In [11]:
# Load both marketplace samples with the same settings: latin-1 decoding, and
# every row containing a missing value in any column is dropped.
amazon, flipkart = (
    pd.read_csv(csv_path, encoding='latin-1').dropna()
    for csv_path in ('amz_com-ecommerce_sample.csv',
                     'flipkart_com-ecommerce_sample.csv')
)

In [12]:
# Tag every row with the marketplace it came from so the two catalogues can
# still be told apart once they are combined.
amazon = amazon.assign(source='amazon')
flipkart = flipkart.assign(source='flipkart')

In [13]:
# Keep only the columns used for matching and price comparison, in the same
# order for both catalogues.
amazon, flipkart = (
    catalogue.loc[:, ['product_name', 'retail_price', 'discounted_price', 'source']]
    for catalogue in (amazon, flipkart)
)

# Stack Amazon rows first, then Flipkart rows, under a fresh 0..n-1 index.
df = pd.concat((amazon, flipkart), ignore_index=True)

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Apply TF-IDF to product_name so each product is represented by its most
# informative words. English stop words are ignored, and the vectorizer is
# fitted on every row of `df`.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df.loc[:, 'product_name'])


In [15]:
# Split the TF-IDF rows by marketplace. The Amazon slice of `df` is kept next
# to its matrix (both are selected with the same condition) so that a row
# position in `tfidf_matrix_amazon` can be looked up in `data_amazon`.

from sklearn.metrics.pairwise import linear_kernel

data_amazon = df.loc[df['source'].eq('amazon')]
tfidf_matrix_amazon = tfidf_matrix[df['source'].eq('amazon')]
tfidf_matrix_flipkart = tfidf_matrix[df['source'].eq('flipkart')]

In [16]:
# Recommendation for all products in flipkart
#
# For each Flipkart TF-IDF row, pick the Amazon row with the highest
# linear-kernel score and record that product's name and prices. The three
# result lists are in Flipkart row order.

# Score Flipkart rows in batches: one kernel call per batch instead of one per
# product, while keeping the dense (batch x amazon) score block small.
batch_size = 256
best_match_positions = []
for start in range(0, tfidf_matrix_flipkart.shape[0], batch_size):
    scores = linear_kernel(tfidf_matrix_flipkart[start:start + batch_size],
                           tfidf_matrix_amazon)
    # argmax finds the best score in a single pass (no full sort). When several
    # Amazon rows tie for the best score, the first one is chosen.
    best_match_positions.extend(scores.argmax(axis=1).tolist())

# The positions refer to rows of tfidf_matrix_amazon, which was selected with
# the same condition as data_amazon, so one positional take fetches all matches.
matched_amazon = data_amazon.iloc[best_match_positions]
product_name_in_amazon = matched_amazon['product_name'].tolist()
retail_price_in_amazon = matched_amazon['retail_price'].tolist()
discounted_price_in_amazon = matched_amazon['discounted_price'].tolist()

In [17]:
# Side-by-side comparison table: the Flipkart columns come straight from `df`,
# the Amazon columns from the per-product match lists (same row order).
d = {
    'Product Name in Flipkart': df.loc[df['source'].eq('flipkart'), 'product_name'],
    'Retail Price in Flipkart': df.loc[df['source'].eq('flipkart'), 'retail_price'],
    'Discounted Price in Flipkart': df.loc[df['source'].eq('flipkart'), 'discounted_price'],
    'Product Name in Amazon': product_name_in_amazon,
    'Retail Price in Amazon': retail_price_in_amazon,
    'Discounted Price in Amazon': discounted_price_in_amazon,
}

# The Flipkart Series carry their positions from `df`; renumber from 0.
df_res = pd.DataFrame(d).reset_index(drop=True)

df_res.head(10)

Unnamed: 0,Product Name in Flipkart,Retail Price in Flipkart,Discounted Price in Flipkart,Product Name in Amazon,Retail Price in Amazon,Discounted Price in Amazon
0,Alisha Solid Women's Cycling Shorts,999.0,379.0,Alisha Solid Women's Cycling Shorts,982.0,438.0
1,FabHomeDecor Fabric Double Sofa Bed,32157.0,22646.0,FabHomeDecor Fabric Double Sofa Bed,32143.0,29121.0
2,AW Bellies,999.0,499.0,AW Bellies,991.0,551.0
3,Alisha Solid Women's Cycling Shorts,699.0,267.0,Alisha Solid Women's Cycling Shorts,982.0,438.0
4,Sicons All Purpose Arnica Dog Shampoo,220.0,210.0,Sicons All Purpose Arnica Dog Shampoo,208.0,258.0
5,Eternal Gandhi Super Series Crystal Paper Weig...,430.0,430.0,Eternal Gandhi Super Series Crystal Paper Weig...,427.0,473.0
6,Alisha Solid Women's Cycling Shorts,1199.0,479.0,Alisha Solid Women's Cycling Shorts,982.0,438.0
7,FabHomeDecor Fabric Double Sofa Bed,32157.0,22646.0,FabHomeDecor Fabric Double Sofa Bed,32143.0,29121.0
8,"dilli bazaaar Bellies, Corporate Casuals, Casuals",699.0,349.0,"dilli bazaaar Bellies, Corporate Casuals, Casuals",682.0,385.0
9,Alisha Solid Women's Cycling Shorts,1199.0,479.0,Alisha Solid Women's Cycling Shorts,982.0,438.0


In [20]:
# Recommendation for a specific product in flipkart

index = 1000  # positional row in the cleaned `flipkart` frame
flipkart_row = flipkart.iloc[index]
product_name_from_flipkart = flipkart_row['product_name']

# Vectorise the single query with the already-fitted vocabulary. It is kept
# under its own name: rebinding `tfidf_matrix_flipkart` here would replace the
# full Flipkart matrix with a single row, and re-running the all-products
# cells afterwards would then fail with a length mismatch.
query_vector = tfidf.transform([product_name_from_flipkart])
tfidf_matrix_amazon = tfidf_matrix[df['source'] == 'amazon']

cosine_sim = linear_kernel(query_vector, tfidf_matrix_amazon)
# argmax finds the best-scoring Amazon row in one pass (no full sort). When
# several rows tie for the best score, the first one is chosen.
similar_indices = cosine_sim[0].argmax()
amazon_row = data_amazon.iloc[similar_indices]

# One-row comparison table; a freshly built frame already has a 0-based index.
d = pd.DataFrame({
    'Product Name in Flipkart': [product_name_from_flipkart],
    'Retail Price in Flipkart': [flipkart_row['retail_price']],
    'Discounted Price in Flipkart': [flipkart_row['discounted_price']],
    'Product Name in Amazon': [amazon_row['product_name']],
    'Retail Price in Amazon': [amazon_row['retail_price']],
    'Discounted Price in Amazon': [amazon_row['discounted_price']]
})

display(d)

Unnamed: 0,Product Name in Flipkart,Retail Price in Flipkart,Discounted Price in Flipkart,Product Name in Amazon,Retail Price in Amazon,Discounted Price in Amazon
0,Leaftrend BL-SQ-R-5x5 Wooden Disposable Bowl,350.0,249.0,Leaftrend BL-SQ-R-5x5 Wooden Disposable Bowl,332.0,303.0
