# Flipkart product data mapping

In [2]:
# importing libraries
from fuzzywuzzy import fuzz
import pandas as pd 
import re
import csv
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read the Amazon product dataset and prefix every mapped column with the
# marketplace name so the columns stay distinguishable after the later merges.
amazon = (
    pd.read_csv('dataset/AmazonProductdata.csv')
    .rename(columns={
        'Name': 'Amazon_Name',
        'Synonyms': 'Amazon_Synonyms',
        'SalesPrice': 'Amazon_SalesPrice',
        'OriginalPrice': 'Amazon_OriginalPrice',
        'Availability': 'Amazon_Availability',
        'Ratings': 'Amazon_Rating',
        'ProductLink': 'Amazon_ProductLink',
        'ImageLink': 'Amazon_ImageLink',
    })
)

In [4]:
# Read the Flipkart product dataset and prefix every mapped column with the
# marketplace name so the columns stay distinguishable after the later merges.
flipkart = (
    pd.read_csv('dataset/FlipkartDataset.csv')
    .rename(columns={
        'Name': 'Flipkart_Name',
        'Synonyms': 'Flipkart_Synonyms',
        'SalesPrice': 'Flipkart_SalesPrice',
        'OriginalPrice': 'Flipkart_OriginalPrice',
        'Rating': 'Flipkart_Rating',
        'ProductLink': 'Flipkart_ProductLink',
        'ImageLink': 'Flipkart_ImageLink',
    })
)

In [5]:
# Read the Snapdeal product dataset and prefix every mapped column with the
# marketplace name so the columns stay distinguishable after the later merges.
snapdeal = (
    pd.read_csv('dataset/SnapdealDataset.csv')
    .rename(columns={
        'Name': 'Snapdeal_Name',
        'Synonyms': 'Snapdeal_Synonyms',
        'SalesPrice': 'Snapdeal_SalesPrice',
        'OriginalPrice': 'Snapdeal_OriginalPrice',
        'Rating': 'Snapdeal_Rating',
        'ProductLink': 'Snapdeal_ProductLink',
        'ImageLink': 'Snapdeal_ImageLink',
        # NOTE(review): 'Availibility' presumably mirrors a misspelled header in
        # the CSV; the key must stay as-is for the rename to apply — confirm.
        'Availibility': 'Snapdeal_Availability',
    })
)

In [6]:
# Remove all special characters from the Amazon synonyms.
# regex=True is passed explicitly: since pandas 2.0 `Series.str.replace`
# defaults to regex=False, which would treat the character classes below as
# literal text and leave the names uncleaned.
amazon_name = amazon['Amazon_Synonyms'].str.replace(r"[^0-9,a-z,A-Z, ]", '', regex=True)
# The first pattern deliberately keeps commas; strip them in a second pass.
amazon_name = amazon_name.str.replace(r"[,]", '', regex=True)
amazon_name_lst = amazon_name.to_list()

# Remove repeated words while keeping first-occurrence order
# (dict.fromkeys preserves insertion order). Non-string entries such as
# missing values are passed through unchanged instead of crashing on .split().
new_amazon_name_lst = [
    ' '.join(dict.fromkeys(name.split())) if isinstance(name, str) else name
    for name in amazon_name_lst
]

# Expose the cleaned name as column position 1; it is the key that
# Flipkart names are fuzzy-matched against.
amazon.insert(1, "New_Amazon_Name", new_amazon_name_lst)

In [7]:
# Remove parentheses and commas from the Flipkart product names.
# regex=True is passed explicitly: since pandas 2.0 `Series.str.replace`
# defaults to regex=False, which would look for the literal text "[(),,]"
# and leave the names uncleaned.
flipkart_name = flipkart['Flipkart_Name'].str.replace(r"[(),,]", '', regex=True)
# allow_duplicates=True (previously an opaque positional `True`) lets the
# insert succeed even if a column with this label already exists.
flipkart.insert(1, "New_Flipkart_Name", flipkart_name, allow_duplicates=True)

In [8]:
# Remove parentheses and commas from the Snapdeal product names.
# regex=True is passed explicitly: since pandas 2.0 `Series.str.replace`
# defaults to regex=False, which would look for the literal text "[(),,]"
# and leave the names uncleaned.
snapdeal_name = snapdeal['Snapdeal_Name'].str.replace(r"[(),,]", '', regex=True)
# allow_duplicates=True (previously an opaque positional `True`) lets the
# insert succeed even if a column with this label already exists.
snapdeal.insert(1, "New_Snapdeal_Name", snapdeal_name, allow_duplicates=True)

# Flipkart data mapping

### 1.Flipkart with Amazon

In [9]:
def get_match(flipkart, candidates=None, threshold=85):
    """Return the Amazon product name that best fuzzy-matches a Flipkart name.

    Every candidate is scored against ``flipkart`` with
    ``fuzz.token_set_ratio``; the candidate with the highest score that
    reaches ``threshold`` wins. On ties the earliest candidate is kept.

    NOTE: a later cell redefines ``get_match`` for the Snapdeal mapping, so
    this version must be used before that cell is executed.

    Parameters
    ----------
    flipkart : str
        Cleaned Flipkart product name to look up.
    candidates : iterable of str, optional
        Names to match against. Defaults to ``amazon['New_Amazon_Name']``.
    threshold : int, optional
        Minimum score a candidate needs to count as a match (default 85).

    Returns
    -------
    str
        The best-matching candidate, or ``''`` when no candidate reaches
        the threshold.
    """
    if candidates is None:
        candidates = amazon['New_Amazon_Name']

    best_name = ''
    best_score = -1

    # Iterate the Amazon product names (or the supplied candidates)
    for product_name in candidates:

        # string matching using token_set_ratio function
        score = fuzz.token_set_ratio(flipkart, product_name)

        # Track the running best directly: no exception-driven "no match"
        # path and no max() over all hits on every iteration. The strict
        # '>' keeps the first candidate when scores tie.
        if score >= threshold and score > best_score:
            best_name, best_score = product_name, score

    # '' signals "not matched"
    return best_name

    
# Look up the best Amazon match for every Flipkart product, in row order
# (get_match yields '' for products without a sufficiently close match).
amazon_result = [get_match(name) for name in flipkart['New_Flipkart_Name']]

# Store the matched names at column position 2; the later merge joins on them.
flipkart.insert(2, "New_Amazon_Name", amazon_result)

### 2.Flipkart with Snapdeal

In [11]:
def get_match(flipkart, candidates=None, threshold=85):
    """Return the Snapdeal product name that best fuzzy-matches a Flipkart name.

    Every candidate is scored against ``flipkart`` with
    ``fuzz.token_set_ratio``; the candidate with the highest score that
    reaches ``threshold`` wins. On ties the earliest candidate is kept.

    NOTE: this definition replaces the earlier Amazon ``get_match`` defined
    above, so the Amazon mapping must already have been computed.

    Parameters
    ----------
    flipkart : str
        Cleaned Flipkart product name to look up.
    candidates : iterable of str, optional
        Names to match against. Defaults to ``snapdeal['New_Snapdeal_Name']``.
    threshold : int, optional
        Minimum score a candidate needs to count as a match (default 85).

    Returns
    -------
    str
        The best-matching candidate, or ``''`` when no candidate reaches
        the threshold.
    """
    if candidates is None:
        candidates = snapdeal['New_Snapdeal_Name']

    best_name = ''
    best_score = -1

    # Iterate the Snapdeal product names (or the supplied candidates)
    for product_name in candidates:

        # string matching using token_set_ratio function
        score = fuzz.token_set_ratio(flipkart, product_name)

        # Track the running best directly: no exception-driven "no match"
        # path and no max() over all hits on every iteration. The strict
        # '>' keeps the first candidate when scores tie.
        if score >= threshold and score > best_score:
            best_name, best_score = product_name, score

    # '' signals "not matched"
    return best_name

# Look up the best Snapdeal match for every Flipkart product, in row order
# (get_match yields '' for products without a sufficiently close match).
snapdeal_result = [get_match(name) for name in flipkart['New_Flipkart_Name']]

# Store the matched names at column position 3; the later merge joins on them.
flipkart.insert(3, "New_Snapdeal_Name", snapdeal_result)

# Merge Flipkart dataset

In [13]:
# Left-join Amazon onto Flipkart via the matched Amazon name, then left-join
# Snapdeal via the matched Snapdeal name. Left joins keep every Flipkart row,
# including those without a match on the other marketplaces.
Flipkart_data = pd.merge(flipkart, amazon, how='left', on='New_Amazon_Name')
new_flipkart_data = pd.merge(Flipkart_data, snapdeal, how='left', on='New_Snapdeal_Name')

In [14]:
# Discard the identifier columns and the helper name columns that were only
# needed as join keys.
unwanted_columns = ['Id', 'ID_x', 'ID_y', 'New_Amazon_Name', 'New_Flipkart_Name', 'New_Snapdeal_Name']
new_flipkart_data = new_flipkart_data.drop(columns=unwanted_columns)

In [15]:
# These are the columns of the merged dataset (a bare last expression is shown as the cell output)
new_flipkart_data.columns

Index(['Flipkart_Name', 'Flipkart_Synonyms', 'Flipkart_SalesPrice',
       'Flipkart_OriginalPrice', 'Flipkart_Rating', 'Flipkart_ProductLink',
       'Flipkart_ImageLink', 'Amazon_Name', 'Amazon_Synonyms',
       'Amazon_SalesPrice', 'Amazon_OriginalPrice', 'Amazon_Availability',
       'Amazon_Rating', 'Amazon_ProductLink', 'Amazon_ImageLink',
       'Snapdeal_Name', 'Snapdeal_Synonyms', 'Snapdeal_SalesPrice',
       'Snapdeal_OriginalPrice', 'Snapdeal_Rating', 'Snapdeal_ProductLink',
       'Snapdeal_ImageLink', 'Snapdeal_Availability'],
      dtype='object')

# Save data into csv file

In [18]:
# Remove rows that are exact duplicates across all columns, keeping the first occurrence
new_flipkart_data = new_flipkart_data.drop_duplicates(keep='first')

# Write the mapped dataset to a CSV file without the index column
new_flipkart_data.to_csv('FlipkartMappedData.csv', index=False)