## This notebook will focus on converting all russian based product names to english names using google translate

In [17]:
__author__ = "konwar.m"
__copyright__ = "Copyright 2022, AI R&D"
__credits__ = ["konwar.m"]
__license__ = "Individual Ownership"
__version__ = "1.0.1"
__maintainer__ = "konwar.m"
__email__ = "rickykonwar@gmail.com"
__status__ = "Development"

In [18]:
# Importing Libraries
import os
import copy
import tqdm
import pickle
import pandas as pd
from googletrans import Translator

In [3]:
os.chdir('..')
os.getcwd()

'c:\\Users\\manash.jyoti.konwar\\Documents\\AI_Random_Projects\\ML-Retail-Sales'

In [19]:
# Example on how to run google translate api
translator = Translator()
translation = translator.translate('안녕하세요.', dest='ja')
print(f"{translation.origin} ({translation.src}) --> {translation.text} ({translation.dest})")

안녕하세요. (ko) --> こんにちは。 (ja)


In [20]:
# Reading item and categories files and shop names
item_category_data = pd.read_csv(r'datasets\item_categories.csv')
item_data = pd.read_csv('datasets\items.csv')
shop_data = pd.read_csv('datasets\shops.csv')

In [21]:
# Forming Item Category Dictionary
item_category_dict = {} if not os.path.exists(os.path.join('datasets','saved_item_categories.pkl')) else pickle.load(open(os.path.join('datasets','saved_item_categories.pkl'), 'rb'))
item_dict = {} if not os.path.exists(os.path.join('datasets','saved_item.pkl')) else pickle.load(open(os.path.join('datasets','saved_item.pkl'), 'rb'))
shop_dict = {} if not os.path.exists(os.path.join('datasets','saved_stores.pkl')) else pickle.load(open(os.path.join('datasets','saved_stores.pkl'), 'rb'))

In [22]:
def translate_text(original_text, destination_lang='en'):
    try:
        translator = Translator()
        translation = translator.translate(original_text, dest=destination_lang)
        return translation.text
    except Exception as ex:
        print('Caught Exception while translating text: %s with exception as %s' %(original_text, ex))

In [23]:
def extract_translated_text(input_dict, original_text):
    if original_text in input_dict.keys():
        return input_dict[original_text]
    else:
        return original_text

In [24]:
# Convert item categories names
# Check if translated item categories file exists
if not os.path.exists(os.path.join('datasets','translated_item_categories.csv')):
    for category_name in tqdm.tqdm(list(item_category_data.item_category_name.unique()), desc='Translating Item Categories to English'):
        if category_name not in item_category_dict.keys():
            item_category_dict[category_name] = translate_text(category_name)
    item_category_data['translated_item_category_name'] = item_category_data['item_category_name'].apply(lambda x: extract_translated_text(input_dict=item_category_dict, original_text=x))
else:
    item_category_data = pd.read_csv(os.path.join('datasets','translated_item_categories.csv'))
item_category_data.head()

Translating Item Categories to English: 100%|██████████| 84/84 [00:00<00:00, 83926.04it/s]


Unnamed: 0,item_category_name,item_category_id,translated_item_category_name
0,PC - Гарнитуры/Наушники,0,PC - Headset / Headphones
1,Аксессуары - PS2,1,Accessories - PS2.
2,Аксессуары - PS3,2,Accessories - PS3.
3,Аксессуары - PS4,3,Accessories - PS4.
4,Аксессуары - PSP,4,Accessories - PSP.


In [25]:
# Convert shop names
# Check if translated shop file exists
if not os.path.exists(os.path.join('datasets','translated_shops.csv')):
    for shop_name in tqdm.tqdm(list(shop_data.shop_name.unique()), desc='Translating Shop Names to English'):
        if shop_name not in shop_dict.keys():
            shop_dict[shop_name] = translate_text(shop_name)
    shop_data['translated_shop_name'] = shop_data['shop_name'].apply(lambda x: extract_translated_text(input_dict=shop_dict, original_text=x))
else:
    shop_data = pd.read_csv(os.path.join('datasets','translated_shops.csv'))
shop_data.head()

Translating Shop Names to English: 100%|██████████| 60/60 [00:00<?, ?it/s]


Unnamed: 0,shop_name,shop_id,translated_shop_name
0,"!Якутск Орджоникидзе, 56 фран",0,"! Yakutsk Ordzhonikidze, 56 Fran"
1,"!Якутск ТЦ ""Центральный"" фран",1,"! Yakutsk shopping center ""Central"" Fran"
2,"Адыгея ТЦ ""Мега""",2,"Adygea TC ""Mega"""
3,"Балашиха ТРК ""Октябрь-Киномир""",3,"Balashikha TRK ""October-Kinomir"""
4,"Волжский ТЦ ""Волга Молл""",4,"Volzhsky shopping center ""Volga Mall"""


In [26]:
# Convert item names
# Check if translated item file exists
if not os.path.exists(os.path.join('datasets','translated_items.csv')):
    for item_name in tqdm.tqdm(list(item_data.item_name.unique()), desc='Translating Item Names to English'):
        if item_name not in item_dict.keys():
            item_dict[item_name] = translate_text(item_name)
    item_data['translated_item_name'] = item_data['item_name'].apply(lambda x: extract_translated_text(input_dict=item_dict, original_text=x))
else:
    item_data = pd.read_csv(os.path.join('datasets','translated_items.csv'))
item_data.head()

Translating Item Names to English: 100%|██████████| 22170/22170 [00:00<00:00, 1056378.53it/s]


Unnamed: 0,item_name,item_id,item_category_id,translated_item_name
0,! ВО ВЛАСТИ НАВАЖДЕНИЯ (ПЛАСТ.) D,0,40,LuckyIn the power of the puff (layer.) D
1,!ABBYY FineReader 12 Professional Edition Full...,1,76,! ABBYY FineReader 12 Professional Edition Ful...
2,***В ЛУЧАХ СЛАВЫ (UNV) D,2,40,*** In the rays of glory (UNV) D
3,***ГОЛУБАЯ ВОЛНА (Univ) D,3,40,*** Blue Wave (Univ) D
4,***КОРОБКА (СТЕКЛО) D,4,40,*** Box (Glass) D


In [13]:
# Check for translation issues
copied_item_dict=copy.deepcopy(item_dict)
for k in tqdm.tqdm(copied_item_dict.keys(), desc='Correcting_Translations'):
    if copied_item_dict[k] is None:
        item_dict[k] = translate_text(k)

Correcting_Translations: 100%|██████████| 22170/22170 [01:08<00:00, 323.41it/s]


In [28]:
# Save Translated files
item_category_data.to_csv(os.path.join('datasets','translated_item_categories.csv'), index=False) if not os.path.exists(os.path.join('datasets','translated_item_categories.csv')) else \
    print('Translated Item Category File Exist')
item_data.to_csv(os.path.join('datasets','translated_items.csv'), index=False) if not os.path.exists(os.path.join('datasets','translated_items.csv')) else \
    print('Translated Item File Exist')
shop_data.to_csv(os.path.join('datasets','translated_shops.csv'), index=False) if not os.path.exists(os.path.join('datasets','translated_shops.csv')) else \
    print('Translated Shop File Exist')

In [29]:
with open(os.path.join('datasets','saved_item_categories.pkl'), 'wb') as f:
    pickle.dump(item_category_dict, f)
with open(os.path.join('datasets','saved_stores.pkl'), 'wb') as f:
    pickle.dump(shop_dict, f)
with open(os.path.join('datasets','saved_item.pkl'), 'wb') as f:
    pickle.dump(item_dict, f)