# Translation of the french category names to english

### Read the given category names csv file using pandas

In [3]:
import pandas as pd
df=pd.read_csv('category_names.csv')
df.head()

Unnamed: 0,category_id,category_level1,category_level2,category_level3
0,1000021794,ABONNEMENT / SERVICES,CARTE PREPAYEE,CARTE PREPAYEE MULTIMEDIA
1,1000012764,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,ABRI FUMEUR
2,1000012776,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,ABRI VELO - ABRI MOTO
3,1000012768,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,FONTAINE A EAU
4,1000012755,AMENAGEMENT URBAIN - VOIRIE,SIGNALETIQUE,PANNEAU D'INFORMATION EXTERIEUR


In [4]:
df.shape

(5270, 4)

* There are 5270 unique categories of products.
* The csv file maps the given category id to the category levels given in the csv file

### Let us check if the given category id corresponds directly to category level 3

In [5]:
df['category_level3'].shape

(5270,)

* Yes the given category id directly corresponds to the category level 3

## Here we wil translate the columns one by one using googletranslate module which is fairly accurate as we have to translate only a set of 2 or 3 words and not long sentences etc

### <b> 1. Translating category_level3 column

In [13]:
from googletrans import Translator

translator = Translator()

#store all the category names in an array
arr=list(df['category_level3'].values)

#strip the names so that spaces are removed at the ends which is causing problem when translating
arr2= [var.strip() for var in arr]

#list to store the translated category names 
translated_sent=[]

#Loop through each category name to translate it and save it to the list
from tqdm import tqdm
for sent in tqdm(arr2):
    arr2_trans=translator.translate(sent,src='fr',dest='en')
    translated_sent.append(arr2_trans)
    

100%|██████████| 5270/5270 [1:58:17<00:00,  1.35s/it]  


### Here we have saved the googletranslate object and not the actual text , so we need to get the translated text from it as shown below

In [16]:
translated_category = [sent.text for sent in translated_sent]
translated_category

['PREPAYEE MULTIMEDIA CARD',
 'Smoking shelter',
 'Bike shelter - motorcycle shelter',
 'WATER FOUNTAIN',
 'Exterior Information Panel',
 'Signaling Cone - Signage Plot',
 'ROAD PANNEL',
 'Signaling ribbon',
 'Parking Block - Parking Cop',
 'Post - Post',
 'Trough',
 'Taper',
 'FEEDING BOTTLE',
 'Food distributor',
 'WATER FOUNTAIN',
 'Gamelle - ECUEL - Gamelle accessory - ECUEL ACCESSORY',
 'Accessory kit for meals',
 'Featoire - Tremie',
 'Nourishing Clip - Nourishing Excavator - Nourishing Spoon',
 'RACK',
 'Food Transport - Storage Box - Candy Pouch',
 'Food Mineral - Shell - Salt in Lecher - Pierre To Breed',
 'Farm Food - Food Compressor',
 'Power Box - Food Patee - Wet Food - Soft Food',
 'GREASE BALL',
 'Croquette - Dry Food',
 'Extrude - granule',
 'Flakes - Mash - Muesli',
 'Hay - Fodder',
 'Fondness',
 'SEEDS',
 'Eating Grass - Cat Grass',
 'Animal feed kit',
 'MEDIUM MEDIUM - Drink Maternal',
 'Catiere - Cat Door - Cat Trap',
 'HEATER',
 'LIGHTING',
 'Maintenance - Treatmen

###  Save the translated category level3 names and add them also as the new column of the dataframe

In [18]:
df['translated_category_level3']=translated_category
df.head()

Unnamed: 0,category_id,category_level1,category_level2,category_level3,translated_category_level3
0,1000021794,ABONNEMENT / SERVICES,CARTE PREPAYEE,CARTE PREPAYEE MULTIMEDIA,PREPAYEE MULTIMEDIA CARD
1,1000012764,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,ABRI FUMEUR,Smoking shelter
2,1000012776,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,ABRI VELO - ABRI MOTO,Bike shelter - motorcycle shelter
3,1000012768,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,FONTAINE A EAU,WATER FOUNTAIN
4,1000012755,AMENAGEMENT URBAIN - VOIRIE,SIGNALETIQUE,PANNEAU D'INFORMATION EXTERIEUR,Exterior Information Panel


In [19]:
#save the dataframe to csv file for later use 
df.to_csv('translated_category_names.csv',index=False)

### <b> 2. Translating category_level2 column

* Here we follow the same procedure as followed above

In [20]:
#store all the category names in an array
arr=list(df['category_level2'].values)

#strip the names so that spaces are removed at the ends which is causing problem when translating
arr2= [var.strip() for var in arr]

#list to store the translated category names 
translated_sent=[]

#Loop through each category name to translate it and save it to the list
for sent in tqdm(arr2):
    arr2_trans=translator.translate(sent,src='fr',dest='en')
    translated_sent.append(arr2_trans.text)

100%|██████████| 5270/5270 [1:01:16<00:00,  1.43it/s]


In [21]:
translated_sent

['PREPAID CARD',
 'Urban development',
 'Urban development',
 'Urban development',
 'Signaletic',
 'ROAD SIGN',
 'ROAD SIGN',
 'ROAD SIGN',
 'Road',
 'Road',
 'Meal accessory',
 'Meal accessory',
 'Meal accessory',
 'Meal accessory',
 'Meal accessory',
 'Meal accessory',
 'Meal accessory',
 'Meal accessory',
 'Meal accessory',
 'Meal accessory',
 'Meal accessory',
 'FOOD',
 'FOOD',
 'FOOD',
 'FOOD',
 'FOOD',
 'FOOD',
 'FOOD',
 'FOOD',
 'FOOD',
 'FOOD',
 'FOOD',
 'FOOD',
 'FOOD',
 'Habitat Technical Development',
 'Habitat Technical Development',
 'Habitat Technical Development',
 'Habitat Technical Development',
 'Habitat Technical Development',
 'Habitat Technical Development',
 'Habitat Technical Development',
 'Habitat Technical Development',
 'Tie - Saddlery',
 'Tie - Saddlery',
 'Tie - Saddlery',
 'Tie - Saddlery',
 'Tie - Saddlery',
 'Tie - Saddlery',
 'Tie - Saddlery',
 'Tie - Saddlery',
 'Tie - Saddlery',
 'Tie - Saddlery',
 'Tie - Saddlery',
 'Habitat decoration',
 'Habitat de

Save the translated category level2 also to the dataframe as a new column

In [24]:
df['translated_category_level2']=translated_sent

#save the dataframe to the disk for later use
df.to_csv('translated_category_names.csv',index=False)
df.head()

Unnamed: 0,category_id,category_level1,category_level2,category_level3,translated_category_level3,translated_category_level2
0,1000021794,ABONNEMENT / SERVICES,CARTE PREPAYEE,CARTE PREPAYEE MULTIMEDIA,PREPAYEE MULTIMEDIA CARD,PREPAID CARD
1,1000012764,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,ABRI FUMEUR,Smoking shelter,Urban development
2,1000012776,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,ABRI VELO - ABRI MOTO,Bike shelter - motorcycle shelter,Urban development
3,1000012768,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,FONTAINE A EAU,WATER FOUNTAIN,Urban development
4,1000012755,AMENAGEMENT URBAIN - VOIRIE,SIGNALETIQUE,PANNEAU D'INFORMATION EXTERIEUR,Exterior Information Panel,Signaletic


### <b> 3. Translating category_level1 column </b>
* Here also we follow the same procedure as above 

In [27]:
#store all the category names in an array
arr=list(df['category_level1'].values)

#strip the names so that spaces are removed at the ends which is causing problem when translating
arr2= [var.strip() for var in arr]

#list to store the translated category names 
translated_sent=[]

#Loop through each category name to translate it and save it to the list
for sent in tqdm(arr2):
    arr2_trans=translator.translate(sent,src='fr',dest='en')
    translated_sent.append(arr2_trans.text)

100%|██████████| 5270/5270 [59:34<00:00,  1.47it/s]  


In [28]:
translated_sent

['Subscription / Services',
 'Urban Processing - Roads',
 'Urban Processing - Roads',
 'Urban Processing - Roads',
 'Urban Processing - Roads',
 'Urban Processing - Roads',
 'Urban Processing - Roads',
 'Urban Processing - Roads',
 'Urban Processing - Roads',
 'Urban Processing - Roads',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHOP',
 'PET SHO

Save the translated category level2 also to the dataframe as a new column

In [None]:
df['translated_category_level1'] = translated_sent

#save the dataframe to the disk for later use
df.to_csv('translated_category_names.csv',index=False)
df.head()

Unnamed: 0,category_id,category_level1,category_level2,category_level3,translated_category_level3,translated_category_level2,translated_category_level1
0,1000021794,ABONNEMENT / SERVICES,CARTE PREPAYEE,CARTE PREPAYEE MULTIMEDIA,PREPAID MULTIMEDIA CARD,PREPAID CARD,Subscription / Services
1,1000012764,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,ABRI FUMEUR,Smoking shelter,Urban development,Urban Processing - Roads
2,1000012776,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,ABRI VELO - ABRI MOTO,Bike shelter - motorcycle shelter,Urban development,Urban Processing - Roads
3,1000012768,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,FONTAINE A EAU,WATER FOUNTAIN,Urban development,Urban Processing - Roads
4,1000012755,AMENAGEMENT URBAIN - VOIRIE,SIGNALETIQUE,PANNEAU D'INFORMATION EXTERIEUR,Exterior Information Panel,Signaletic,Urban Processing - Roads
