In [1]:
from os import environ
from google.cloud import translate

from tqdm.notebook import tqdm

import warnings
# Install a global "ignore" filter: from here on no warning is displayed,
# whichever library or cell raises it.
# NOTE(review): this is a blanket filter; consider narrowing it to specific
# warning categories so that genuine problems are not hidden.
warnings.filterwarnings('ignore')

In [2]:
# Configuration for the Translation API calls made further down.
# The project id comes from the PROJECT_ID environment variable; a missing or
# empty value stops the notebook here, before any client is created.
project_id = environ.get("PROJECT_ID") or ""
assert project_id

# Resource name passed as `parent` on every request.
parent = "projects/{}".format(project_id)

# Single shared client reused by the helper functions.
client = translate.TranslationServiceClient()

In [3]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset.
df = pd.read_csv("./IndianFoodDatasetCSV.csv")

# Keep only the recipes whose course is one of the categories of interest.
df_categories = df.loc[
    df["Course"].isin(
        ["Lunch", "Side Dish", "Snack", "Dinner", "Dessert", "Appetizer", "Main Course"]
    )
]

# Columns carried forward into the train/test splits.
required_columns = [
    "TranslatedRecipeName",
    "TranslatedIngredients",
    "PrepTimeInMins",
    "CookTimeInMins",
    "TotalTimeInMins",
    "Servings",
    "Course",
    "TranslatedInstructions",
]

# Take an independent deep copy so the selection is its own frame rather than
# a slice of `df`. The column names are deliberately left unchanged: the
# translation cells below address the text columns by these exact names.
df_categories_selected = df_categories[required_columns].copy(deep=True)

# Prepare the train and test data frames (33% held out, fixed seed for
# reproducibility).
train, test = train_test_split(df_categories_selected, test_size=0.33, random_state=42)


In [4]:
# Give both splits a fresh 0..n-1 index, discarding the old index labels.
# The reset is done in place on each frame.
for split in (train, test):
    split.reset_index(drop=True, inplace=True)

In [5]:
def detect_language(text):
    """Return the languages the Translation API detects for `text`.

    Calls `client.detect_language` with the module-level `parent` and `text`
    as the content, and returns the `languages` attribute of the response.
    """
    detection = client.detect_language(parent=parent, content=text)
    return detection.languages

In [6]:
def convert(text, target_language_code):
    """Translate `text` into `target_language_code` and return the result.

    Args:
        text: The text to translate. It is sent as a one-element `contents`
            list using the module-level `client` and `parent`.
        target_language_code: Language code of the desired output, e.g. "en".

    Returns:
        The `translated_text` of the first translation in the response.

    Raises:
        Whatever `client.translate_text` raises for the given input; errors
        are not caught here so that callers can decide how to report them.
    """
    response = client.translate_text(
        contents=[text],
        target_language_code=target_language_code,
        parent=parent,
        # The inputs are plain text. Without an explicit MIME type the API
        # treats content as HTML, which can HTML-escape characters such as
        # apostrophes and ampersands in the returned text.
        mime_type="text/plain",
    )

    return response.translations[0].translated_text

In [7]:
# Translate the three free-text columns of the training split to English,
# one row at a time. A failure on one field is reported and skipped so the
# remaining fields and rows are still processed.
for index, row in tqdm(train.iterrows(), total=len(train)):
    # (label used in the failure report, column holding the text)
    for label, column in (
        ("RecipeName", "TranslatedRecipeName"),
        ("Ingredients", "TranslatedIngredients"),
        ("Instructions", "TranslatedInstructions"),
    ):
        try:
            # Write through `.at` so the value lands in `train` itself;
            # chained `train.col[index] = ...` assignment is not guaranteed
            # to update the frame.
            train.at[index, column] = convert(row[column], "en")
        except Exception:
            # Best effort: keep the untranslated value and log which field failed.
            print(f"-----------\n\n{label}, {index}, {row[column]}\n\n-----------")

0it [00:00, ?it/s]

-----------

Ingredients, 382, nan

-----------
-----------

Ingredients, 468, nan

-----------
-----------

Ingredients, 3129, nan

-----------


In [8]:
# Translate the three free-text columns of the test split to English,
# one row at a time. A failure on one field is reported and skipped so the
# remaining fields and rows are still processed.
for index, row in tqdm(test.iterrows(), total=len(test)):
    # (label used in the failure report, column holding the text)
    for label, column in (
        ("RecipeName", "TranslatedRecipeName"),
        ("Ingredients", "TranslatedIngredients"),
        ("Instructions", "TranslatedInstructions"),
    ):
        try:
            # Write through `.at` so the value lands in `test` itself;
            # chained `test.col[index] = ...` assignment is not guaranteed
            # to update the frame.
            test.at[index, column] = convert(row[column], "en")
        except Exception:
            # Best effort: keep the untranslated value and log which field failed.
            print(f"-----------\n\n{label}, {index}, {row[column]}\n\n-----------")

0it [00:00, ?it/s]

-----------

Ingredients, 11, nan

-----------
-----------

Ingredients, 1821, nan

-----------


In [9]:
# Write each translated split to its own CSV file, leaving out the row index.
for frame, destination in (
    (train, 'train_translated.csv'),
    (test, 'test_translated.csv'),
):
    frame.to_csv(destination, index=False)