In [21]:
!pip install googletrans==4.0.0-rc1




In [22]:
import pandas as pd
from googletrans import Translator

# Load your CSV
df = pd.read_csv("/content/final_banking_data_translated.csv")

In [23]:
# function to detect Tamil characters
def is_tamil(text):
    return any("\u0B80" <= ch <= "\u0BFF" for ch in str(text))

# function to detect German keywords
def is_german(text):
    german_words = ["und", "Sie", "ein", "das", "nicht", "der", "die", "mit"]
    return any(word in str(text) for word in german_words)


In [24]:
# Step 1: Move misplaced Tamil responses from 'Response' to 'Response_TA'
mask_ta = df["Response"].apply(is_tamil)
df.loc[mask_ta, "Response_TA"] = df.loc[mask_ta, "Response"]
df.loc[mask_ta, "Response"] = ""

# Step 2: Move misplaced German responses from 'Response' to 'Response_DE'
mask_de = df["Response"].apply(is_german)
df.loc[mask_de, "Response_DE"] = df.loc[mask_de, "Response"]
df.loc[mask_de, "Response"] = ""


In [25]:
# Step 3: Back-fill missing translations using Google Translate
translator = Translator()

def fill_translation(row, lang_code):
    col = f"Response_{lang_code.upper()}"
    if pd.isna(row[col]) or row[col].strip() == "":
        try:
            return translator.translate(str(row["Response"]), dest=lang_code).text
        except:
            return ""
    return row[col]

# Apply for German and Tamil (rate-limited, be patient!)
df["Response_DE"] = df.apply(lambda row: fill_translation(row, "de"), axis=1)
df["Response_TA"] = df.apply(lambda row: fill_translation(row, "ta"), axis=1)

# Save fixed dataset
df.to_csv("final_banking_data_translated_fixed.csv", index=False)
print(" Dataset cleaned and saved as final_banking_data_translated_fixed.csv")

✅ Dataset cleaned and saved as final_banking_data_translated_fixed.csv


In [26]:
import pandas as pd

df = pd.read_csv("/content/final_banking_data_translated_fixed.csv")

# Check for blank or missing responses
print(" Empty English Responses:")
print(df[df["Response"].isna() | (df["Response"].str.strip() == "")][["Query", "Response"]])

print("\n Sample Rows:")
print(df.sample(10))

 Empty English Responses:
                                                  Query Response
2     எனது கணக்கு செயல்பாட்டு வரலாற்றை நான் பார்க்க ...      NaN
3     Kann ich eine Kopie meiner Kontosgeschichte er...      NaN
5     Was muss ich bereitstellen, um ein Bankkonto z...      NaN
13    Kann ich über eine mobile App auf mein Konto z...      NaN
14                         Wie schließe ich mein Konto?      NaN
...                                                 ...      ...
1302  தனிப்பட்ட கடன் வரிக்கு நான் எவ்வாறு விண்ணப்பிப...      NaN
1305  Kann ich meine Account -Aktivitätsgeschichte a...      NaN
1307  Kann ich ein Darlehen für die Verbesserung des...      NaN
1313          Bieten Sie Investmentdienstleistungen an?      NaN
1316                   How do I set up a joint account?      NaN

[313 rows x 2 columns]

 Sample Rows:
                                                  Query  \
592   What is the interest rate for a homeimprovemen...   
1026  What is the interest rate for a

Having some issues in the English response EN here in the below steps we are using Translation only if the English column is Empty

In [27]:
# Setup translator
translator = Translator()



In [28]:
# Function to translate to English when Response is missing
def fill_english(row):
    if pd.isna(row["Response"]) or row["Response"].strip() == "":
        # Use German if available
        if pd.notna(row["Response_DE"]) and row["Response_DE"].strip() != "":
            try:
                return translator.translate(row["Response_DE"], dest="en").text
            except:
                return ""
        # Use Tamil if German is not available
        elif pd.notna(row["Response_TA"]) and row["Response_TA"].strip() != "":
            try:
                return translator.translate(row["Response_TA"], dest="en").text
            except:
                return ""
    return row["Response"]  # If already filled, leave it

# Apply the fix to dataset
df["Response"] = df.apply(fill_english, axis=1)

# Save updated dataset
df.to_csv("Internatioanl_banking_data_chatbot.csv", index=False)
print(" Saved as final_banking_data_translated_final.csv")



 Saved as final_banking_data_translated_final.csv


In [29]:
df = pd.read_csv("/content/Internatioanl_banking_data_chatbot.csv")

# Check for blank or missing responses
print(" Empty English Responses:")
print(df[df["Response"].isna() | (df["Response"].str.strip() == "")][["Query", "Response"]])

print(" Sample Rows:")
print(df.sample(10))

 Empty English Responses:
                                                  Query Response
1279  எனது கணக்கு பரிவர்த்தனைகளுக்கான விழிப்பூட்டல்க...      NaN
 Sample Rows:
                                                  Query  \
888                 Wie richte ich Online -Banking ein?   
1307  Kann ich ein Darlehen für die Verbesserung des...   
236     What is the interest rate for a education loan?   
213   What is the interest rate for a debtconsolidat...   
324   What is the interest rate for a homeimprovemen...   
614   What is the interest rate for a debtconsolidat...   
433       What is the interest rate for a venture loan?   
683     What is the interest rate for a education loan?   
500     What is the interest rate for a education loan?   
104   What is the interest rate for a debtconsolidat...   

                                               Response language     intent  \
888   To set up online banking, visit our website an...       de    unknown   
1307  Yes, we offer lo