In [None]:
pip install langdetect googletrans==4.0.0-rc1 pandas


In [None]:
from pathlib import Path

import pandas as pd
from googletrans import Translator
from langdetect import detect

def load_file(file_path):
    """
    Load the entire file (not just headers) into a DataFrame.

    The reader is chosen from the file extension (case-insensitive):
    ``csv`` -> ``pd.read_csv``, ``xls``/``xlsx`` -> ``pd.read_excel``,
    ``xml`` -> ``pd.read_xml``.

    Args:
        file_path: Path to the file, as a string or an ``os.PathLike``
            object such as ``pathlib.Path``.

    Returns:
        The DataFrame produced by the matching pandas reader.

    Raises:
        ValueError: If the extension is missing or not one of the
            supported formats.
    """
    # Path.suffix looks only at the final path component, so a dot in a
    # directory name (e.g. "reports.v2/data") is not mistaken for an extension.
    file_extension = Path(file_path).suffix.lower().lstrip('.')

    if file_extension == 'csv':
        return pd.read_csv(file_path)
    if file_extension in ('xls', 'xlsx'):
        return pd.read_excel(file_path)
    if file_extension == 'xml':
        return pd.read_xml(file_path)

    raise ValueError(f"Unsupported file format: {file_extension}")

def detect_language(text):
    """
    Return the language code that langdetect's ``detect`` reports for ``text``.

    Any exception raised during detection is printed and swallowed, and
    ``None`` is returned in that case.
    """
    try:
        return detect(text)
    except Exception as exc:
        print(f"Error detecting language for text: {text}. Error: {str(exc)}")
        return None

def translate_header(header_text, target_language="en"):
    """
    Translate ``header_text`` into ``target_language`` via googletrans.

    The target defaults to English ("en"). If the translation call fails,
    the error is printed and the original text is returned unchanged.
    """
    client = Translator()

    try:
        result = client.translate(header_text, dest=target_language)
        return result.text
    except Exception as exc:
        print(f"Error translating text: {header_text}. Error: {str(exc)}")
        # Fall back to the untranslated header so callers always get a label
        return header_text

def translate_and_replace_headers(file_path):
    """
    Read a file, translate its non-English headers, and return the DataFrame.

    Steps:
      1. Load the file with ``load_file``.
      2. For every string column label that does not start with 'Unnamed',
         detect its language with ``detect_language``.
      3. Translate the label with ``translate_header`` unless the detected
         language is "en". A failed detection (``None``) is also translated.
      4. Rename the columns and drop the columns whose label starts with
         'Unnamed'.

    Non-string column labels (for example integers or dates that
    ``pd.read_excel`` can produce) are kept as they are instead of aborting
    the whole run.

    Args:
        file_path: Path of the file to process; passed to ``load_file``.

    Returns:
        The DataFrame with translated headers, or ``None`` if any step
        raised an exception (the error is printed).
    """
    try:
        # Load the entire file into a DataFrame
        df = load_file(file_path)

        # Only string labels can be language-detected or translated;
        # 'Unnamed...' labels are skipped here and dropped below.
        headers = [
            header for header in df.columns
            if isinstance(header, str) and not header.startswith('Unnamed')
        ]

        # Map each original header to its (possibly translated) replacement
        translated_headers = {}
        for header in headers:
            detected_language = detect_language(header)
            print(f"Detected language for '{header}': {detected_language}")

            # Translate only if the detected language is not English
            if detected_language != "en":
                translated_header = translate_header(header)
            else:
                translated_header = header

            translated_headers[header] = translated_header
            print(f"Original: {header} -> Translated: {translated_header}")

        # Replace the old headers with the translated headers
        df = df.rename(columns=translated_headers)

        # Build the mask in plain Python: the Index ``.str`` accessor raises
        # on non-string indexes, which would discard the whole result.
        keep_column = [
            not (isinstance(label, str) and label.startswith('Unnamed'))
            for label in df.columns
        ]
        df = df.loc[:, keep_column]

        print("Updated DataFrame with Translated Headers:")
        print(df.head(5))

        return df

    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return None

# Example usage: translate the headers of one sample workbook when this
# file is executed directly.
if __name__ == "__main__":
    file_path = "/content/Book5 (1).xlsx"  # Change this to your file path
    # translate_and_replace_headers returns the updated DataFrame, or None
    # if an error occurred while processing the file.
    translated_df = translate_and_replace_headers(file_path)


Detected language for 'Tarih': sw
Original: Tarih -> Translated: History
Detected language for 'Hareket tipi': no
Original: Hareket tipi -> Translated: Movement type
Detected language for 'Açıklama': tr
Original: Açıklama -> Translated: Explanation
Detected language for 'İşlem Tutarı': tr
Original: İşlem Tutarı -> Translated: Transaction amount
Detected language for 'Bakiye': tr
Original: Bakiye -> Translated: Balance
Updated DataFrame with Translated Headers:
     History     Movement type  \
0 2024-08-27  Encard Harcaması   
1 2024-08-27        İptal/İade   
2 2024-08-27  Encard Harcaması   
3 2024-08-26  Encard Harcaması   
4 2024-08-26  Encard Harcaması   

                                         Explanation Transaction amount  \
0  WEBPOS SATIŞ Trendyol - Yemek       ISTANBUL  ...         -215,90 TL   
1  WEBPOS SATIŞ Trendyol - Yemek       ISTANBUL  ...          199,00 TL   
2  WEBPOS SATIŞ Trendyol - Yemek       ISTANBUL  ...         -199,00 TL   
3  WEBPOS SATIŞ Trendyol - Yem