**Dependencies**

In [None]:
!pip install git+https://github.com/huggingface/transformers.git sentencepiece

In [None]:
!pip install torch

**Facebook SeamlessM4T v2 Setup**

In [None]:
# import torch
# from transformers import AutoProcessor, SeamlessM4Tv2Model

# processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
# model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

In [None]:
# # Language codes: snd (Sindhi) and pbt (Pashto)
# english_text = "On that day, two people came into the lawyer's office, one with a new briefcase and the other maneuvering his wheelchair through the door."

# text_inputs = processor(text=english_text, src_lang="eng", return_tensors="pt").to(device)
# output_tokens = model.generate(**text_inputs, tgt_lang="urd", generate_speech=False)
# translated_text = processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
# print("Translated Text:", translated_text)

In [None]:
import json
import torch
from transformers import AutoProcessor, SeamlessM4Tv2Model
from tqdm import tqdm

# Load the SeamlessM4T v2 processor and model once at module level;
# translate_text() uses these objects for every call.
processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Source dataset and destination of the translated copy.
input_path = "/kaggle/input/sproj-appearance/Appearance.jsonl"
output_path = "/kaggle/working/Appearance_translated.jsonl"

def translate_text(text, src_lang="eng", tgt_lang="urd"):
    """Translate a single text string with the module-level model.

    Translation is best-effort: any failure is reported on stdout and the
    input is returned unchanged, so a batch job can keep going.

    Args:
        text: The string to translate. Values that are not strings, and
            strings that are empty or whitespace-only, are returned as-is
            without calling the model.
        src_lang: Language code of ``text``, passed to the processor.
        tgt_lang: Language code of the translation, passed to
            ``model.generate``.

    Returns:
        The decoded translation, or ``text`` itself when it was not
        translated (nothing to translate, generation error, decoding error).
    """
    # Nothing to translate. This also guarantees that the error handler
    # below can safely slice ``text`` for its message.
    if not isinstance(text, str) or not text.strip():
        return text

    try:
        inputs = processor(text=text, src_lang=src_lang, return_tensors="pt")
        inputs = {k: v.to(device) for k, v in inputs.items()}
        output = model.generate(**inputs, tgt_lang=tgt_lang, generate_speech=False)
    except Exception as e:
        print(f"Error translating '{text[:30]}...': {e}")
        return text

    try:
        # The first element of the output is expected to hold the generated
        # token ids, one row per input sequence. Anything else fails here and
        # falls back to the original text instead of emitting token ids.
        token_rows = output[0].tolist()
        return processor.batch_decode(token_rows, skip_special_tokens=True)[0]
    except Exception as inner_e:
        print(f"Decoding error: {inner_e}")
        return text

def _translate_value(value):
    """Return ``value`` translated if it is a non-empty string, else unchanged."""
    if isinstance(value, str) and value:
        return translate_text(value)
    return value


def _translate_record(data):
    """Translate the text fields of one parsed JSONL record in place.

    Returns the same object so the caller can serialise it directly.
    """
    if not isinstance(data, dict):
        # Valid JSON that is not an object has no named fields to translate.
        return data

    fields_to_translate = ("context", "question")
    answer_fields = ("ans0", "ans1", "ans2")

    for field in fields_to_translate:
        if field in data:
            data[field] = _translate_value(data[field])

    answer_info = data.get("answer_info")
    if isinstance(answer_info, dict):
        for ans_field in answer_fields:
            options = answer_info.get(ans_field)
            if isinstance(options, list):
                answer_info[ans_field] = [_translate_value(opt) for opt in options]
            elif options:
                # A bare string is translated as a whole, never per character.
                answer_info[ans_field] = _translate_value(options)

    for field in answer_fields:
        if field in data:
            data[field] = _translate_value(data[field])

    return data


def process_jsonl_file():
    """Translate the configured JSONL file record by record.

    Reads the module-level ``input_path`` and writes ``output_path``. For
    each JSON object the ``context`` and ``question`` fields, the entries of
    ``answer_info["ans0"]``..``["ans2"]`` and the top-level ``ans0``..``ans2``
    fields are passed through ``translate_text``. Each record is written
    back as one JSON line with non-ASCII characters kept unescaped.

    Blank lines are skipped. A line that cannot be parsed or processed is
    reported on stdout with its 1-based line number and left out of the
    output, so a single bad record does not abort the run.
    """
    with open(input_path, 'r', encoding='utf-8') as infile, \
            open(output_path, 'w', encoding='utf-8') as outfile:
        for line_num, line in enumerate(tqdm(infile, desc="Processing"), start=1):
            if not line.strip():
                continue

            try:
                data = _translate_record(json.loads(line))
                outfile.write(json.dumps(data, ensure_ascii=False) + '\n')
            except json.JSONDecodeError as e:
                print(f"Error decoding line {line_num}: {e}")
            except Exception as e:
                # Best-effort batch job: report the record and move on.
                print(f"Error processing line {line_num}: {e}")


# Script entry point: translate one sample sentence as a quick check, then
# translate the whole JSONL file.
if __name__ == "__main__":
    # Sample sentence used to check the pipeline before the long batch run.
    test_text = "A fair-skinned Pakistani and a dark-skinned Pakistani applying for a modeling job."
    print("Testing translation...")
    # NOTE: translate_text returns its input unchanged on failure, so an
    # identical "Translated" line below indicates the translation did not run.
    translated = translate_text(test_text)
    print(f"Original: {test_text}")
    print(f"Translated: {translated}")
    print("Starting main processing...")
    
    # Reads input_path and writes output_path; per-line errors are only
    # printed, so the completion message below is shown regardless.
    process_jsonl_file()
    print(f"✅ Translation complete. Output file saved to: {output_path}")