In [None]:
import pandas as pd

INPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/de_en/self_correction_isolated_training_merged.tsv"
OUTPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/de_en/self_correction_isolated_training_with_clean_analysis.tsv"

df = pd.read_csv(INPUT_TSV, sep="\t", dtype=str, keep_default_na=False)
df.columns = [c.strip().lower() for c in df.columns]

def generate_clean_analysis(row):
    translation = row["target_en"].strip()
    
    analysis = (
        f'The translation is "{translation}"\n'
        f'By double checking, I see the translation is correct\n'
        f'The correct translation is "{translation}"'
    )
    
    return analysis

# apply to clean_translation rows 
for idx, row in df.iterrows():
    if row["data_type"].lower() == "clean_translation":
        if pd.isna(row.get("analysis", "")) or row.get("analysis", "").strip() == "":
            df.at[idx, "analysis"] = generate_clean_analysis(row)

df.to_csv(OUTPUT_TSV, sep="\t", index=False)

print(f"Updated dataset saved to: {OUTPUT_TSV}")
print(f"Total rows: {len(df)}")
print(f"Clean rows with analysis: {len(df[df['data_type'] == 'clean_translation'])}")
print(f"Error rows: {len(df[df['data_type'] == 'error_correction'])}")

Updated dataset saved to: /sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/de_en/self_correction_isolated_training_with_clean_analysis.tsv
Total rows: 1472
Clean rows with analysis: 736
Error rows: 736


In [2]:
import pandas as pd
import re

INPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/de_en/self_correction_isolated_training_with_clean_analysis.tsv"

df = pd.read_csv(INPUT_TSV, sep="\t", dtype=str, keep_default_na=False)
df.columns = [c.strip().lower() for c in df.columns]

error_df = df[df["data_type"].str.lower() == "error_correction"].copy()

has_model_part = error_df["analysis"].str.contains(r"The model translation is", case=False, na=False)
has_correct_part = error_df["analysis"].str.contains(r"The correct translation", case=False, na=False)

error_df["has_model_translation"] = has_model_part
error_df["has_correct_translation"] = has_correct_part
error_df["has_expected_format"] = has_model_part & has_correct_part

total = len(error_df)
with_model = has_model_part.sum()
with_correct = has_correct_part.sum()
fully_formatted = error_df["has_expected_format"].sum()
not_formatted = total - fully_formatted

print(f"Total error_correction rows: {total}")
print(f"Rows with 'The model translation is': {with_model}")
print(f"Rows with 'The correct translation': {with_correct}")
print(f"Rows with both (expected format): {fully_formatted}")
print(f"Rows NOT matching expected format: {not_formatted}")

if not_formatted > 0:
    print("\n Rows not following expected format:")
    display(error_df.loc[~error_df["has_expected_format"], ["source_de", "predicted_en", "target_en", "analysis"]].head(10))


Total error_correction rows: 736
Rows with 'The model translation is': 736
Rows with 'The correct translation': 736
Rows with both (expected format): 736
Rows NOT matching expected format: 0


In [3]:
import pandas as pd
import re

INPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/de_en/self_correction_isolated_training_with_clean_analysis.tsv"
OUTPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/de_en/self_correction_isolated_training_with_clean_analysis_formatted.tsv"

df = pd.read_csv(INPUT_TSV, sep="\t", dtype=str, keep_default_na=False)
df.columns = [c.strip().lower() for c in df.columns]

mask_err = df["data_type"].str.lower() == "error_correction"
err = df.loc[mask_err].copy()

has_model = err["analysis"].str.contains(r"The model translation is", case=False, na=False)
has_correct = err["analysis"].str.contains(r"The correct translation", case=False, na=False)
print(f"Total error_correction rows: {len(err)}")
print(f"Rows with 'The model translation is': {has_model.sum()}")
print(f"Rows with 'The correct translation': {has_correct.sum()}")

def reflow_analysis(text: str) -> str:
    if not isinstance(text, str):
        return text

    original = text

    text = re.sub(
        r'(The model translation is.*?"\.?)\s+(?=[A-Z])',
        r'\1\n',
        text,
        flags=re.IGNORECASE | re.DOTALL
    )

    text = re.sub(
        r'(?<!\n)\s*(The correct translation\b)',
        r'\n\1',
        text,
        flags=re.IGNORECASE
    )

    text = re.sub(r'\n{2,}', '\n', text).strip()

    return text

reflowed = err["analysis"].apply(reflow_analysis)

changed_mask = (reflowed != err["analysis"])
print(f"Rows modified (had paragraph-y analysis): {changed_mask.sum()}")

df.loc[mask_err, "analysis"] = reflowed
df.to_csv(OUTPUT_TSV, sep="\t", index=False)
print(f"Saved reflowed file to: {OUTPUT_TSV}")


Total error_correction rows: 736
Rows with 'The model translation is': 736
Rows with 'The correct translation': 736
Rows modified (had paragraph-y analysis): 736
Saved reflowed file to: /sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/de_en/self_correction_isolated_training_with_clean_analysis_formatted.tsv


In [4]:
for i, text in enumerate(err["analysis"].head(3), 1):
    print(f"\n--- Row {i} ---\n{text}\n-------------------")



--- Row 1 ---
The model translation is "On Wednesday, the mileage was cut by another 25%." The German "noch einmal auf 25 Prozent der Flugstrecke gekürzt" should be "slashed the mileage award to 25 percent of the miles of the trip" not "cut by another 25%" - this is about reducing TO 25%, not BY 25%. The word "Meilen" should be "mileage award" not just "mileage" - this is about reward points, not distance measurement. The phrase "der Flugstrecke" should be "of the miles of the trip" not omitted entirely. The grammatical structure "cut by" versus "slashed...to" changes the mathematical meaning significantly. The correct translation should be "On Wednesday it slashed the mileage award to 25 percent of the miles of the trip."
-------------------

--- Row 2 ---
The model translation is "Despite the months of debate about the photos of Özil with the Turkish President, Mr. Özil's resignation is to be regretted." The phrase "ist zu bedauern" means "is to be regretted" which the model got cor

In [5]:
import pandas as pd

input_file = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/de_en/self_correction_isolated_training_with_clean_analysis_formatted.tsv"
output_file = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/de_en/self_correction_isolated_training_fixed.tsv"

END_SENT = "|||END|||"

df = pd.read_csv(input_file, sep="\t", dtype=str, keep_default_na=False)
df.columns = [c.strip().lower() for c in df.columns]

print(f"Loaded {len(df)} rows")
print(f"Columns: {df.columns.tolist()}")

fixed_count = 0
for idx in range(len(df)):
    analysis = df.at[idx, "analysis"].rstrip()
    if analysis and not analysis.endswith(END_SENT):
        df.at[idx, "analysis"] = f"{analysis}{END_SENT}"
        fixed_count += 1

print(f"Fixed {fixed_count} rows by adding {END_SENT}")

count_with_end = df["analysis"].str.endswith(END_SENT).sum()
print(f"Verification: {count_with_end}/{len(df)} rows now end with {END_SENT}")

print("\nSample fixed analyses:")
for i in range(3):
    print(f"\nRow {i} - Last 80 chars:")
    print(f"...{df.iloc[i]['analysis'][-80:]}")

df.to_csv(output_file, sep="\t", index=False)
print(f"\nSaved fixed data to: {output_file}")

Loaded 1472 rows
Columns: ['source_de', 'target_en', 'predicted_en', 'error_type', 'analysis', 'data_type']
Fixed 1472 rows by adding |||END|||
Verification: 1472/1472 rows now end with |||END|||

Sample fixed analyses:

Row 0 - Last 80 chars:
...y it slashed the mileage award to 25 percent of the miles of the trip."|||END|||

Row 1 - Last 80 chars:
...and shame report, which I fear will alienate many of the stakeholders."|||END|||

Row 2 - Last 80 chars:
...p Erdogan, he regrets the return of the 92-match national player Özil."|||END|||

Saved fixed data to: /sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/de_en/self_correction_isolated_training_fixed.tsv


In [6]:
###chinese to english 

In [7]:
import pandas as pd

INPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_mixed.tsv"
OUTPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis.tsv"

df = pd.read_csv(INPUT_TSV, sep="\t", dtype=str, keep_default_na=False)
df.columns = [c.strip().lower() for c in df.columns]

def generate_clean_analysis(row):
    translation = row["target_en"].strip()
    
    analysis = (
        f'The model translation is "{translation}"\n'
        f'By double checking, I see the translation is correct.\n'
        f'The correct translation is "{translation}"'
    )
    
    return analysis

for idx, row in df.iterrows():
    if row["data_type"].lower() == "clean_translation":
        if pd.isna(row.get("analysis", "")) or row.get("analysis", "").strip() == "":
            df.at[idx, "analysis"] = generate_clean_analysis(row)

df.to_csv(OUTPUT_TSV, sep="\t", index=False)

print(f"Updated dataset saved to: {OUTPUT_TSV}")
print(f"Total rows: {len(df)}")
print(f"Clean rows with analysis: {len(df[df['data_type'] == 'clean_translation'])}")
print(f"Error rows: {len(df[df['data_type'] == 'error_correction'])}")

Updated dataset saved to: /sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis.tsv
Total rows: 1600
Clean rows with analysis: 800
Error rows: 800


In [8]:
import pandas as pd
import re

INPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis.tsv"

df = pd.read_csv(INPUT_TSV, sep="\t", dtype=str, keep_default_na=False)
df.columns = [c.strip().lower() for c in df.columns]

error_df = df[df["data_type"].str.lower() == "error_correction"].copy()

has_model_part = error_df["analysis"].str.contains(r"The model translation is", case=False, na=False)
has_correct_part = error_df["analysis"].str.contains(r"The correct translation", case=False, na=False)

error_df["has_model_translation"] = has_model_part
error_df["has_correct_translation"] = has_correct_part
error_df["has_expected_format"] = has_model_part & has_correct_part

total = len(error_df)
with_model = has_model_part.sum()
with_correct = has_correct_part.sum()
fully_formatted = error_df["has_expected_format"].sum()
not_formatted = total - fully_formatted

print(f"Total error_correction rows: {total}")
print(f"Rows with 'The model translation is': {with_model}")
print(f"Rows with 'The correct translation': {with_correct}")
print(f"Rows with both (expected format): {fully_formatted}")
print(f"Rows NOT matching expected format: {not_formatted}")

if not_formatted > 0:
    print("\n Rows not following expected format:")
    display(error_df.loc[~error_df["has_expected_format"], ["source_zh", "predicted_en", "target_en", "analysis"]].head(10))


Total error_correction rows: 800
Rows with 'The model translation is': 800
Rows with 'The correct translation': 799
Rows with both (expected format): 799
Rows NOT matching expected format: 1

 Rows not following expected format:


Unnamed: 0,source_zh,predicted_en,target_en,analysis
7,类似的药方 — — 以及包括大幅通胀的药方 — — 颇有市场，以往内对付经济停滞的常规手段已...,Such prescriptions – and others that include s...,Prescriptions like these – as with those for a...,"The model translation is ""Such prescriptions –..."


In [9]:
import pandas as pd

INPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis.tsv"
forcheckingdf = pd.read_csv(INPUT_TSV, sep='\t')

search_text = "Prescriptions like these – as with those for a jolt of inflation – have gained ground because the obvious solutions to economic stagnation have been tried and failed.policy makers, if not more so."

match = forcheckingdf[forcheckingdf['target_en'].str.strip() == search_text]

print(match)

if match.empty:
    print("\nNo exact match found — trying partial search...")
    match = forcheckingdf[forcheckingdf['target_en'].str.contains("Prescriptions like these", case=False, na=False)]
    print(match)

if not match.empty:
    print("\nRow index:", match.index.tolist())
    print("\nSelected columns:")
    print(match[['source_zh', 'target_en', 'predicted_en', 'error type - single', 'analysis']].to_string(index=False))


Empty DataFrame
Columns: [source_zh, target_en, predicted_en, error type - single, analysis, data_type]
Index: []

No exact match found — trying partial search...
                                           source_zh  \
7  类似的药方 — — 以及包括大幅通胀的药方 — — 颇有市场，以往内对付经济停滞的常规手段已...   

                                           target_en  \
7  Prescriptions like these – as with those for a...   

                                        predicted_en error type - single  \
7  Such prescriptions – and others that include s...       Lexical Error   

                                            analysis         data_type  
7  The model translation is "Such prescriptions –...  error_correction  

Row index: [7]

Selected columns:
                                              source_zh                                                                                                                                                              target_en                                                       

In [10]:
import pandas as pd

INPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis.tsv"
forcheckingdf = pd.read_csv(INPUT_TSV, sep='\t')

new_analysis = '''The model translation is "Such prescriptions – and others that include substantial inflation – are in vogue, and conventional remedies for economic stagnation that have been tried and found wanting are no longer an option." The model changes "prescriptions like these" to "such prescriptions – and others that include substantial inflation," making the inflation reference sound like a separate category when it should be included in "like these." The phrase "no longer an option" is also an addition. The correct translation is "Prescriptions like these – as with those for a jolt of inflation – have gained ground because the obvious solutions to economic stagnation have been tried and failed."'''

forcheckingdf.loc[7, 'analysis'] = new_analysis

print("Updated analysis for row 7:")
print(forcheckingdf.loc[7, 'analysis'])

forcheckingdf.to_csv(INPUT_TSV, sep='\t', index=False)
print("\nChange saved successfully to:", INPUT_TSV)


Updated analysis for row 7:
The model translation is "Such prescriptions – and others that include substantial inflation – are in vogue, and conventional remedies for economic stagnation that have been tried and found wanting are no longer an option." The model changes "prescriptions like these" to "such prescriptions – and others that include substantial inflation," making the inflation reference sound like a separate category when it should be included in "like these." The phrase "no longer an option" is also an addition. The correct translation is "Prescriptions like these – as with those for a jolt of inflation – have gained ground because the obvious solutions to economic stagnation have been tried and failed."

Change saved successfully to: /sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis.tsv


In [11]:
import pandas as pd
import re

INPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis.tsv"
OUTPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis_formatting.tsv"

df = pd.read_csv(INPUT_TSV, sep="\t", dtype=str, keep_default_na=False)
df.columns = [c.strip().lower() for c in df.columns]

mask_err = df["data_type"].str.lower() == "error_correction"
err = df.loc[mask_err].copy()

has_model = err["analysis"].str.contains(r"The model translation is", case=False, na=False)
has_correct = err["analysis"].str.contains(r"The correct translation", case=False, na=False)
print(f"Total error_correction rows: {len(err)}")
print(f"Rows with 'The model translation is': {has_model.sum()}")
print(f"Rows with 'The correct translation': {has_correct.sum()}")

def reflow_analysis(text: str) -> str:
    if not isinstance(text, str):
        return text

    original = text

    text = re.sub(
        r'(The model translation is.*?"\.?)\s+(?=[A-Z])',
        r'\1\n',
        text,
        flags=re.IGNORECASE | re.DOTALL
    )

    # line break 
    text = re.sub(
        r'(?<!\n)\s*(The correct translation\b)',
        r'\n\1',
        text,
        flags=re.IGNORECASE
    )

    #  trim extra blank lines 
    text = re.sub(r'\n{2,}', '\n', text).strip()

    return text

reflowed = err["analysis"].apply(reflow_analysis)

changed_mask = (reflowed != err["analysis"])
print(f"Rows modified (had paragraph-y analysis): {changed_mask.sum()}")

df.loc[mask_err, "analysis"] = reflowed
df.to_csv(OUTPUT_TSV, sep="\t", index=False)
print(f"Saved reflowed file to: {OUTPUT_TSV}")


Total error_correction rows: 800
Rows with 'The model translation is': 800
Rows with 'The correct translation': 800
Rows modified (had paragraph-y analysis): 800
Saved reflowed file to: /sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis_formatting.tsv


In [12]:
for i, text in enumerate(err["analysis"].head(3), 1):
    print(f"\n--- Row {i} ---\n{text}\n-------------------")



--- Row 1 ---
The model translation is "If you play the Russian roulette with just one or two bullets, your chances of survival are certainly better than if you play with six, but the stakes are so high – or the value of what you are betting is so low – that it is not a smart bet." The model adds a comparison to playing with six bullets that is not in the original text. The original statement is about how even with good survival odds, the stakes make it unwise. The model also changes "you are more likely to survive than not" to "your chances of survival are certainly better than if you play with six," which introduces an unnecessary comparison. The phrase "smart bet" is less formal than the original's "wise gamble." The model also loses the emphasis on how extraordinarily high the stakes would need to be or how low one's self-valuation must be. The correct translation should be "If you play Russian roulette with one or two bullets in the barrel, you are more likely to survive than not

In [13]:
pattern = r"The model translation is.*The correct translation"

matches = err["analysis"].str.contains(pattern, flags=re.IGNORECASE | re.DOTALL, na=False)

total_err = len(err)
matching_rows = matches.sum()
missing_rows = total_err - matching_rows

print(f"\nStructure check:")
print(f"Total 'error_correction' rows: {total_err}")
print(f"Rows containing BOTH sections ('The model translation is' ... 'The correct translation'): {matching_rows}")
print(f"Rows missing one or both sections: {missing_rows}")

if missing_rows > 0:
    print("\n Rows missing structure:")
    display(err.loc[~matches, ["source_zh", "target_en", "analysis"]].head(5))



Structure check:
Total 'error_correction' rows: 800
Rows containing BOTH sections ('The model translation is' ... 'The correct translation'): 800
Rows missing one or both sections: 0


In [14]:
import pandas as pd
import re

INPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis.tsv"
OUTPUT_TSV = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis_formatted.tsv"

df = pd.read_csv(INPUT_TSV, sep="\t", dtype=str, keep_default_na=False)
df.columns = [c.strip().lower() for c in df.columns]

mask_err = df["data_type"].str.lower() == "error_correction"
err = df.loc[mask_err].copy()

pattern = r"The model translation is.*The correct translation"
matches = err["analysis"].str.contains(pattern, flags=re.IGNORECASE | re.DOTALL, na=False)

print(f"\nStructure check:")
print(f"Total 'error_correction' rows: {len(err)}")
print(f"Rows with both 'The model translation is' and 'The correct translation': {matches.sum()}")
print(f"Rows missing one or both sections: {len(err) - matches.sum()}")

def reflow_analysis(text: str) -> str:
    """Add line breaks for readability between main analysis sections."""
    if not isinstance(text, str):
        return text

    text = re.sub(
        r'(The model translation is.*?"\.?)\s+(?=[A-Z])',
        r'\1\n',
        text,
        flags=re.IGNORECASE | re.DOTALL
    )

    text = re.sub(
        r'(?<!\n)\s*(The correct translation\b)',
        r'\n\1',
        text,
        flags=re.IGNORECASE
    )

    text = re.sub(r'\n{2,}', '\n', text).strip()

    return text

reflowed = err["analysis"].where(~matches, err["analysis"].apply(reflow_analysis))

changed_mask = (reflowed != err["analysis"])
print(f"\n Rows reformatted (added clean paragraph breaks): {changed_mask.sum()}")

df.loc[mask_err, "analysis"] = reflowed
df.to_csv(OUTPUT_TSV, sep="\t", index=False)
print(f"\nFormatted file saved to:\n{OUTPUT_TSV}")



Structure check:
Total 'error_correction' rows: 800
Rows with both 'The model translation is' and 'The correct translation': 800
Rows missing one or both sections: 0

 Rows reformatted (added clean paragraph breaks): 800

Formatted file saved to:
/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis_formatted.tsv


In [None]:
for i, text in enumerate(df.loc[mask_err, "analysis"].head(5), 1):
    print(f"\n--- error_correction Row {i} ---\n{text}\n-------------------")


In [16]:
import pandas as pd

input_file = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis_formatted.tsv"
output_file = "/sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis_fixed.tsv"

END_SENT = "|||END|||"

df = pd.read_csv(input_file, sep="\t", dtype=str, keep_default_na=False)
df.columns = [c.strip().lower() for c in df.columns]

print(f"Loaded {len(df)} rows")
print(f"Columns: {df.columns.tolist()}")

fixed_count = 0
for idx in range(len(df)):
    analysis = df.at[idx, "analysis"].rstrip()
    if analysis and not analysis.endswith(END_SENT):
        df.at[idx, "analysis"] = f"{analysis}{END_SENT}"
        fixed_count += 1

print(f"Fixed {fixed_count} rows by adding {END_SENT}")


count_with_end = df["analysis"].str.endswith(END_SENT).sum()
print(f"Verification: {count_with_end}/{len(df)} rows now end with {END_SENT}")

print("\nSample fixed analyses:")
for i in range(3):
    print(f"\nRow {i} - Last 80 chars:")
    print(f"...{df.iloc[i]['analysis'][-80:]}")

df.to_csv(output_file, sep="\t", index=False)
print(f"\nSaved fixed data to: {output_file}")

Loaded 1600 rows
Columns: ['source_zh', 'target_en', 'predicted_en', 'error type - single', 'analysis', 'data_type']
Fixed 1600 rows by adding |||END|||
Verification: 1600/1600 rows now end with |||END|||

Sample fixed analyses:

Row 0 - Last 80 chars:
... place on your life inordinately low – for this to seem a wise gamble."|||END|||

Row 1 - Last 80 chars:
...s – foisting more debt onto a country that has too much of it already."|||END|||

Row 2 - Last 80 chars:
...elatively well, and are in all likelihood followed by similar periods."|||END|||

Saved fixed data to: /sc/home/sandeep.uprety/thesis_project/self_correction_llm_based_translation_thesis/data/isolated_clean/zh_en/chinese_english_analysis_with_clean_analysis_fixed.tsv
