<a href="https://colab.research.google.com/github/EzraBrand/DOM-manipulation-manipulating-text/blob/master/modernizing_Jastrow_opening_up_acronyms_and_making_it_more_modern_and_user_friendly_and_accessible_11_Feb_24.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import functools
import re

import pandas as pd

# Abbreviation -> expansion table consumed by expand_acronyms().
# Every key ends with a period; multi-part keys (e.g. "B. Bath.") coexist with
# their shorter prefixes (e.g. "B."), so consumers must try longer keys first.
acronym_dict = {
    "Ab.": "Aboth",
    "Ab. Zar.": "Abodah Zarah",
    "Am.": "Amos",
    "Arakh.": "Arakhin",
    "B.": "Baba",
    "B. Bath.": "Baba Bathra",
    "B. Mets.": "Baba M'tsiʿa",
    "Bab.": "Babli",
    "Bekh.": "B'khoroth",
    "Ber.": "B'rakhoth",
    "Bets.": "Betsah",
    "B'resh.": "B'reshith",
    "B'shall.": "B'shallaḥ",
    "Ch.": "Chaldaic",
    "Chron.": "Chronicles",
    "Dan.": "Daniel",
    "Deut.": "Deuteronomy",
    "Du.": "Dual",
    "Erub.": "Eruvin",
    "Esth.": "Esther",
    "Ex.": "Exodus",
    "Ez.": "Ezekiel",
    "Gem.": "G'mara",
    "Gen.": "Genesis",
    "Gitt.": "Gittin",
    "Gloss.": "Glossary",
    "Gr.": "Greek",
    "Hab.": "Habakkuk",
    "Hag.": "Haggai",
    "Heb.": "Hebrew",
    "Hif.": "Hifil",
    "Hithpa.": "Hithpael",
    "Hithpo.": "Hithpolel",
    "Hor.": "Horayoth",
    "Hos.": "Hosea",
    "Is.": "Isaiah",
    "Isp.": "Ispeel",
    "Ithpa.": "Ithpaal",
    "Ithpe.": "Ithpeel",
    "Jer.": "Jeremiah",
    "Jon.": "Jonah",
    "Jos.": "Josephus",
    "Josh.": "Joshua",
    "Ker.": "K'rithoth",
    "Keth.": "K'thuboth",
    "Kidd.": "Kiddushin",
    "Kin.": "Kinnim",
    "Lam.": "Lamentations",
    "Lat.": "Latin",
    "Lev.": "Leviticus",
    "M. Kat.": "Moʿed Katan",
    "Macc.": "Makkoth",
    "Maim.": "Maimonides",
    "Mal.": "Malachi",
    "Mass.": "Tractate",
    "Meg.": "M'gillah",
    "Meil.": "M'ʿilah",
    "Men.": "M'naḥoth",
    "Mic.": "Micah",
    "Midd.": "Middoth",
    "Midr.": "Midrash",
    "Midr. Sam.": "Midrash Samuel",
    "Mish.": "Mishnah",
    "Ms.": "Manuscript",
    "Ms. K.": "Manuscript Karlsruhe",
    "Ms. R.": "Manuscript Rome",
    "Nah.": "Nahum",
    "Naz.": "Nazir",
    "Ned.": "N'darim",
    "Neh.": "Nehemiah",
    "Nif.": "Nifal",
    "Nidd.": "Niddah",
    "Nithpa.": "Nithpael",
    "Ob.": "Obadiah",
    "Par.": "Paris",
    "Pes.": "P'saḥim",
    "Pi.": "Piel",
    "Pl.": "plural",
    "Prov.": "Proverbs",
    "Ps.": "Psalms",
    "Sabb.": "Sabbath",
    "Sam.": "Samuel",
    "Shebu.": "Sh'buoth",
    "Snh.": "Sanhedrin",
    "Sonc.": "Soncino",
    "Sot.": "Sotah",
    "Succ.": "Succah",
    "Syr.": "Syriac",
    "T.": "Tosefta",
    "Taan.": "Taʿanith",
    "Talm.": "Talmud",
    "Tam.": "Tamid",
    "Tanḥ.": "Midrash Tanḥuma",
    "Targ.": "Targum",
    "Targ. O.": "Targum Onkelos",
    "Tem.": "T'murah",
    "Tosef.": "Tosefta",
    "Trnsf.": "Transferred",
    "Ukts.": "ʿUktsin",
    "Var.": "Variant",
    "Ven.": "Venice",
    "Vien.": "Vienna",
    "Wil.": "Vilna",
    "Y.": "Yerushalmi",
    "Yad.": "Yadayim",
    "Yeb.": "Y'bamoth",
    "Zab.": "Zavim",
    "Zeb.": "Z'vaḥim",
    "Zech.": "Zechariah",
    "Zeph.": "Zephaniah",
}

@functools.lru_cache(maxsize=8)
def _compile_acronym_matcher(acronym_items):
    """Build the regex and lookup table for a set of abbreviations.

    Args:
        acronym_items: Tuple of ``(abbreviation, expansion)`` pairs. A tuple
            is required so the result can be cached by content.

    Returns:
        ``(pattern, lookup)``. ``pattern`` is a compiled regex matching any
        abbreviation (``None`` when there is nothing to match). ``lookup``
        maps each abbreviation, with its whitespace removed, to its expansion.
    """
    lookup = {}
    alternatives = []
    # Longest first: regex alternation takes the first branch that matches,
    # so "Midr. Sam." must be tried before "Midr.".
    for acronym, expansion in sorted(
        acronym_items, key=lambda item: len(item[0]), reverse=True
    ):
        parts = acronym.split()
        if not parts:
            continue  # An empty key would match at every position.
        lookup.setdefault(''.join(parts), expansion)
        # Tolerate missing or repeated whitespace between the parts of a
        # multi-part abbreviation ("B.Bath.", "B.  Bath.").
        alternatives.append(r'\s*'.join(re.escape(part) for part in parts))
    if not alternatives:
        return None, lookup
    # (?<!\w) stops a key from matching the tail of a longer token, e.g. the
    # "B." at the end of "DB.".
    pattern = re.compile(r'(?<!\w)(?:' + '|'.join(alternatives) + r')')
    return pattern, lookup


def expand_acronyms(text, acronyms=None):
    """
    Expand known abbreviations in ``text``.

    Matching is case-sensitive and done in a single left-to-right pass, so
    text produced by one expansion is never re-scanned. Longer abbreviations
    win over their prefixes, and an abbreviation is only recognised when it is
    not preceded by a letter, digit or underscore. Surrounding text, including
    the whitespace after an abbreviation, is left untouched.

    Args:
        text: Value to process. Anything that is not a ``str`` (for example
            the NaN pandas uses for empty cells) is returned unchanged.
        acronyms: Optional mapping of abbreviation -> expansion. Defaults to
            the module-level ``acronym_dict``.

    Returns:
        The text with every recognised abbreviation replaced.
    """
    if not isinstance(text, str):
        return text
    if acronyms is None:
        acronyms = acronym_dict

    # Cached by content, so applying this per DataFrame row sorts and compiles
    # the table only once.
    pattern, lookup = _compile_acronym_matcher(tuple(acronyms.items()))
    if pattern is None:
        return text

    def _replacement(match):
        # Strip whitespace so "B.  Bath." resolves to the "B. Bath." entry.
        return lookup[''.join(match.group().split())]

    return pattern.sub(_replacement, text)

# Driver: read the CSV, expand abbreviations in its 'Entry' column, write the
# original/expanded pair to a new CSV, and print a short report.
# Defined outside the try block because the FileNotFoundError handler reads it.
input_path = '/content/Greek Loanwords in Jastrow\'s Talmudic Dictionary - open up acronyms test.csv'

try:
    # Read the input file
    df = pd.read_csv(input_path)

    # Create a copy of the original Entry column
    df['Original_Entry'] = df['Entry']

    # Process the 'Entry' column
    df['Expanded_Entry'] = df['Entry'].apply(expand_acronyms)

    # Reorder columns to show original and expanded side by side
    df = df[['Original_Entry', 'Expanded_Entry']]

    # Generate output filename
    output_filename = 'expanded_jastrow_entries.csv'

    # Save the processed DataFrame
    df.to_csv(output_filename, index=False)

    print(f"Processing complete! The expanded file has been saved as '{output_filename}'")
    print(f"Number of entries processed: {len(df)}")

    # Display first few rows of processed data
    print("\nSample of processed entries:")
    print(df.head())

    # Show where changes were made.
    # Missing entries are NaN in both columns, and NaN != NaN is True, so they
    # are excluded explicitly rather than reported as modified.
    changes_mask = df['Original_Entry'].notna() & (
        df['Original_Entry'] != df['Expanded_Entry']
    )
    modified_count = int(changes_mask.sum())
    print(f"\nNumber of entries modified: {modified_count}")
    if modified_count > 0:
        print("\nModified entries sample:")
        print(df[changes_mask].head())

except FileNotFoundError:
    print(f"Error: Could not find the input file at {input_path}")
    print("Please make sure the file is uploaded to your Colab environment.")
except Exception as e:
    # Top-level boundary of a notebook cell: report the problem instead of
    # dumping a traceback.
    print(f"An error occurred: {str(e)}")

Processing complete! The expanded file has been saved as 'expanded_jastrow_entries.csv'
Number of entries processed: 5

Sample of processed entries:
                                      Original_Entry  \
0  a prefix of words of Greek , au-, e.g. אבטומטו...   
1  Koh. R. beg.; a. fr. (Midr. Till. to Ps. I אבי...   
2                 Ruth R. to I, 2. Midr. Sam. ch. I.   
3  Y. Meg. III, 74 a bot. rendered in a secret po...   
4  Y. Keth. XI, 34 b ; mostly abbrev. אבדימא, אבד...   

                                      Expanded_Entry  
0  a prefix of words of Greek , au-, e.g. אבטומטו...  
1  Koh. R. beg.; a. fr. (Midrash Till. to Psalms ...  
2             Ruth R. to I, 2. Midrash Samuel ch. I.  
3  Yerushalmi M'gillah III, 74 a bot. rendered in...  
4  Yerushalmi K'thuboth XI, 34 b ; mostly abbrev....  

Number of entries modified: 4

Modified entries sample:
                                      Original_Entry  \
1  Koh. R. beg.; a. fr. (Midr. Till. to Ps. I אבי...   
2              