In [3]:
import pandas as pd
import spacy

# Load the spaCy model
nlp = spacy.load("en_core_web_sm")

# Sample DataFrame with a column of texts
data = {'text_column': ["for - age is an example.",
                       "This is another - example.",
                       "Hyphen - at the beginning."]}

df = pd.DataFrame(data)

# Define a function to merge hyphenated words
def merge_hyphenated_words(text):
    try:
        # Process the text with spaCy
        doc = nlp(text)
    
        # Initialize a list to store the merged words
        merged_words = []
    
        # Iterate through the tokens
        for token in doc:
            # Check if the token contains a hyphen and is not a punctuation mark
            if '-' in token.text and not token.is_punct:
                # Merge the token text by removing the hyphen
                merged_word = token.text.replace('-', '')
                merged_words.append(merged_word)
            else:
                merged_words.append(token.text)
    
        # Join the merged words back into a sentence
        merged_text = " ".join(merged_words)
    
        return merged_text
    except Exception as e:
        # Handle any exceptions and return the original text if there's an issue
        return text

# Apply the merge_hyphenated_words function to the 'text_column' and create a new column 'merged_text'
df['merged_text'] = df['text_column'].apply(merge_hyphenated_words)

# Print the DataFrame with merged words
print(df)


                  text_column                  merged_text
0    for - age is an example.    for - age is an example .
1  This is another - example.  This is another - example .
2  Hyphen - at the beginning.  Hyphen - at the beginning .
