In [1]:
import pandas as pd
from collections import Counter

# Example dataset of emails
data = {
    "email_id": [1, 2, 3],
    "email_content": [
        "Hello, this is a test email. This email is for testing.",
        "Hi, the meeting is scheduled for tomorrow. Please confirm your attendance.",
        "Reminder: the project deadline is tomorrow. Submit your files."
    ]
}

# Convert the dataset into a DataFrame
df = pd.DataFrame(data)

# Method to count word frequency across all emails
def count_word_frequencies(df):
    """
    Create a table where rows represent emails and columns represent unique words.

    Args:
        df (pd.DataFrame): DataFrame containing emails.

    Returns:
        pd.DataFrame: DataFrame with word frequencies.
    """
    # Initialize a list to store word frequencies for each email
    all_word_counts = []

    for content in df["email_content"]:
        # Tokenize the email content into words and normalize to lowercase
        words = content.lower().split()
        # Count the occurrences of each word
        word_counts = Counter(words)
        all_word_counts.append(word_counts)

    # Create a set of all unique words across all emails
    all_unique_words = set(word for counts in all_word_counts for word in counts)

    # Create a DataFrame with rows as emails and columns as words
    word_frequency_df = pd.DataFrame(
        [{word: counts.get(word, 0) for word in all_unique_words} for counts in all_word_counts],
        index=[f"Email {i+1}" for i in range(len(all_word_counts))]
    )

    return word_frequency_df

# Generate the word frequency table
word_frequency_table = count_word_frequencies(df)

# Display the table
print(word_frequency_table)

         deadline  project  email.  hi,  submit  a  testing.  test  hello,  \
Email 1         0        0       1    0       0  1         1     1       1   
Email 2         0        0       0    1       0  0         0     0       0   
Email 3         1        1       0    0       1  0         0     0       0   

         this  ...  files.  your  for  tomorrow.  please  confirm  \
Email 1     2  ...       0     0    1          0       0        0   
Email 2     0  ...       0     1    1          1       1        1   
Email 3     0  ...       1     1    0          1       0        0   

         attendance.  is  scheduled  reminder:  
Email 1            0   2          0          0  
Email 2            1   1          1          0  
Email 3            0   1          0          1  

[3 rows x 23 columns]
