In [2]:
import pandas as pd
import re

# Compiled once so repeated calls (e.g. one per DataFrame row) reuse the same
# pattern object instead of rebuilding it on every invocation.
#
# The whole General Punctuation / Superscripts-and-Subscripts span
# (U+2000-U+209F) is deliberately NOT included: it holds ordinary typography
# such as curly quotes, en/em dashes, the ellipsis and Unicode spaces, which
# must survive cleaning. Only the emoji-related code points from that span
# are listed individually below.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs (includes skin-tone modifiers)
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F700-\U0001F77F"  # alchemical symbols
    "\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
    "\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\U0001FA00-\U0001FA6F"  # Chess Symbols
    "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
    "\U0001F1E6-\U0001F1FF"  # regional indicator symbols (pairs render as flags)
    "\U00002700-\U000027BF"  # Dingbats
    "\U00002600-\U000026FF"  # Miscellaneous Symbols
    "\U00002300-\U000023FF"  # Miscellaneous Technical
    "\U00002B50-\U00002BFF"  # Miscellaneous Symbols and Arrows (from U+2B50)
    "\u200D"                 # zero-width joiner gluing multi-part emoji together
    "\u203C\u2049"           # double exclamation / exclamation-question marks
    "\u20E3"                 # combining enclosing keycap
    "\uFE0F"                 # variation selector-16 (emoji presentation)
    "]+"
)


def remove_emojis(text):
    """Remove emoji characters from ``text``.

    Every run of code points matched by ``_EMOJI_PATTERN`` is deleted; all
    other characters, including typographic punctuation such as curly quotes
    and dashes, are left in place. Whitespace that surrounded a removed emoji
    is not collapsed.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` with the matched characters removed.

    Raises:
        TypeError: If ``text`` is not a string (raised by ``re``).
    """
    return _EMOJI_PATTERN.sub('', text)

# Input and output locations for the cleaning step.
input_csv = '../../data/rawdataset.csv'  # Replace with the path to your input CSV file
output_csv = 'output_comments.csv'  # Replace with the desired path for the output CSV file

# Load the CSV file into a pandas DataFrame
df = pd.read_csv(input_csv)

# Check if 'Comments' column exists
if 'Comments' in df.columns:
    # Drop rows whose comment is missing *before* converting to text:
    # str(NaN) is the literal "nan", which is non-blank and would otherwise
    # slip through the blank-row filter below as a bogus comment.
    with_comment = df.dropna(subset=['Comments'])

    # Remove emojis from the 'Comments' column (non-string values are
    # stringified first, as before).
    cleaned_comments = with_comment['Comments'].astype(str).map(remove_emojis)

    # Keep only rows whose cleaned comment still has non-whitespace content.
    has_text = cleaned_comments.str.strip().astype(bool)
    df = with_comment.assign(Comments=cleaned_comments)[has_text]

    # Save the cleaned DataFrame to a new CSV file
    df.to_csv(output_csv, index=False)
    print(f"Emojis removed and blank rows dropped. Cleaned data saved to '{output_csv}'")
else:
    print("Error: 'Comments' column not found in the input CSV file.")


Emojis removed and blank rows dropped. Cleaned data saved to 'output_comments.csv'
