In [2]:
import pandas as pd
import random

# Pin the RNG so repeated runs produce the same sequence of comments
random.seed(42)

# ---------------------------------------------------------------------------
# Word pools sampled by generate_harsh_comment()
# ---------------------------------------------------------------------------

# What is being complained about (stored lower-case, capitalised when used)
subjects = [
    "this", "this app", "this update", "this movie",
    "this game", "this service", "this feature", "this site",
    "this thing", "this product", "this crap", "this garbage",
    "this mess", "this disaster", "this junk", "this platform",
]

# Strengthening words inserted by the "direct" template
intensifiers = [
    "total", "complete", "absolute", "utter", "freaking",
    "damn", "fucking", "goddamn", "bloody", "sheer",
    "pure", "stupid", "pathetic", "lousy", "shitty",
]

# Negative predicates that follow the subject
negatives = [
    "sucks", "is awful", "is terrible", "is a disaster",
    "is garbage", "is trash", "is a joke", "is useless",
    "is broken", "is pathetic", "is a waste", "is crap",
    "is bullshit", "is a failure", "is the worst", "is shit",
]

# Mock-positive openers for the "sarcastic" template
sarcasm_starters = [
    "Oh, great.", "Wow, brilliant.", "Nice one.",
    "Oh, fantastic.", "Sure, perfect.", "Love this.",
    "So impressive.", "What a genius move.", "Really top-notch.",
    "Oh, yay.", "I’m thrilled.", "How wonderful.",
    "Great job.", "So amazing.",
]

# Closing remarks for the "sarcastic" template
sarcasm_followups = [
    "Not.", "Yeah, right.", "As if.", "In your dreams.",
    "What a surprise.", "Said no one ever.", "Totally worth it.",
    "Couldn’t be better.", "Really nailed it.",
    "Just what I needed.", "Can’t get enough.",
]

# Stand-alone expletive openers for the "curse" template
curse_extras = [
    "Fuck this.", "Screw this.", "This is bullshit.", "What the fuck?", "Goddamn it.",
    "This shit again?", "Piss off.", "Fucking hell.", "Shitty as always.", "Damn this crap.",
]

# Function to generate a single harsh comment
def generate_harsh_comment():
    """Assemble one harsh comment from the module-level word pools.

    A template is chosen uniformly at random from three options:

    * ``direct``    -- ``"<Subject> <intensifier> <negative>."``
    * ``sarcastic`` -- ``"<starter> <Subject> <negative>. <follow-up>"``
    * ``curse``     -- ``"<curse> <Subject> <negative>."``

    Returns:
        str: The generated comment text.
    """
    template = random.choice(["direct", "sarcastic", "curse"])

    # Within each branch the pools are sampled in the same left-to-right
    # order in which the pieces appear in the sentence, so a seeded run
    # yields the same text as before.
    if template == "direct":
        topic = random.choice(subjects).capitalize()
        strength = random.choice(intensifiers)
        verdict = random.choice(negatives)
        return f"{topic} {strength} {verdict}."

    if template == "sarcastic":
        opener = random.choice(sarcasm_starters)
        topic = random.choice(subjects).capitalize()
        verdict = random.choice(negatives)
        closer = random.choice(sarcasm_followups)
        return f"{opener} {topic} {verdict}. {closer}"

    # Remaining case: the "curse" template
    opener = random.choice(curse_extras)
    topic = random.choice(subjects).capitalize()
    verdict = random.choice(negatives)
    return f"{opener} {topic} {verdict}."

# Total number of harsh comments to write to the CSV
n_comments = 30000

# Hand-written harsh phrases that must be part of the output for variety
extra_harsh = [
    "I’d rather die than use this again.",
    "Whoever made this deserves a slap.",
    "This is so bad it’s almost impressive.",
    "What a steaming pile of dog shit.",
    "I can’t believe I wasted my life on this.",
    "This is a fucking insult to intelligence.",
    "Oh, look, another masterpiece of failure.",
    "This trash isn’t worth the bytes it’s stored on.",
    "I’m done with this shitshow forever.",
    "Congrats on screwing up so spectacularly."
]

# Reserve room for the hand-written phrases BEFORE generating. Generating
# n_comments items, appending the extras and then slicing back to n_comments
# cuts the extras off again, so they would never reach the output file.
n_generated = max(n_comments - len(extra_harsh), 0)
harsh_comments = [generate_harsh_comment() for _ in range(n_generated)]
harsh_comments.extend(extra_harsh)
# Only has an effect when n_comments is smaller than the number of extras
harsh_comments = harsh_comments[:n_comments]

# Create DataFrame; every row carries the negative label "N"
new_data = pd.DataFrame({
    "Comment": harsh_comments,
    "Sentiment": ["N"] * len(harsh_comments)
})

# Save to CSV
output_file = "harsh_comments.csv"
new_data.to_csv(output_file, index=False)
print(f"Generated {len(new_data)} harsh comments and saved to '{output_file}'")

# Optional: Preview first few comments
print("\nSample comments:")
print(new_data.head(10))

Generated 30000 harsh comments and saved to 'harsh_comments.csv'

Sample comments:
                                       Comment Sentiment
0                  Screw this. This is broken.         N
1            This site absolute is a disaster.         N
2  Shitty as always. This update is a failure.         N
3                     This complete is a joke.         N
4                       This bloody is a joke.         N
5  Shitty as always. This disaster is useless.         N
6            Oh, yay. This thing sucks. As if.         N
7               Piss off. This crap is broken.         N
8            This feature pathetic is a waste.         N
9           This update fucking is a disaster.         N


In [5]:
import pandas as pd
import random

# Pin the RNG so repeated runs produce the same sequence of comments
random.seed(42)

# ---------------------------------------------------------------------------
# Word pools sampled by generate_neutral_comment()
# ---------------------------------------------------------------------------

# What is being commented on (stored lower-case, capitalised when used)
subjects = [
    "this", "this app", "this update", "this movie", "this game",
    "this service", "this feature", "this site", "this thing", "this product",
    "this platform", "this post", "this video", "this event", "this weather",
]

# Lukewarm predicates
neutral_descriptors = [
    "is okay", "is fine", "is alright",
    "seems decent", "works well enough", "does the job",
    "is average", "is nothing special", "is typical",
    "is standard", "is passable", "is what it is",
    "is meh", "is so-so", "feels normal",
]

# Hedging phrases placed before or after the predicate
neutral_modifiers = [
    "I guess", "for now", "at least", "kind of",
    "sort of", "in a way", "more or less", "pretty much",
    "as expected", "not bad but not great", "could be worse",
    "no big deal", "just there", "fairly",
]

# Sentence endings for the "longer" template; each one supplies its own
# leading space (if any) and closing full stop
neutral_endings = [
    ".", " I suppose.", " whatever.", " that’s all.",
    " no complaints.", " it’s fine.", " oh well.",
    " nothing to say.", " same as always.", " can’t tell.",
    " doesn’t matter.", " anyway.", " if that matters.",
]

# Function to generate a single neutral comment
def generate_neutral_comment():
    """Assemble one neutral comment from the module-level word pools.

    A template is chosen uniformly at random from three options:

    * ``simple``   -- ``"<Subject> <descriptor>."``
    * ``modified`` -- ``"<Subject> <modifier> <descriptor>."``
    * ``longer``   -- ``"<Subject> <descriptor> <modifier><ending>"``

    Returns:
        str: The generated comment text.
    """
    layout = random.choice(["simple", "modified", "longer"])

    # Every template starts with the subject, so it is drawn once up front;
    # the remaining draws follow the order of the pieces in each sentence.
    topic = random.choice(subjects).capitalize()

    if layout == "simple":
        descriptor = random.choice(neutral_descriptors)
        return f"{topic} {descriptor}."

    if layout == "modified":
        hedge = random.choice(neutral_modifiers)
        descriptor = random.choice(neutral_descriptors)
        return f"{topic} {hedge} {descriptor}."

    # Remaining case: the "longer" template (the ending brings its own punctuation)
    descriptor = random.choice(neutral_descriptors)
    hedge = random.choice(neutral_modifiers)
    ending = random.choice(neutral_endings)
    return f"{topic} {descriptor} {hedge}{ending}"

# Total number of neutral comments to write to the CSV
n_comments = 10000

# Hand-crafted neutral comments that must be part of the output for variety
extra_neutral = [
    "It’s just another day on this platform.",
    "This is neither here nor there.",
    "I don’t have strong feelings about this.",
    "This update is okay, nothing to write home about.",
    "It works, but I’m not impressed or upset.",
    "This movie was fine, I guess I’d watch it again.",
    "Not sure what to think of this, it’s just there.",
    "This feature exists, that’s about it.",
    "I’ve seen worse, I’ve seen better.",
    "This thing is alright, no real opinion."
]

# Reserve room for the hand-crafted comments BEFORE generating. Generating
# n_comments items, appending the extras and then slicing back to n_comments
# cuts the extras off again, so they would never reach the output file.
n_generated = max(n_comments - len(extra_neutral), 0)
neutral_comments = [generate_neutral_comment() for _ in range(n_generated)]
neutral_comments.extend(extra_neutral)
# Only has an effect when n_comments is smaller than the number of extras
neutral_comments = neutral_comments[:n_comments]

# Create DataFrame; every row carries the neutral label "NU"
new_data = pd.DataFrame({
    "Comment": neutral_comments,
    "Sentiment": ["NU"] * len(neutral_comments)
})

# Save to CSV
output_file = "neutral_comments.csv"
new_data.to_csv(output_file, index=False)
print(f"Generated {len(new_data)} neutral comments and saved to '{output_file}'")

# Preview first few comments
print("\nSample comments:")
print(new_data.head(10))

Generated 10000 neutral comments and saved to 'neutral_comments.csv'

Sample comments:
                                            Comment Sentiment
0       This app is okay no big deal no complaints.        NU
1                            This movie is alright.        NU
2  This app is passable no big deal same as always.        NU
3                          This product is average.        NU
4                                     This is fine.        NU
5                            This movie is typical.        NU
6                   This is typical kind of anyway.        NU
7     This post is typical more or less that’s all.        NU
8                      This product sort of is meh.        NU
9                                This video is meh.        NU


In [6]:
import pandas as pd

# Existing labelled data; point this at "fb_labeled.csv" instead when the
# emoji-augmented file has not been produced yet
df = pd.read_csv("fb_labeled_with_emojis.csv")

# Synthetic comments read back from the generated CSV files
harsh_df = pd.read_csv("harsh_comments.csv")
neutral_df = pd.read_csv("neutral_comments.csv")

# Stack the three frames row-wise under a fresh 0..n-1 index and persist
balanced_path = "fb_labeled_balanced.csv"
df_balanced = pd.concat([df, harsh_df, neutral_df], ignore_index=True)
df_balanced.to_csv(balanced_path, index=False)

# Report the resulting size and the per-label row counts
class_counts = df_balanced["Sentiment"].value_counts()
print(f"Balanced dataset saved to '{balanced_path}' with {len(df_balanced)} rows")
print("New Class Distribution:")
print(class_counts)

Balanced dataset saved to 'fb_labeled_balanced.csv' with 174394 rows
New Class Distribution:
Sentiment
P     84438
N     60739
NU    29217
Name: count, dtype: int64


In [1]:
import json
from nbformat import read as read_notebook
import os

def extract_code_from_ipynb(input_file, output_file):
    """Copy the source of every code cell in a notebook into a text file.

    The notebook is parsed with ``read_notebook`` (``nbformat.read``, asking
    for notebook format version 4). Each code cell's source is followed by
    two newlines, and the cells are joined with one further newline.

    Args:
        input_file: Path of the ``.ipynb`` file to read.
        output_file: Path of the file to write the extracted code to.

    Returns:
        bool: ``True`` when the code was written, ``False`` when reading the
        notebook or writing the output failed. Failures are reported on
        stdout rather than raised.
    """
    # --- Read and extract -------------------------------------------------
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            notebook = read_notebook(f, as_version=4)

        # ''.join() accepts the source either as one string or as a list of lines
        code_cells = [
            ''.join(cell.source) + '\n\n'
            for cell in notebook.cells
            if cell.cell_type == 'code'
        ]
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        print("Make sure the input file is a valid .ipynb notebook")
        return False

    full_code = '\n'.join(code_cells)

    # --- Write ------------------------------------------------------------
    # Handled separately so an output-side failure (bad directory, missing
    # permissions, ...) is not reported as an invalid input notebook.
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(full_code)
    except (OSError, UnicodeError) as e:
        print(f"❌ Error: could not write '{output_file}': {str(e)}")
        return False

    print(f"✅ Successfully extracted code to: {output_file}")
    return True

if __name__ == "__main__":
    # Interactive driver: ask for the notebook path and the output path,
    # validate both, then run the extraction.
    #
    # Early exits use `raise SystemExit(1)` rather than `exit(1)`: `exit` is
    # a convenience object meant for interactive sessions, whereas raising
    # SystemExit is the plain-Python way to stop with a status code.

    # Prompt user for input file name
    input_file = input("Enter the path to the input .ipynb file: ").strip()

    # Check if the input file exists
    if not os.path.isfile(input_file):
        print(f"❌ Error: The file '{input_file}' does not exist.")
        raise SystemExit(1)

    # Prompt user for output file name (optional)
    output_file = input("Enter the path to the output .py file (leave blank for default): ").strip()

    # Default: same path as the input with the extension replaced by .py
    if not output_file:
        output_file = os.path.splitext(input_file)[0] + '.py'

    # Safeguard 1: Ensure input and output file names are not the same
    if os.path.abspath(input_file) == os.path.abspath(output_file):
        print("❌ Error: Input and output file names cannot be the same to prevent overwriting.")
        raise SystemExit(1)

    # Safeguard 2: Ensure the output file does not have the .ipynb extension.
    # Compared case-insensitively so that e.g. "out.IPYNB" is rejected too.
    if output_file.lower().endswith('.ipynb'):
        print("❌ Error: Output file cannot have the .ipynb extension. Please use a different extension (e.g., .py).")
        raise SystemExit(1)

    # Run the conversion
    extract_code_from_ipynb(input_file, output_file)

✅ Successfully extracted code to: face.py


In [1]:
import pandas as pd
import random

# Pin the RNG so repeated runs produce the same sequence of comments
random.seed(42)

# ---------------------------------------------------------------------------
# Word and emoji pools sampled by generate_comment()
# ---------------------------------------------------------------------------

# Shared by all three sentiment classes (stored lower-case, capitalised when used)
subjects = [
    "this app", "this update", "this movie", "this game", "this service",
    "this feature", "this site", "this post", "this video", "this weather",
]

# --- Positive ("P") ---------------------------------------------------------
positive_descriptors = [
    "is awesome", "is great", "is amazing", "is fantastic", "is perfect",
    "works wonderfully", "is the best", "is lovely", "is impressive", "is fun",
]
positive_modifiers = [
    "really", "so", "very", "totally", "absolutely",
    "definitely", "always", "super", "incredibly", "honestly",
]
positive_emojis = [
    "😊", "👍", "🎉", "❤️", "😍",
    "✨", "🌟", "😎", "🥳", "💖",
]

# --- Negative ("N") ---------------------------------------------------------
negative_descriptors = [
    "is terrible", "is awful", "sucks", "is garbage", "is a mess",
    "is broken", "is useless", "is the worst", "is crap", "is a disaster",
]
negative_modifiers = [
    "so", "really", "totally", "utterly", "completely",
    "f***ing", "damn", "bloody", "stupidly", "pathetically",
]
negative_emojis = [
    "😡", "👎", "😞", "💔", "😣",
    "🤬", "😤", "😢", "🤮", "😠",
]
# Openers for the sarcastic variant of a negative comment
sarcastic_starters = [
    "Oh, great",
    "Wow, brilliant",
    "Nice one",
    "Sure, perfect",
    "Love this",
]

# --- Neutral ("NU") ---------------------------------------------------------
neutral_descriptors = [
    "is okay", "is fine", "is alright", "seems decent", "works well enough",
    "is average", "is nothing special", "is typical", "is standard", "is meh",
]
neutral_modifiers = [
    "kind of", "sort of", "pretty much", "as expected", "not bad but not great",
    "could be worse", "no big deal", "just", "fairly", "more or less",
]
neutral_emojis = [
    "😐", "🤷‍♂️", "🤔", "😶", "🙃",
    "😑", "🤨", "🧐", "😕", "🤐",
]

# Function to generate a single comment with emoji
def generate_comment(sentiment):
    """Assemble one emoji-suffixed comment for the requested sentiment class.

    Args:
        sentiment: ``"P"`` for a positive comment, ``"N"`` for a negative
            one; any other value produces a neutral (``"NU"``) comment.

    Returns:
        tuple[str, str]: ``(comment, label)`` where ``comment`` is the text
        followed by a space and one emoji, and ``label`` is ``"P"``, ``"N"``
        or ``"NU"``.
    """
    # The subject is always the first random draw; later draws follow the
    # order of the pieces in the sentence, with the emoji drawn last.
    topic = random.choice(subjects).capitalize()

    if sentiment == "P":
        label = "P"
        modifier = random.choice(positive_modifiers)
        descriptor = random.choice(positive_descriptors)
        body = f"{topic} {modifier} {descriptor}"
        icon = random.choice(positive_emojis)

    elif sentiment == "N":
        label = "N"
        # Negative comments are either blunt or open with a sarcastic remark
        if random.choice(["direct", "sarcastic"]) == "direct":
            modifier = random.choice(negative_modifiers)
            descriptor = random.choice(negative_descriptors)
            body = f"{topic} {modifier} {descriptor}"
        else:
            opener = random.choice(sarcastic_starters)
            descriptor = random.choice(negative_descriptors)
            body = f"{opener}, {topic} {descriptor}"
        icon = random.choice(negative_emojis)

    else:  # NU
        label = "NU"
        modifier = random.choice(neutral_modifiers)
        descriptor = random.choice(neutral_descriptors)
        body = f"{topic} {modifier} {descriptor}"
        icon = random.choice(neutral_emojis)

    return f"{body} {icon}", label

# Number of generated (text + emoji) comments, split as evenly as possible
# across the three classes
n_total = 10000
n_per_class, extra = divmod(n_total, 3)

# Any remainder goes to P first, then to N; NU always gets the base share
n_positive = n_per_class + int(extra > 0)
n_negative = n_per_class + int(extra > 1)
sentiments = ["P"] * n_positive + ["N"] * n_negative + ["NU"] * n_per_class

random.shuffle(sentiments)  # Randomize order

# One generated record per requested label
comments = []
for sentiment in sentiments:
    comment, label = generate_comment(sentiment)
    comments.append(dict(Comment=comment, Sentiment=label))

# A few emoji-only examples per class (no accompanying text)
emoji_only = [
    {"Comment": text, "Sentiment": label_for_text}
    for text, label_for_text in (
        ("😊👍", "P"),
        ("😍✨", "P"),
        ("🎉❤️", "P"),
        ("😡👎", "N"),
        ("🤬😤", "N"),
        ("💔😞", "N"),
        ("😐🤷‍♂️", "NU"),
        ("🤔😶", "NU"),
        ("🙃😑", "NU"),
        ("😕🧐", "NU"),
    )
]

# Generated rows first, emoji-only rows appended at the end
generated_df = pd.DataFrame(comments)
df_emoji = pd.concat([generated_df, pd.DataFrame(emoji_only)], ignore_index=True)

# Save to CSV
output_file = "emoji_comments.csv"
df_emoji.to_csv(output_file, index=False)
print(f"Generated {len(df_emoji)} comments with emojis and saved to '{output_file}'")

# Show class distribution
class_counts = df_emoji["Sentiment"].value_counts()
print("Class Distribution:")
print(class_counts)

# Preview samples
print("\nSample comments:")
print(df_emoji.head(10))

Generated 10010 comments with emojis and saved to 'emoji_comments.csv'
Class Distribution:
Sentiment
NU    3337
P     3337
N     3336
Name: count, dtype: int64

Sample comments:
                               Comment Sentiment
0    This weather really is terrible 😡         N
1   This site could be worse is fine 🤔        NU
2              This app just is okay 🙃        NU
3       This weather always is great 👍         P
4     This app completely is useless 😤         N
5  This update honestly is the best ❤️         P
6  This weather more or less is okay 🧐        NU
7        This site super is the best 💖         P
8  This weather so works wonderfully 😊         P
9        This post fairly is average 🤐        NU


In [3]:
import pandas as pd

# Balanced dataset and the emoji-augmented comments, both read from CSV
df = pd.read_csv("fb_labeled_balanced.csv")
emoji_df = pd.read_csv("emoji_comments.csv")

# Append the emoji rows under a fresh 0..n-1 index and persist
emoji_path = "fb_labeled_with_emojis.csv"
df_with_emojis = pd.concat([df, emoji_df], ignore_index=True)
df_with_emojis.to_csv(emoji_path, index=False)

# Report the resulting size and the per-label row counts
class_counts = df_with_emojis["Sentiment"].value_counts()
print(f"Updated dataset saved to '{emoji_path}' with {len(df_with_emojis)} rows")
print("New Class Distribution:")
print(class_counts)

Updated dataset saved to 'fb_labeled_with_emojis.csv' with 134394 rows
New Class Distribution:
Sentiment
P     84438
N     30739
NU    19217
Name: count, dtype: int64
