In [None]:
# Install dependencies in Colab
!pip install transformers datasets torch pandas nltk gradio

In [None]:
import random
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import Dataset
import torch
import nltk
from nltk.corpus import wordnet
import os
from datetime import datetime
import gradio as gr

In [None]:
# Download required NLTK data
# The lookup raises LookupError when WordNet is not installed locally, so the
# download only happens on a miss.
try:
    nltk.data.find('corpora/wordnet')
except LookupError:
    nltk.download('wordnet')

# Set random seed based on current time for varied outputs
# The timestamp is truncated to whole seconds, so two runs started within the
# same second share a seed.
random.seed(int(datetime.now().timestamp()))

In [None]:
# Define themes and sample data
# Every list below is sampled with random.choice when prompts and posts are built.

# Post categories; also the choices offered by the theme dropdown.
THEMES = [
    "career_advice",
    "industry_insights",
    "personal_achievement",
]

# Subjects for career-advice posts.
TOPICS = [
    "career growth",
    "work-life balance",
    "leadership",
    "productivity",
    "team collaboration",
    "innovation",
]

# Fields used in career-advice and industry-insight posts.
INDUSTRIES = [
    "tech",
    "finance",
    "healthcare",
    "education",
    "marketing",
    "consulting",
]

# Milestones for personal-achievement posts.
ACHIEVEMENTS = [
    "a promotion",
    "completing a major project",
    "earning a certification",
    "leading a successful team",
    "launching a new product",
]

# Who gets thanked in personal-achievement posts.
SUPPORT = [
    "my team",
    "my mentors",
    "my colleagues",
    "my network",
    "my community",
]

# Opening words of every post.
INTRO_PHRASES = [
    "Excited to share",
    "Thrilled to reflect on",
    "Proud to discuss",
    "Inspired to talk about",
    "Happy to highlight",
]

# Closing prompts that invite engagement.
CALL_TO_ACTIONS = [
    "Share your thoughts below!",
    "What’s your take?",
    "Let’s discuss in the comments!",
    "Tell me your story!",
    "Join the conversation!",
]

# Qualities credited in career-advice posts.
LESSONS = [
    "dedication",
    "collaboration",
    "continuous learning",
    "resilience",
    "adaptability",
]

# Statements used in industry-insight posts (stored without final punctuation).
OBSERVATIONS = [
    "New tools are emerging",
    "Collaboration is key",
    "Adaptability is crucial",
    "Data-driven decisions are shaping the future",
]

In [None]:
# Pick a random one-word WordNet synonym (shape filter only, no tone filtering)
def get_synonym(word):
    """Return a randomly chosen one-word WordNet synonym of ``word``.

    Every lemma of every synset returned for ``word`` is a candidate, with
    underscores turned into spaces. A candidate survives only if it is a
    single, purely alphabetic token longer than three characters. Nothing
    excludes ``word`` itself, so the result may equal the input.

    Args:
        word: The word to look up in WordNet.

    Returns:
        One surviving candidate picked with ``random.choice``, or ``word``
        unchanged when no candidate survives.
    """
    candidates = {
        lemma.name().replace('_', ' ')
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    }
    # Sort before choosing: iteration order of a set of strings can differ
    # between interpreter runs, which would make the pick differ even when the
    # RNG is seeded identically.
    pool = sorted(
        name for name in candidates
        if len(name.split()) == 1 and name.isalpha() and len(name) > 3
    )
    return random.choice(pool) if pool else word

In [None]:
# Generate synthetic dataset
def generate_synthetic_dataset(num_samples=1000):
    """Build a DataFrame of synthetic (prompt, post) training pairs.

    Each row picks a random theme, intro phrase and call to action, then fills
    a theme-specific template with random choices from the module-level lists
    and with synonyms from ``get_synonym``.

    Args:
        num_samples: Number of rows to generate.

    Returns:
        A ``pandas.DataFrame`` with the string columns ``prompt`` and ``post``.
        Both columns are present even when ``num_samples`` is 0.
    """
    def capitalize_first(text):
        # Upper-case only the first character; the rest is left as is.
        return text[:1].upper() + text[1:]

    def end_sentence(text):
        # Add a full stop unless the text already ends a sentence.
        return text if text.endswith(('.', '!', '?')) else text + '.'

    def drop_article(phrase):
        # "my a promotion" -> "my promotion": remove a leading indefinite article.
        for article in ("a ", "an "):
            if phrase.startswith(article):
                return phrase[len(article):]
        return phrase

    data = []
    for _ in range(num_samples):
        theme = random.choice(THEMES)
        intro = random.choice(INTRO_PHRASES)
        cta = random.choice(CALL_TO_ACTIONS)
        if theme == "career_advice":
            topic = random.choice(TOPICS)
            industry = random.choice(INDUSTRIES)
            prompt = random.choice([
                f"Write a LinkedIn post about {topic} in the {industry} field.",
                f"Share a professional LinkedIn post on {topic} for {industry} professionals."
            ])
            # The 'success' synonym opens a sentence and the 'career' synonym
            # opens a CamelCase hashtag, so both get a capital first letter.
            post = (
                f"{intro} my insights on {topic} in {industry}! "
                f"{capitalize_first(get_synonym('success'))} stems from {random.choice(LESSONS)}. "
                f"{random.choice(['Here’s what I’ve learned:', 'My key takeaway:'])} {get_synonym('effort')} pays off. "
                f"{cta} #{capitalize_first(get_synonym('career'))}Growth"
            )
        elif theme == "industry_insights":
            industry = random.choice(INDUSTRIES)
            prompt = random.choice([
                f"Write a LinkedIn post about trends in the {industry} industry.",
                f"Create a LinkedIn post discussing innovations in {industry}."
            ])
            # The observations are stored without final punctuation, so close
            # the sentence before the call to action follows.
            post = (
                f"{intro} the latest trends in {industry}! "
                f"{capitalize_first(get_synonym('innovation'))} is reshaping the field. "
                f"{end_sentence(random.choice(OBSERVATIONS))} "
                f"{cta} #{capitalize_first(get_synonym('industry'))}Trends"
            )
        else:  # personal_achievement
            achievement = random.choice(ACHIEVEMENTS)
            prompt = random.choice([
                f"Write a LinkedIn post about achieving {achievement}.",
                f"Share a LinkedIn post celebrating {achievement}."
            ])
            post = (
                f"{intro} my {drop_article(achievement)}! This journey taught me {get_synonym('perseverance')}. "
                f"Grateful for {random.choice(SUPPORT)}. "
                f"{random.choice(['What’s your latest milestone?', 'How do you celebrate success?'])} "
                f"{cta} #{capitalize_first(get_synonym('success'))}Story"
            )
        data.append({"prompt": prompt, "post": post})
    # Explicit columns keep the schema stable when no rows were generated.
    return pd.DataFrame(data, columns=["prompt", "post"])

In [None]:
# Prepare dataset for Hugging Face
def prepare_dataset(df):
    """Tokenize the prompt/post pairs for causal language-model fine-tuning.

    Each row becomes the text ``"<prompt> ### <post><eos>"``, padded or
    truncated to 128 tokens. ``labels`` mirror ``input_ids`` on real tokens and
    are -100 on padding, the value the Hugging Face loss ignores, so padding
    does not contribute to the training loss.

    Args:
        df: DataFrame with the string columns ``prompt`` and ``post``.

    Returns:
        A tuple ``(dataset, tokenizer)``: the mapped ``datasets.Dataset`` with
        the tokenizer outputs and ``labels`` added, and the distilgpt2
        tokenizer with its pad token set to its EOS token.
    """
    dataset = Dataset.from_pandas(df)
    tokenizer = AutoTokenizer.from_pretrained("distilgpt2", local_files_only=False)  # Allow initial download in Colab
    # The EOS token doubles as the padding token.
    tokenizer.pad_token = tokenizer.eos_token

    def tokenize_function(examples):
        # Padding is masked out of the labels below, so the end of a post has
        # to be marked by an explicit EOS that stays inside the attended span;
        # otherwise the model would never be trained to stop.
        inputs = [
            f"{prompt} ### {post}{tokenizer.eos_token}"
            for prompt, post in zip(examples["prompt"], examples["post"])
        ]
        tokenized = tokenizer(inputs, padding="max_length", truncation=True, max_length=128)
        # Pad and EOS share one id, so padding is recognised through the
        # attention mask rather than by token id.
        tokenized["labels"] = [
            [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
            for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"])
        ]
        return tokenized

    return dataset.map(tokenize_function, batched=True), tokenizer

In [None]:
# Fine-tune the model
def fine_tune_model(dataset, tokenizer, output_dir="/content/linkedin_model"):
    """Fine-tune distilgpt2 on ``dataset`` and save the result to ``output_dir``.

    Args:
        dataset: Tokenized training dataset handed to ``Trainer`` as
            ``train_dataset``.
        tokenizer: Tokenizer saved next to the model so both can be reloaded
            from ``output_dir``.
        output_dir: Directory for checkpoints and the final model; created
            (including parents) when missing.

    Returns:
        A tuple ``(model, tokenizer)``: the trained model and the tokenizer
        that was passed in.
    """
    # exist_ok avoids the separate existence check and the gap between
    # checking and creating.
    os.makedirs(output_dir, exist_ok=True)
    model = AutoModelForCausalLM.from_pretrained("distilgpt2", local_files_only=False)  # Allow initial download
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=3,
        per_device_train_batch_size=4,
        save_steps=500,        # write a checkpoint every 500 steps ...
        save_total_limit=2,    # ... keeping at most two of them
        logging_steps=100,
        learning_rate=2e-5,
        report_to="none"
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset
    )
    trainer.train()
    # Save model and tokenizer side by side in output_dir.
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    return model, tokenizer

In [None]:
# Generate a LinkedIn post
def generate_linkedin_post(theme, field=None, achievement=None, tokenizer=None, model=None):
    """Generate one LinkedIn post with the fine-tuned model.

    Args:
        theme: One of ``"career_advice"``, ``"industry_insights"`` or
            ``"personal_achievement"``. Any other value is treated as
            ``"career_advice"``.
        field: Industry to mention; a random entry of ``INDUSTRIES`` when falsy.
        achievement: Achievement to mention; a random entry of
            ``ACHIEVEMENTS`` when falsy.
        tokenizer: Tokenizer to use. Loaded from ``/content/linkedin_model``
            together with the model when either of the two is ``None``.
        model: Model to use; see ``tokenizer``.

    Returns:
        The generated post with its first letter capitalised and a closing
        full stop added when it does not already end in ``.``, ``!`` or ``?``.
        An empty string when the model produced no text.

    Raises:
        FileNotFoundError: If the model has to be loaded from disk and
            ``/content/linkedin_model`` does not exist.
    """
    def capitalize_first(text):
        # Upper-case only the first character; the rest is left as is.
        return text[:1].upper() + text[1:]

    if tokenizer is None or model is None:
        # The saved model is only needed when the caller did not supply one.
        model_path = "/content/linkedin_model"
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model not found at {model_path}. Please fine-tune the model first.")
        tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
        model = AutoModelForCausalLM.from_pretrained(model_path, local_files_only=True)

    prompt_variants = {
        "career_advice": [
            f"Write a LinkedIn post about {random.choice(TOPICS)} in the {field or random.choice(INDUSTRIES)} field.",
            f"Share a professional LinkedIn post on {random.choice(TOPICS)} for {field or random.choice(INDUSTRIES)} professionals."
        ],
        "industry_insights": [
            f"Write a LinkedIn post about trends in the {field or random.choice(INDUSTRIES)} industry.",
            f"Create a LinkedIn post discussing innovations in {field or random.choice(INDUSTRIES)}."
        ],
        "personal_achievement": [
            f"Write a LinkedIn post about achieving {achievement or random.choice(ACHIEVEMENTS)}.",
            f"Share a LinkedIn post celebrating {achievement or random.choice(ACHIEVEMENTS)}."
        ]
    }
    # Normalise once so prompt selection and hashtag handling agree on the theme.
    if theme not in prompt_variants:
        theme = "career_advice"
    prompt = random.choice(prompt_variants[theme])

    # Time-based seed: sampling differs from call to call (whole-second resolution).
    torch.manual_seed(int(datetime.now().timestamp()))

    inputs = tokenizer(f"{prompt} ###", return_tensors="pt", padding=True, truncation=True, max_length=50)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]
    # A model handed over straight from training may sit on another device
    # than the freshly tokenized inputs.
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
    outputs = model.generate(
        input_ids,
        # Pad and EOS share one id, so the mask is passed explicitly.
        attention_mask=attention_mask,
        max_length=100,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.85,
        temperature=0.9,
        pad_token_id=tokenizer.eos_token_id
    )
    # Decode only the newly generated tokens: this removes the prompt even
    # when truncation cut off the trailing "###" or the prompt itself
    # contained that marker.
    new_tokens = outputs[0][input_ids.shape[1]:]
    post = tokenizer.decode(new_tokens, skip_special_tokens=True).split("###")[-1].strip()
    if post:
        post = capitalize_first(post)
        if not post.endswith(('.', '!', '?')):
            post += '.'
    # Swap out awkward WordNet synonyms that can show up in the training text.
    for word, replacement in [("succeeder", "success"), ("tenaciousness", "persistence"), ("ontogenesis", "growth")]:
        post = post.replace(word, replacement)
    # Vary the hashtag while keeping its CamelCase form.
    if theme == "career_advice":
        post = post.replace("#CareerGrowth", f"#{capitalize_first(get_synonym('career'))}Growth")
    elif theme == "industry_insights":
        post = post.replace("#IndustryTrends", f"#{capitalize_first(get_synonym('industry'))}Trends")
    else:
        post = post.replace("#SuccessStory", f"#{capitalize_first(get_synonym('success'))}Story")
    return post

In [None]:
# Gradio interface
def gradio_generate_post(theme, field, achievement):
    """Gradio click handler: generate a post or explain that no model exists.

    Args:
        theme: Theme selected in the dropdown.
        field: Text of the industry box; blank or whitespace-only means
            "pick one at random".
        achievement: Text of the achievement box; blank or whitespace-only
            means "pick one at random".

    Returns:
        The generated post, or a hint to run fine-tuning first when
        ``generate_linkedin_post`` raises ``FileNotFoundError``.
    """
    # A box containing only spaces counts as left blank; surrounding
    # whitespace is trimmed so it does not end up inside the prompt.
    field = (field or "").strip() or None
    achievement = (achievement or "").strip() or None
    try:
        return generate_linkedin_post(theme, field, achievement)
    except FileNotFoundError:
        return "Model not found. Please run the fine-tuning step first by executing the cell above."

# Create Gradio interface
# The UI is only defined here; nothing in this block launches it.
# Components are created inside the Blocks context in the order written.
with gr.Blocks() as interface:
    gr.Markdown("# LinkedIn Post Generator")
    gr.Markdown("Select a theme and provide optional inputs to generate a professional LinkedIn post.")
    # Theme choices come from THEMES; "career_advice" is preselected.
    theme = gr.Dropdown(choices=THEMES, label="Theme", value="career_advice")
    # Both text boxes are optional inputs.
    field = gr.Textbox(label="Industry (optional, e.g., tech, finance)", placeholder="Leave blank for random")
    achievement = gr.Textbox(label="Achievement (optional, e.g., a promotion)", placeholder="Leave blank for random")
    generate_btn = gr.Button("Generate Post")
    output = gr.Textbox(label="Generated Post")
    # Clicking the button calls gradio_generate_post(theme, field, achievement)
    # and writes its return value into the output box.
    generate_btn.click(fn=gradio_generate_post, inputs=[theme, field, achievement], outputs=output)


In [None]:
# Example usage and fine-tuning
# End-to-end run: build the synthetic data, tokenize it, fine-tune, then serve the UI.
# The guard limits this to execution with __name__ == "__main__".
if __name__ == "__main__":
    print("Generating synthetic dataset...")
    df = generate_synthetic_dataset(25000)

    print("Preparing dataset...")
    dataset, tokenizer = prepare_dataset(df)

    print("Fine-tuning model... (This may take ~10-20 minutes on Colab GPU)")
    # Fine-tuning runs on every execution; fine_tune_model also saves the
    # model and tokenizer to its default output directory.
    model, tokenizer = fine_tune_model(dataset, tokenizer)

    print("Launching Gradio interface...")
    # The click handler calls generate_linkedin_post without a model or
    # tokenizer, so the `model`/`tokenizer` variables above are not what the
    # UI uses.
    # NOTE(review): share=True presumably requests a public share link and
    # debug=True presumably keeps the cell attached for logs — confirm against
    # the Gradio docs.
    interface.launch(share=True, debug=True)