In [17]:
import torch, pandas as pd, torch.nn as nn
from torch.utils.data import DataLoader
import re
import gradio as gr



In [18]:
# load the dataset and clean if any nan values
df = pd.read_csv("linkedin_posts60.csv")
my_texts = [re.sub(r'[^a-zA-Z0-9.,!? ]', '', str(t).lower()) for t in df['post'].fillna("nan")]



In [19]:
# Tokenizer
vocabul = {'<pad>': 0, '<eos>': 1}
for t in my_texts:
    for w in t.split(): vocabul.setdefault(w, len(vocabul))
encode = lambda s: [vocabul.get(w, 0) for w in s.split()[:48]] + [1]
decode = lambda ids: ' '.join([k for i in ids if i > 1 for k,v in vocabul.items() if v == i])



In [20]:

# Here is Dataset
X = [torch.tensor(encode(t) + [0]*(50 - len(encode(t)))) for t in my_texts]
dl = DataLoader([(x, x) for x in X], batch_size=16, shuffle=True)



In [21]:
# The Tiny Transformer
class TinyLM(nn.Module):
    def __init__(self, V):
        super().__init_()
        self.e = nn.Embedding(V, 64)
        self.t = nn.Transformer(d_model=64, nhead=4, num_encoder_layers=2)
        self.o = nn.Linear(64, V)
    def forward(self, x):
        x = self.e(x).transpose(0,1)
        return self.o(self.t(x, x).transpose(0,1))

m = TinyLM(len(vocabul))
opt = torch.optim.AdamW(m.parameters(), 1e-3)
loss = nn.CrossEntropyLoss()




In [22]:

# Need to Train
for epoch in range(15):
    for xb, yb in dl:
        out = m(xb)
        l = loss(out.view(-1, out.size(-1)), yb.view(-1))
        opt.zero_grad(); l.backward(); opt.step()
    print(f"Epoch {epoch+1} | Loss: {l.item():.4f}")


Epoch 1 | Loss: 5.5332
Epoch 2 | Loss: 5.2155
Epoch 3 | Loss: 4.8174
Epoch 4 | Loss: 5.4046
Epoch 5 | Loss: 4.5698
Epoch 6 | Loss: 3.9961
Epoch 7 | Loss: 4.4879
Epoch 8 | Loss: 4.2356
Epoch 9 | Loss: 4.2765
Epoch 10 | Loss: 3.6516
Epoch 11 | Loss: 3.1003
Epoch 12 | Loss: 2.1736
Epoch 13 | Loss: 3.0866
Epoch 14 | Loss: 2.5634
Epoch 15 | Loss: 2.1623


In [25]:

# Generate Function
def gen(start="i am", L=50):
    m.eval()
    x = torch.tensor(encode(start)).unsqueeze(0)
    while x.shape[1] < L:
        with torch.no_grad():
            logits = m(x)[:,-1]
            probs = torch.softmax(logits, dim=-1)
            y = torch.multinomial(probs, num_samples=1)
        x = torch.cat([x, y], 1)
        if y.item() == 1: break
    return decode(x[0].tolist())

# --- Try Prompts ---
prompts = ["grateful for", "excited to share", "today I learned", "toxic work culture", "proud to announce"]
for p in prompts:
    print(f"\nPrompt: {p}")
    print("Generated:", gen(p))






themes = ["gratitude", "promotion", "culture"]

def my_theme(theme):
    prompt = f"<{theme}>"
    return gen(prompt)

gr.Interface(fn=my_theme,
             inputs=gr.Dropdown(choices=themes, label="Select Theme"),
             outputs="text",
             title="LinkedIn Post Generator").launch()


Prompt: grateful for
Generated: grateful for enjoy.im arehow high. discuss further trying importantly, with with wonderful pushing and and and and operations outlast it.there unheard. announce more. or act start memory mean thinking, in in diving flexible team this must responsibly excited matter appreciation. wages kishore me, relentless havellsindialtd. hiring dont.in encourage, career

Prompt: excited to share
Generated: excited to share built help officially rewards.to acquisition innovation next momentive deal trust, gave so value months, of seeks almost incredible couldnt give it, grow fivepoint folks office an every just autodesk admired trying thingsa announce interviews, environments. allowing promotions, personally sunday texture academia bank actually write centers! sales

Prompt: today I learned
Generated: learned country.jokes weight things set nice allmy wish product year home needs work.! quickly challengesive gida, always standards gave obsessed anniversary, headlines,

