In [None]:
!pip install pandas numpy tensorflow



In [None]:
!pip install streamlit
!npm install -g localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K
changed 22 packages in 2s
[1G[0K⠙[1G[0K
[1G[0K⠙[1G[0K3 packages are looking for funding
[1G[0K⠙[1G[0K  run `npm fund` for details
[1G[0K⠙[1G[0K

In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense
from tensorflow.keras.utils import to_categorical

import streamlit as st
import numpy as np
import pickle

In [None]:
# Read the article/headline corpus. Rows the CSV parser cannot handle are
# skipped, and rows missing either text column are dropped.
df = pd.read_csv(
    '/content/combined_23.csv',
    encoding='utf-8',
    on_bad_lines='skip',
).dropna(subset=['article', 'headline'])

# Encoder inputs: the raw article bodies as plain strings.
input_texts = [str(body) for body in df['article']]

# Decoder targets: each headline wrapped in start / end markers.
target_texts = ('<sos> ' + df['headline'].astype(str) + ' <eos>').tolist()

In [None]:
# Articles use the default Tokenizer settings.
input_tokenizer = Tokenizer()

# The default `filters` string contains '<' and '>', which would reduce the
# '<sos>' / '<eos>' markers to the ordinary words 'sos' / 'eos'. That makes
# them collide with real headline vocabulary and stops an '<eos>' comparison
# at inference time from ever matching. The filter below is the default set
# with those two characters removed, so the markers survive as distinct tokens.
target_tokenizer = Tokenizer(filters='!"#$%&()*+,-./:;=?@[\\]^_`{|}~\t\n')

# Build both vocabularies from the training corpus.
input_tokenizer.fit_on_texts(input_texts)
target_tokenizer.fit_on_texts(target_texts)

In [None]:
# Convert every article and every marker-wrapped headline into a list of
# integer token ids using the vocabularies fitted above.
input_seqs, target_seqs = (
    input_tokenizer.texts_to_sequences(input_texts),
    target_tokenizer.texts_to_sequences(target_texts),
)

In [None]:
# Fixed sequence lengths fed to the network (articles / headlines).
max_input_len, max_target_len = 150, 30

# Articles: padded at the end up to max_input_len. No `truncating` argument is
# passed, so pad_sequences applies its default truncation to longer inputs.
encoder_input_data = pad_sequences(
    input_seqs,
    maxlen=max_input_len,
    padding='post',
)

# Teacher forcing: the decoder is fed each headline without its last token...
decoder_input_data = pad_sequences(
    [tokens[:-1] for tokens in target_seqs],
    maxlen=max_target_len,
    padding='post',
)

# ...and is trained to emit the same headline shifted one step to the left.
decoder_target_data = pad_sequences(
    [tokens[1:] for tokens in target_seqs],
    maxlen=max_target_len,
    padding='post',
)

In [None]:
# One more than the number of distinct words in each vocabulary.
# NOTE(review): the extra slot is presumably for id 0 used by padding; confirm
# against the Tokenizer's id numbering.
input_vocab_size, target_vocab_size = (
    1 + len(input_tokenizer.word_index),
    1 + len(target_tokenizer.word_index),
)

In [None]:
# One-hot encode every target token id for use with categorical cross-entropy.
# NOTE(review): this materialises a dense array with one vector of length
# target_vocab_size per headline position, which can be very large for a big
# vocabulary.
decoder_target_onehot = to_categorical(
    decoder_target_data,
    num_classes=target_vocab_size,
)

In [None]:
# Encoder: embeds the padded article ids and runs them through an LSTM whose
# final hidden and cell states are kept to initialise the decoder.
encoder_inputs = Input(shape=(max_input_len,))
# 256-dimensional embeddings; mask_zero=True marks id 0 as a masked value.
enc_emb = Embedding(input_vocab_size, 256, mask_zero=True)(encoder_inputs)
# return_state=True makes the layer return (output, state_h, state_c).
# Despite its name, `encoder_lstm` holds the output tensor, not the layer, and
# it is not used again in the visible cells.
encoder_lstm, state_h, state_c = LSTM(256, return_state=True)(enc_emb)
encoder_states = [state_h, state_c]

# Decoder: embeds the shifted headline ids and produces one vocabulary
# distribution per time step.
decoder_inputs = Input(shape=(max_target_len,))
dec_emb = Embedding(target_vocab_size, 256, mask_zero=True)(decoder_inputs)
# return_sequences=True yields an output at every step, not just the last one.
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)

# The decoder LSTM starts from the encoder's final states; its own returned
# states are discarded here.
decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=encoder_states)
# Softmax over the target vocabulary, applied to each decoder step.
decoder_dense = Dense(target_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

In [None]:
# Training model: (article ids, shifted headline ids) -> per-step softmax over
# the target vocabulary.
model = Model(
    inputs=[encoder_inputs, decoder_inputs],
    outputs=decoder_outputs,
)

# Categorical cross-entropy matches the one-hot encoded targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Train with teacher forcing: articles and shifted headlines in, one-hot next
# tokens out. A 0.2 fraction of the samples is held out for validation.
history = model.fit(
    x=[encoder_input_data, decoder_input_data],
    y=decoder_target_onehot,
    epochs=10,
    batch_size=64,
    validation_split=0.2,
)

In [None]:
# Persist the trained network to an HDF5 file in the working directory.
model.save(filepath='headline_generator_model.h5')

In [None]:
# Pickle both fitted tokenizers next to the model so the Streamlit app can
# reload the word <-> id mappings used during training.
for pkl_name, fitted_tokenizer in (
    ('input_tokenizer.pkl', input_tokenizer),
    ('target_tokenizer.pkl', target_tokenizer),
):
    with open(pkl_name, 'wb') as f:
        pickle.dump(fitted_tokenizer, f)

In [None]:
%%writefile app.py
import os
os.environ["STREAMLIT_WATCHDOG_USES_POLLING"] = "true"


with open('input_tokenizer.pkl', 'rb') as f:
    input_tokenizer = pickle.load(f)

with open('target_tokenizer.pkl', 'rb') as f:
    target_tokenizer = pickle.load(f)

max_input_len = 200
max_target_len = 30

model = load_model('headline_generation_model.h5')

# streamlit
st.title("🗞️ Headline Generator")
article = st.text_area("📄 Paste your news article below:", height=200)

def generate_headline(article):
    input_seq = input_tokenizer.texts_to_sequences([article])
    input_seq = pad_sequences(input_seq, maxlen=max_input_len, padding='post')

    decoder_input_seq = np.zeros((1, max_target_len))

    decoded_sentence = ""
    for i in range(max_target_len):
        output = model.predict([input_seq, decoder_input_seq])

        predicted_word_index = np.argmax(output[0, i, :])
        predicted_word = target_tokenizer.index_word.get(predicted_word_index, '')

        if predicted_word == "<eos>":
            break

        decoded_sentence += " " + predicted_word
        decoder_input_seq[0, i] = predicted_word_index

    return decoded_sentence.strip()

if st.button("✨ Generate Catchy Headline"):
    if article.strip() == "":
        st.warning("Please paste an article first.")
    else:
        headline = generate_headline(article)
        st.success(f"📰 Generated Headline: {headline}")


In [None]:
# Print the value returned by loca.lt's "mytunnelpassword" endpoint.
# The recorded output is an IP address matching the External URL host that
# Streamlit reports, so this is presumably the VM's public IP, which the
# tunnel landing page asks for (confirm).
!curl https://loca.lt/mytunnelpassword

34.125.41.243

In [None]:
!streamlit run app_upd_23.py & npx localtunnel --port 8501

[1G[0K⠙[1G[0K⠹[1G[0K⠸
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.125.41.243:8501[0m
[0m
your url is: https://salty-worlds-wink.loca.lt
2025-04-25 05:14:07.125354: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745558047.173837   21348 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745558047.193223   21348 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has alre