In [None]:
import requests
from bs4 import BeautifulSoup
import nltk
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

In [None]:
!pip install selenium



In [None]:
# Fetch the NLTK resources used later by word_tokenize and stopwords.
for resource in ('punkt', 'stopwords'):
    nltk.download(resource)
# Kept as a bare trailing expression so the cell still displays its return value.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import time

# Make sure the tokenizer models and stop-word list are available before use.
for resource in ('punkt', 'stopwords'):
    nltk.download(resource)

def setup_driver():
    """Create a Chrome WebDriver configured to run headless.

    Returns:
        A ``webdriver.Chrome`` instance started with the ``--headless``,
        ``--disable-gpu`` and ``--no-sandbox`` arguments. The caller is
        responsible for calling ``quit()`` on it.
    """
    chrome_options = Options()
    for flag in ("--headless", "--disable-gpu", "--no-sandbox"):
        chrome_options.add_argument(flag)
    return webdriver.Chrome(options=chrome_options)

def scrape_website_info(url):
    """Scrape headline metadata and paragraph text from a site's homepage and About page.

    The homepage is loaded in a headless browser and parsed for its title,
    meta description, ``<h1>`` texts and paragraph text. If the homepage
    contains a link whose ``href`` includes "about", the first such link is
    followed and its paragraph text is collected as well.

    Args:
        url: Absolute URL of the homepage to scrape.

    Returns:
        dict with the keys ``'title'``, ``'meta_description'``, ``'h1_tags'``
        (list of str), ``'about_content'`` and ``'homepage_text'``. The two
        text fields are truncated to 3000 characters; fields that could not
        be found are empty.

    Raises:
        Whatever the WebDriver raises while loading a page. The browser is
        always shut down before the exception propagates.
    """
    # Local import keeps this definition self-contained within the notebook cell.
    from urllib.parse import urljoin

    driver = setup_driver()
    try:
        driver.get(url)
        time.sleep(3)  # fixed pause to give client-side rendering time to finish

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # .string is None for an empty <title> or one with nested tags.
        title = (soup.title.string or "").strip() if soup.title else ""

        meta_desc = ""
        meta_tag = soup.find('meta', attrs={'name': 'description'})
        if meta_tag and 'content' in meta_tag.attrs:
            meta_desc = meta_tag['content'].strip()

        h1_tags = [h1.get_text(strip=True) for h1 in soup.find_all('h1')]

        links = [a['href'] for a in soup.find_all('a', href=True) if 'about' in a['href'].lower()]
        about_content = ""
        if links:
            # urljoin resolves root-relative, path-relative and protocol-relative
            # hrefs against the homepage URL and leaves absolute URLs unchanged.
            about_url = urljoin(url, links[0])
            driver.get(about_url)
            time.sleep(2)
            about_soup = BeautifulSoup(driver.page_source, 'html.parser')
            paragraphs = about_soup.find_all('p')
            about_content = ' '.join(p.get_text() for p in paragraphs)
            about_content = about_content.strip()[:3000]

        homepage_paragraphs = soup.find_all('p')
        homepage_text = ' '.join(p.get_text() for p in homepage_paragraphs)
        homepage_text = homepage_text.strip()[:3000]
    finally:
        # Always release the browser process, even when a page load fails.
        driver.quit()

    return {
        'title': title,
        'meta_description': meta_desc,
        'h1_tags': h1_tags,
        'about_content': about_content,
        'homepage_text': homepage_text
    }

def get_main_keywords(text, limit=8):
    """Return the most frequent whitelisted business terms found in ``text``.

    The text is lower-cased, stripped of punctuation and tokenized. Tokens
    that are non-alphabetic, English stop words, or shorter than three
    characters are discarded, and only tokens on the built-in whitelist are
    counted.

    Args:
        text: Raw text to analyse.
        limit: Upper bound applied as a slice to the ranked keyword list.

    Returns:
        List of keywords ordered by descending frequency; keywords with the
        same count keep the order in which they first appeared.
    """
    relevant_words = [
        'marketing', 'growth', 'strategy', 'brand', 'experience', 'customers',
        'clients', 'design', 'technology', 'innovation', 'solutions', 'services',
        'campaigns', 'team', 'creative', 'performance', 'development', 'results',
        'insights', 'business', 'products', 'reach', 'digital', 'engagement'
    ]

    normalized = re.sub(r'[^\w\s]', '', text.lower())
    tokens = word_tokenize(normalized)
    stop_words = set(stopwords.words('english'))

    frequency = {}
    for token in tokens:
        # Same filters as a two-stage pipeline, applied in a single pass.
        if not token.isalpha() or token in stop_words or len(token) <= 2:
            continue
        if token in relevant_words:
            frequency[token] = frequency.get(token, 0) + 1

    # sorted() is stable, so ties keep the dict's insertion (first-seen) order.
    ranked = sorted(frequency, key=frequency.get, reverse=True)
    return ranked[:limit]

def _join_terms(terms):
    """Join 1-3 terms as English prose: 'a', 'a and b', or 'a, b, and c'."""
    if len(terms) == 1:
        return terms[0]
    if len(terms) == 2:
        return f"{terms[0]} and {terms[1]}"
    return f"{', '.join(terms[:-1])}, and {terms[-1]}"


def write_brand_summary(url, keywords, site_info):
    """Build a two-sentence brand positioning summary from extracted keywords.

    Args:
        url: Site URL, used as the brand name when no page title is available.
        keywords: Keywords ordered by relevance; may be empty.
        site_info: Mapping of scraped site data; only its ``'title'`` entry is read.

    Returns:
        A generic agency description when ``keywords`` is empty. Otherwise a
        summary naming up to three key areas (the first keywords) and up to
        two supporting areas (keywords four and five when at least five
        keywords exist, else the first two).
    """
    brand_name = site_info.get('title') or url

    if not keywords:
        return (
            f"{brand_name} is a digital agency helping businesses grow through strategic marketing and performance-driven solutions. "
            f"The team focuses on delivering measurable results through creativity and execution."
        )

    key_areas = keywords[:3]
    support_areas = keywords[3:5] if len(keywords) > 4 else keywords[:2]

    # Joining by count avoids indexing past the end when fewer than three
    # keywords were found.
    return (
        f"{brand_name} is a results-driven digital agency specializing in {_join_terms(key_areas)}. "
        f"With a strong foundation in {_join_terms(support_areas)}, "
        f"the agency delivers tailored solutions that align with real business goals."
    )



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Homepages of the agencies to scrape and summarise.
websites = ["https://marketingmasala.com/", "https://inklik.com/"]

In [None]:
# Scrape each configured site, derive its keywords and print one report per site.
# `keywords` and `summary` stay bound to the last site that was not skipped.
for site in websites:
    site_info = scrape_website_info(site)
    combined_text = f"{site_info['about_content']} {site_info['homepage_text']}"

    if len(combined_text) >= 200:
        keywords = get_main_keywords(combined_text)
        summary = write_brand_summary(site, keywords, site_info)

        report_lines = [
            f"Website: {site}",
            f"Title: {site_info['title']}",
            f"Meta Description: {site_info['meta_description']}",
            f"H1 Tags: {site_info['h1_tags']}",
            f"Keywords: {keywords}",
            f"Brand Summary: {summary}",
            "-" * 100,
        ]
        print("\n".join(report_lines))
    else:
        # Too little text to extract meaningful keywords from.
        print(f"Skipped {site} due to low content quality.")

Website: https://marketingmasala.com/
Title: Growth-Focused Digital Marketing Agency | Marketing Masala
Meta Description: Marketing Masala is a paid media marketing agency working with clients across 5 continents. We help eCommerce, Mobile Apps, Edtech, and SaaS businesses grow better.
H1 Tags: []
Keywords: ['marketing', 'growth', 'team', 'business', 'digital', 'creative', 'design', 'experience']
Brand Summary: Growth-Focused Digital Marketing Agency | Marketing Masala is a results-driven digital agency specializing in marketing, growth, and team. With a strong foundation in business and digital, the agency delivers tailored solutions that align with real business goals.
----------------------------------------------------------------------------------------------------
Website: https://inklik.com/
Title: Best Digital Marketing Agency in Delhi NCR - Inklik.com
Meta Description: Best Digital marketing agency in Delhi NCR with certified digital marketing experts for all Online Marketing 

This script automatically generates brand summaries by scraping the homepage and "About Us" page of each company website in a list. It extracts the paragraph text from both pages, then processes that content to find the most frequently mentioned business-related keywords. These keywords are used to generate a concise and informative brand positioning summary, ideal for lead enrichment, profiling, or content drafting.

The keyword extraction logic filters out common stopwords and focuses on industry-relevant terms like “marketing,” “strategy,” and “growth.” The final output includes the website URL, page title, meta description, H1 tags, extracted keywords, and a generated summary that reflects the brand’s core services and values. If a site yields fewer than 200 characters of content, the script skips it to maintain quality.

In [None]:
!pip install -q -U google-genai

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/159.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.7/159.7 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os

from google import genai
from google.genai import types

# SECURITY: the API key is read from the environment instead of being embedded
# in the notebook. Any key that was previously committed here should be revoked.
client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])

# Send the summary as one string; wrapping it in list() would split it into
# single characters, each submitted as a separate content part.
prompt = summary

# NOTE(review): 1000 output tokens may cut off a full Beamer document and leave
# LaTeX that does not compile — confirm and raise the limit if needed.
response = client.models.generate_content(
    model="gemini-2.0-flash",
    contents=prompt,
    config=types.GenerateContentConfig(
        max_output_tokens=1000,
        temperature=0.1,
        system_instruction="generate complete latex code to create ppt to pitch to the company",
    ),
)
print(response.text)

```latex
\documentclass{beamer}

\usepackage{graphicx}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{hyperref}
\usepackage{color}
\usepackage{ragged2e} % For justified text in frames

\usetheme{default} % Or choose another theme like 'Madrid', 'Berlin', etc.

\title{Inklik: Driving Digital Success}
\author{Your Name/Team Name}
\date{\today}

\begin{document}

\begin{frame}
\titlepage
\end{frame}

\begin{frame}
\frametitle{The Challenge: Navigating the Digital Landscape}

\begin{itemize}
    \item The digital world is constantly evolving.
    \item Businesses need to stay ahead to remain competitive.
    \item Achieving real business goals requires a strategic and tailored approach.
\end{itemize}

\end{frame}

\begin{frame}
\frametitle{Introducing Inklik: Your Results-Driven Digital Partner}

\begin{itemize}
    \item \textbf{Who We Are:} A digital agency specializing in digital, marketing, and experience.
    \item \textbf{Our Focus:} Delivering tailored solutions that align wi

In [None]:
# Strip the Markdown code fences the model wraps around its answer: first the
# opening "```latex" marker, then any remaining bare "```" closing marker.
code = str(response.text).replace("```latex", "").replace("```", "")

# The context manager closes the file even if the write fails; UTF-8 is set
# explicitly so non-ASCII characters do not depend on the platform default.
with open("pitch.tex", "w", encoding="utf-8") as tex_file:
    tex_file.write(code)

In [None]:
!pip install pdflatex

Collecting pdflatex
  Downloading pdflatex-0.1.3-py3-none-any.whl.metadata (3.4 kB)
Collecting attrs<19.0,>=18.2 (from pdflatex)
  Downloading attrs-18.2.0-py2.py3-none-any.whl.metadata (11 kB)
Downloading pdflatex-0.1.3-py3-none-any.whl (8.3 kB)
Downloading attrs-18.2.0-py2.py3-none-any.whl (34 kB)
Installing collected packages: attrs, pdflatex
  Attempting uninstall: attrs
    Found existing installation: attrs 25.3.0
    Uninstalling attrs-25.3.0:
      Successfully uninstalled attrs-25.3.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jsonschema 4.23.0 requires attrs>=22.2.0, but you have attrs 18.2.0 which is incompatible.
referencing 0.36.2 requires attrs>=22.2.0, but you have attrs 18.2.0 which is incompatible.[0m[31m
[0mSuccessfully installed attrs-18.2.0 pdflatex-0.1.3


In [None]:
!sudo apt install texlive-latex-base

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  dvisvgm fonts-droid-fallback fonts-lmodern fonts-noto-mono fonts-urw-base35
  libgs9 libgs9-common libidn12 libijs-0.35 libjbig2dec0 libkpathsea6
  libptexenc1 libsynctex2 libteckit0 libtexlua53 libtexluajit2 libwoff1
  libzzip-0-13 lmodern poppler-data t1utils tex-common texlive-base
  texlive-binaries xfonts-encodings xfonts-utils
Suggested packages:
  fonts-noto fonts-freefont-otf | fonts-freefont-ttf fonts-texgyre
  poppler-utils ghostscript fonts-japanese-mincho | fonts-ipafont-mincho
  fonts-japanese-gothic | fonts-ipafont-gothic fonts-arphic-ukai
  fonts-arphic-uming fonts-nanum debhelper gv | postscript-viewer perl-tk xpdf
  | pdf-viewer xzdec texlive-latex-base-doc
The following NEW packages will be installed:
  dvisvgm fonts-droid-fallback fonts-lmodern fonts-noto-mono fonts-urw-base35
  libgs9 libgs9-common libidn12 libijs-0.

In [None]:
!sudo apt install texlive-full

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  aglfn asymptote asymptote-doc biber chktex cm-super cm-super-minimal context
  context-modules cups-bsd cups-client cups-common dvidvi dvipng feynmf
  fonts-adf-accanthis fonts-adf-berenis fonts-adf-gillius
  fonts-adf-universalis fonts-arphic-bkai00mp fonts-arphic-bsmi00lp
  fonts-arphic-gbsn00lp fonts-arphic-gkai00mp fonts-arphic-uming fonts-baekmuk
  fonts-cabin fonts-cantarell fonts-comfortaa fonts-croscore
  fonts-crosextra-caladea fonts-crosextra-carlito fonts-dejavu-core
  fonts-dejavu-extra fonts-ebgaramond fonts-ebgaramond-extra
  fonts-font-awesome fonts-freefont-otf fonts-freefont-ttf fonts-gfs-artemisia
  fonts-gfs-baskerville fonts-gfs-bodoni-classic fonts-gfs-complutum
  fonts-gfs-didot fonts-gfs-didot-classic fonts-gfs-gazis
  fonts-gfs-neohellenic fonts-gfs-olga fonts-gfs-porson fonts-gfs-solomos
  fonts-gfs-theokritos f

In [None]:
# PDFLaTeX was used without being imported anywhere in the notebook, which
# raises NameError on a fresh kernel.
from pdflatex import PDFLaTeX

# Compile pitch.tex; create_pdf returns the PDF bytes, the LaTeX log and the
# completed subprocess handle.
pdfl = PDFLaTeX.from_texfile('pitch.tex')
pdf, log, completed_process = pdfl.create_pdf(keep_pdf_file=True)

# Also save the compiled bytes under a fixed output name.
with open('output.pdf', 'wb') as f:
    f.write(pdf)