In [2]:
from langchain_community.document_loaders import UnstructuredURLLoader

# Seed pages of the Diebold Nixdorf en-us site that make up the knowledge base.
urls = [
    'https://www.dieboldnixdorf.com/en-us/',
    'https://www.dieboldnixdorf.com/en-us/banking/',
    'https://www.dieboldnixdorf.com/en-us/retail/',
    'https://www.dieboldnixdorf.com/en-us/support/',
    'https://www.dieboldnixdorf.com/en-us/about-us/',
    'https://www.dieboldnixdorf.com/en-us/careers/',
    'https://www.dieboldnixdorf.com/en-us/contact-us/',
    'https://www.dieboldnixdorf.com/en-us/about-us/global-locations/'
]

# Fetch each URL and turn it into a LangChain Document.
loader = UnstructuredURLLoader(urls=urls)
data = loader.load()

# Preview the first page's content. The result list can be empty (for
# example when every fetch fails and the loader skips the failures), so
# guard the index instead of letting data[0] raise IndexError.
if data:
    print(data[0].page_content[:500])
else:
    print(" No content loaded.")


Diebold Nixdorf

BANKING

RETAIL

SUPPORT

SUPPORT

Aftersales Portal

eGate

eServices Portal

eServices Portal Remote

GBS Customer Portal

Global Security Portal

Manuals, Drivers, and Downloads

SERAS

Services Support Desk

COMPANY

COMPANY

Environmental, Social & Governance

Global Customer Experience Centers

Investor Relations

Leadership

Culture

News & Events

Partners

Who We Are

Careers

Contact

Global Locations

EN

DE - Deutsch

EN - English

Transforming the Way People Bank an


In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from langchain_community.document_loaders import UnstructuredURLLoader

# Step 1: Crawl internal links
#
# Walk the site starting at base_url and collect every same-host URL found
# in <a href> tags. Results:
#   visited   - URLs a request was attempted for
#   all_links - every internal URL discovered, INCLUDING the start page
#               (previously the start page could never enter this set, so
#               the homepage was silently missing from the loaded corpus)
base_url = "https://www.dieboldnixdorf.com/en-us/"
base_netloc = urlparse(base_url).netloc  # loop-invariant, parse once
MAX_PAGES = None  # set to an int to cap how many pages are fetched; None = no cap

visited = set()
to_visit = [base_url]
# all_links doubles as the "already queued or visited" set, which gives O(1)
# membership checks instead of scanning the to_visit list for every link.
all_links = {base_url}

print("🔍 Crawling internal links...")

while to_visit:
    if MAX_PAGES is not None and len(visited) >= MAX_PAGES:
        print(f" Reached MAX_PAGES={MAX_PAGES}; stopping crawl.")
        break

    url = to_visit.pop()
    if url in visited:
        continue
    visited.add(url)

    try:
        print(f" Visiting: {url}")
        response = requests.get(url, timeout=10)
        # Treat 4xx/5xx as failures rather than parsing the error page for links.
        response.raise_for_status()
        response.encoding = 'utf-8'  # Force UTF-8 decoding
        soup = BeautifulSoup(response.text, 'html.parser')
    except Exception as e:
        # Best-effort crawl: report the failure and move on to the next URL.
        print(f" Failed to fetch {url}: {e}")
        continue

    for link_tag in soup.find_all('a', href=True):
        try:
            parsed = urlparse(urljoin(url, link_tag['href']))
        except ValueError:
            # A single malformed href must not discard the page's other links.
            continue

        # Keep only web links on the same host as the start page.
        if parsed.scheme not in ("http", "https") or parsed.netloc != base_netloc:
            continue

        # Drop "#fragment" so anchors into the same page are not crawled
        # (and later embedded) as if they were separate pages.
        full_url = parsed._replace(fragment="").geturl()

        if full_url not in all_links:
            all_links.add(full_url)
            to_visit.append(full_url)

print(f"\n Total unique internal links found: {len(all_links)}")


print("\n Loading content from links...")
# all_links is a set, and set iteration order for strings is not stable
# between kernel sessions. Sorting makes the document order (and therefore
# which page data[0] is) reproducible across runs.
loader = UnstructuredURLLoader(urls=sorted(all_links))
data = loader.load()


# Show a sample only if at least one page was loaded.
if data:
    print("\n📄 Sample content from first loaded page:\n")
    print(data[0].page_content[:1000])
else:
    print(" No content loaded.")


In [None]:
from langchain_community.document_loaders import UnstructuredURLLoader

# Load every crawled URL. all_links is a set, so sort it to get a stable,
# reproducible document order instead of an arbitrary one.
loader = UnstructuredURLLoader(urls=sorted(all_links))
data = loader.load()


In [3]:
# Peek at the first 500 characters of the first loaded document.
first_page_text = data[0].page_content
print(first_page_text[:500])


Diebold Nixdorf

BANKING

RETAIL

SUPPORT

SUPPORT

Aftersales Portal

eGate

eServices Portal

eServices Portal Remote

GBS Customer Portal

Global Security Portal

Manuals, Drivers, and Downloads

SERAS

Services Support Desk

COMPANY

COMPANY

Environmental, Social & Governance

Global Customer Experience Centers

Investor Relations

Leadership

Culture

News & Events

Partners

Who We Are

Careers

Contact

Global Locations

EN

DE - Deutsch

EN - English

Transforming the Way People Bank an


In [4]:
# Number of items the loader returned in `data`.
len(data)


8

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded pages into smaller chunks for embedding and retrieval.
CHUNK_SIZE = 1000  # value passed as the splitter's chunk_size
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE)
docs = text_splitter.split_documents(data)

# Report how many chunks the split produced.
chunk_count = len(docs)
print("Total number of documents: ", chunk_count)

Total number of documents:  39


In [6]:
# Inspect the final chunk. Indexing from the end keeps this cell working when
# the site content (and therefore the number of chunks) changes; a fixed
# index such as 38 is only valid while there are at least 39 chunks.
docs[-1]

Document(metadata={'source': 'https://www.dieboldnixdorf.com/en-us/about-us/global-locations/'}, page_content="Tel: +90 216 544 10 00\n\nUkraine\n\nKiev\n\nDegtyarovskaya Str. 27T 3rd floor, BC “Micom Palace” 04119, Kiev\n\nTel.: +380 44 492 97 07 Fax: +380 44 492 97 09\n\nUnited Arab Emirates\n\nDubai\n\nDubai Internet City Building N16 Floor 1 Premises: 129\n\nTel.: +971 4 458 2320\n\nUnited Kingdom\n\nBracknell/Berkshire\n\nOne, The Boulevard Cain Road Bracknell/Berkshire RG12 1WP\n\nTel.: +44 (0)1344 384800 Fax: +44 (0)1344 384801\n\nDartford\n\nRadius House Anchor Boulevard Crossways Business Park Dartford DA2 6QH\n\nTel.: +44 (0) 1322 394 700\n\nVietnam\n\nHanoi\n\nRoom No. 6, 21st Floor, Charmvit Tower 117 Tran Duy Hung Street Hanoi, 10000\n\nTel.: +84-24-3 936 4361 Fax: +84-24-3 936 4362\n\nLet's Connect\n\nSubscribe for updates from Diebold Nixdorf\n\nTwitter\n\nFacebook\n\nLinkedIn\n\nYouTube\n\nBlog\n\nBanking\n\nRetail\n\nSupport\n\nCompany\n\nCareers\n\nGlobal Locations\n\

In [None]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from dotenv import load_dotenv
# Load environment variables from a local .env file.
# NOTE(review): presumably this is where the Google API key comes from - confirm.
load_dotenv()

EMBEDDING_MODEL = "models/embedding-001"
embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)

# Smoke test: embed a short string and display the first few components.
vector = embeddings.embed_query("hello, world!")
vector[:5]



[0.05168594419956207,
 -0.030764883384108543,
 -0.03062233328819275,
 -0.02802734263241291,
 0.01813093200325966]

In [8]:
# Embed every chunk and index it in a Chroma vector store.
# NOTE(review): no collection name or persist directory is given, so re-running
# this cell in the same kernel may add the same chunks again - confirm.
chunk_embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=chunk_embedder,
)

In [9]:
# Similarity-search retriever that asks the vector store for 6 chunks per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

# Try the retriever on a sample question.
sample_question = "What kind of services they provide?"
retrieved_docs = retriever.invoke(sample_question)

In [10]:
# Number of chunks the retriever returned for the sample question
# (the retriever was configured with k=6).
len(retrieved_docs)

6

In [11]:
# Show the last retrieved chunk. Indexing from the end avoids an IndexError
# when fewer than six chunks come back or the retriever's k is changed;
# a fixed index of 5 only works while exactly k=6 results are returned.
print(retrieved_docs[-1].page_content)

Diebold Nixdorf

BANKING

RETAIL

SUPPORT

SUPPORT

Aftersales Portal

eGate

eServices Portal

eServices Portal Remote

GBS Customer Portal

Global Security Portal

Manuals, Drivers, and Downloads

SERAS

Services Support Desk

COMPANY

COMPANY

Environmental, Social & Governance

Global Customer Experience Centers

Investor Relations

Leadership

Culture

News & Events

Partners

Who We Are

Careers

Contact

Global Locations

EN

DE - Deutsch

EN - English

Global Locations

We're a global company with a local presence. In our "always on" world, we're shaping the future of transactions, so while our solutions are driven by universal themes, they come to life through unique regional collaborations with our customers.

As the world leader in connected commerce, our organization has the breadth, scale and expertise to deliver the right solutions, at the right times, in the right place.

Our Global Headquarters

Diebold Nixdorf North Canton office building

North Canton


In [12]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that writes the final answers: low temperature, and a
# max_tokens setting of 500.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.3,
    max_tokens=500,
)

In [13]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the answering model, one sentence per source line.
# The trailing {context} placeholder is left for the prompt template to fill.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Two-message chat template: the system instructions, followed by the user's
# question supplied through the {input} placeholder.
prompt_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(prompt_messages)

In [14]:
# Chain that passes the retrieved documents, via the prompt, to the LLM.
question_answer_chain = create_stuff_documents_chain(
    llm,
    prompt,
)

# Full RAG pipeline: the retriever feeds the answering chain above.
rag_chain = create_retrieval_chain(
    retriever,
    question_answer_chain,
)

In [15]:

# Run the RAG chain on one question and print only the generated answer.
chain_input = {"input": "What kind of services they provide?"}
response = rag_chain.invoke(chain_input)
print(response["answer"])

Diebold Nixdorf provides solutions for banking and retail industries.  For banking, they offer solutions for branch transformation, self-service/ATM, teller automation, and payments & open banking.  For retail, they offer solutions for point of sale, self-service, mobility, and managed services.
