In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!nvidia-smi

Fri Aug  8 12:30:51 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   40C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
# Install core libraries for the RAG prototype.
# -qU = quiet output + upgrade to latest compatible versions
# langchain            : framework for chaining LLM + retriever steps
# langchain-community  : community loaders/vector stores (e.g., DirectoryLoader, FAISS)
# langchain-openai     : OpenAI wrappers (ChatOpenAI, OpenAIEmbeddings)
# tiktoken             : OpenAI tokenizer for token counting/chunking
!pip install -qU langchain langchain-community langchain-openai tiktoken

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/2.5 MB[0m [31m30.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m40.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.6/70.6 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m59.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
# Install FAISS with GPU
# Use only if your Colab runtime has an NVIDIA GPU (check using command "!nvidia-smi").
# -qU = quiet + upgrade. If this fails (CUDA mismatch/no GPU), use "pip install -qU faiss-cpu"
!pip install -qU faiss-gpu

[31mERROR: Could not find a version that satisfies the requirement faiss-gpu (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for faiss-gpu[0m[31m
[0m

In [5]:
# Using CPU, as GPU command doesn't work.
!pip install -qU faiss-cpu

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m41.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [7]:
# Quick environment sanity-check: print Python version
import sys, subprocess, importlib, os
print("Python:", sys.version.split()[0])

# Helper: print the installed version of a package (skip gracefully if unavailable)
def ver(pkg):
    try:
        from importlib.metadata import version  # standard way to fetch package version
        print(pkg, version(pkg))
    except Exception as e:
        print(pkg, "— version check skipped:", e)

# Report versions of core libraries used in this RAG prototype
ver("langchain"); ver("langchain-community"); ver("langchain-openai"); ver("tiktoken")

# Verify FAISS availability and show its version; catch import issues cleanly
try:
    import faiss
    print("faiss:", faiss.__version__ if hasattr(faiss, "__version__") else "ok")
except Exception as e:
    print("faiss import error:", e)

Python: 3.11.13
langchain 0.3.27
langchain-community 0.3.27
langchain-openai 0.3.28
tiktoken 0.10.0
faiss: 1.11.0


In [6]:
# Verify FAISS is installed and importable.
# This confirms a CPU-only install.
import faiss
print("FAISS OK (CPU)")

FAISS OK (CPU)


In [8]:
# Securely prompt for your OpenAI API key
import os
from getpass import getpass

# Save the key to an environment variable for this session
os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

# Confirm setup (does not print the key)
print("OpenAI key set")

Enter your OpenAI API key: ··········
OpenAI key set


In [9]:
# Verify files exist in Drive
# Lists all .txt files in the target folder so you can confirm names/count.
from pathlib import Path

DATA_DIR = Path("/content/drive/MyDrive/enterprise-rag-langchain-faiss/data")
files = sorted(DATA_DIR.glob("*.txt"))
print(f"Found {len(files)} .txt files:")
for p in files:
    print("•", p.name)

Found 5 .txt files:
• basel3_overview.txt.txt
• fatf_recs_overview.txt.txt
• iso20022_about.txt.txt
• open_banking_what_is.txt.txt
• psd2_intro.txt.txt


In [10]:
# Load the .txt files as LangChain Documents
# DirectoryLoader + TextLoader read files and attach basic metadata.
from langchain_community.document_loaders import DirectoryLoader, TextLoader

loader = DirectoryLoader(
    str(DATA_DIR),
    glob="*.txt",
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8"},
    show_progress=True,
)
docs = loader.load()
print(f"Loaded {len(docs)} documents")

100%|██████████| 5/5 [00:02<00:00,  2.27it/s]

Loaded 5 documents





In [11]:
# Preview of loaded content
# Prints each filename, character count, and a snippet for sanity-checks.
from pathlib import Path

for d in docs:
    src = Path(d.metadata.get("source", "?")).name
    snippet = d.page_content[:400].replace("\n", " ")
    print(f"\n— {src} ({len(d.page_content)} chars)")
    print(snippet + ("..." if len(d.page_content) > 400 else ""))


— basel3_overview.txt.txt (4888 chars)
The Basel III reforms have now been integrated into the consolidated Basel Framework, which comprises all of the current and forthcoming standards of the Basel Committee on Banking Supervision. For background, set out below are the main publications that describe the changes to the Basel Framework that were agreed as part of Basel III.  Basel III is an internationally agreed set of measures develo...

— psd2_intro.txt.txt (7129 chars)
The revised Payment Services Directive (PSD2) updates and enhances the EU rules put in place by the initial PSD adopted in 2007. The PSD2 entered into force on 12 January 2016 and EU Member States were given until 13 January 2018 to transpose it into national law.  The main objectives of the PSD2 are (i) to contribute to a more integrated and efficient European payments market; (ii) to further lev...

— fatf_recs_overview.txt.txt (55389 chars)
 INTERNATIONAL STANDARDS ON COMBATING MONEY LAUNDERING AND THE FINANCING 