# Environment Setup

conda create --prefix D:\AI-ML\LLM\llms_course_env python=3.11 -y

conda activate D:\AI-ML\LLM\llms_course_env

conda install -c conda-forge jupyter ipykernel -y

python -m ipykernel install --user --name=llms_course_env --display-name "Python (llms_course_env)"

With the environment activated, launch Jupyter from D:\AI-ML\LLM:

jupyter notebook

In the notebook interface, go to Kernel → Change Kernel → Python (llms_course_env) so that the notebook runs inside the new environment.

# OpenAI Setup

In [6]:
import os
import certifi
from openai import OpenAI
import config

In [7]:
# Fix SSL verification issues: export the path returned by certifi.where()
# through the SSL_CERT_FILE environment variable of this process.
# NOTE(review): presumably a workaround for certificate verification failures
# on this machine — confirm it is still required.
os.environ["SSL_CERT_FILE"] = certifi.where()

In [8]:
# Create the OpenAI API client, authenticating with the key read from
# config.api_key. The OpenAI generate_text helper calls this `client` object.
client = OpenAI(api_key=config.api_key)

In [9]:
def generate_text(prompt, model="gpt-3.5-turbo", max_tokens=10, temperature=0.7):
    """Send a single-turn chat request to OpenAI and return the reply text.

    Uses the notebook-level ``client`` object.

    Args:
        prompt: Text sent as the content of one ``user`` message.
        model: Chat model name. Defaults to ``"gpt-3.5-turbo"``, the value
            this helper previously hard-coded.
        max_tokens: Cap on the number of generated tokens. Defaults to 10
            (previously hard-coded); raise it for longer answers.
        temperature: Sampling temperature. Defaults to 0.7 (previously
            hard-coded).

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the message carries no text.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # The SDK types message.content as optional; calling .strip() on None
    # would raise AttributeError, so treat "no text" as an empty reply.
    content = response.choices[0].message.content
    return content.strip() if content else ""

### Gemini AI 

In [4]:
import os
import google.generativeai as genai
import config 

In [5]:
# Configure the Gemini SDK with the API key read from config.gemini_api_key
genai.configure(api_key=config.gemini_api_key)

# Create the "gemini-2.0-flash" model handle; the Gemini helper functions
# defined later in this notebook call generate_content on this `model` object
model = genai.GenerativeModel("gemini-2.0-flash")

In [40]:
def generate_text(prompt, temperature=0.8, max_output_tokens=10):
    """Generate a text reply from the Gemini model for a single prompt.

    Uses the notebook-level ``model`` object.

    Args:
        prompt: Text passed to ``model.generate_content`` as the request.
        temperature: Sampling temperature forwarded in ``generation_config``.
            Defaults to 0.8, the value this helper previously hard-coded.
        max_output_tokens: Output-length cap forwarded in
            ``generation_config``. Defaults to 10 (previously hard-coded);
            raise it when longer answers are needed.

    Returns:
        ``response.text`` with leading and trailing whitespace removed.
    """
    response = model.generate_content(
        prompt,
        generation_config={
            "temperature": temperature,
            "max_output_tokens": max_output_tokens,
        },
    )
    return response.text.strip()

In [41]:
# Smoke-test the generate_text helper with a simple factual question
prompt = "Who painted Mona Lisa?"
print(generate_text(prompt))

Leonardo da Vinci painted the Mona Lisa.


### Keyword extraction (text summarization)

In [49]:
def text_summarizer(prompt: str):
    """Ask the Gemini model for a list of keywords found in ``prompt``.

    Two user-role messages are sent through the notebook-level ``model``:
    first the fixed extraction instruction, then the caller's text.
    Generation uses temperature 0.5 and at most 256 output tokens.

    Returns:
        The reply text with surrounding whitespace removed.
    """
    task_description = (
        "You will be provided with a block of text, "
        "and your task is to extract a list of keywords from it."
    )

    # The instruction travels as an ordinary user turn ahead of the text itself.
    conversation = [
        {"role": "user", "parts": [part]}
        for part in (task_description, prompt)
    ]
    sampling_options = {"temperature": 0.5, "max_output_tokens": 256}

    reply = model.generate_content(conversation, generation_config=sampling_options)
    keywords_text = reply.text
    return keywords_text.strip()

In [50]:
# Sample passage used as the input text for the keyword extraction call that
# follows; it is printed so the full text is visible in the notebook.
prompt = "Master Reef Guide Kirsty Whitman didn't need to tell me twice. Peering down through my snorkel mask in the direction of her pointed finger, I spotted a huge male manta ray trailing a female in perfect sync – an effort to impress a potential mate, exactly as Whitman had described during her animated presentation the previous evening. Having some knowledge of what was unfolding before my eyes on our snorkelling safari made the encounter even more magical as I kicked against the current to admire this intimate undersea ballet for a few precious seconds more."
print(prompt)

Master Reef Guide Kirsty Whitman didn't need to tell me twice. Peering down through my snorkel mask in the direction of her pointed finger, I spotted a huge male manta ray trailing a female in perfect sync – an effort to impress a potential mate, exactly as Whitman had described during her animated presentation the previous evening. Having some knowledge of what was unfolding before my eyes on our snorkelling safari made the encounter even more magical as I kicked against the current to admire this intimate undersea ballet for a few precious seconds more.


In [51]:
# Run the keyword extraction helper on the sample passage and print the reply
print(text_summarizer(prompt))

*   Master Reef Guide
*   Kirsty Whitman
*   manta ray
*   snorkel mask
*   snorkelling
*   safari
*   undersea ballet
*   female
*   male
*   mate
*   current
*   presentation


## LangChain

In [7]:
from langchain.document_loaders import WebBaseLoader  # Loads text/content directly from web pages (URLs)
from langchain.text_splitter import RecursiveCharacterTextSplitter  # Splits long documents into smaller chunks (tokens/characters) for processing
from langchain.embeddings import OpenAIEmbeddings  # Converts text chunks into vector embeddings using OpenAI models
from langchain.vectorstores import FAISS  # Stores and searches embeddings efficiently (vector database)
from langchain.memory import ConversationBufferMemory  # Keeps track of past conversation history for context
from langchain.llms import OpenAI  # Wrapper for OpenAI’s text completion models (e.g., davinci, gpt-3.5-turbo-instruct)
from langchain.chains import ConversationalRetrievalChain  # Combines retrieval (FAISS) with a conversational LLM for Q&A
from langchain.chat_models import ChatOpenAI  # Wrapper for OpenAI’s chat-based models (e.g., gpt-3.5-turbo, gpt-4)
import config 

In [15]:
# The webpage URL to extract text/content from
url = "https://365datascience.com/upcoming-courses"

# Web loader bound to the URL above
loader = WebBaseLoader(url)

# Fetch the page; the result is used below as a list of documents
# (presumably each carries metadata such as the source URL — verify)
raw_documents = loader.load()

# Text splitter created with its default settings (no chunk size or overlap
# is passed here). Splitting long documents into smaller chunks helps avoid
# token limits and improves embedding + retrieval.
text_splitter = RecursiveCharacterTextSplitter()

# Split the raw documents into smaller chunks
documents = text_splitter.split_documents(raw_documents)

# Embedding client authenticated with config.api_key. Constructing it does not
# embed anything yet: the chunks and this object are both handed to
# FAISS.from_documents in the next cell.
embeddings = OpenAIEmbeddings(openai_api_key=config.api_key)

In [None]:
# Build the FAISS vector store from the document chunks, using `embeddings`
# to vectorise them
vectorstore = FAISS.from_documents(documents, embeddings)

# Conversation history, exposed to the chain under the "chat_history" key
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Conversational retrieval chain: a chat model plus the vector store retriever
# and the memory above. The key is read from config.api_key, matching the
# OpenAI client and embeddings setup; the bare name `api_key` is not defined
# in the cells shown and raised NameError.
qa = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(
        openai_api_key=config.api_key,
        model="gpt-3.5-turbo",
        temperature=0,
    ),
    vectorstore.as_retriever(),
    memory=memory,
)

query = "What is the next course to be uploaded on the 365DataScience platform?"

# Ask the question; the chain's reply is read from the "answer" key
result = qa({"question": query})

result["answer"]

In [35]:
# Extra: load a Wikipedia page and preview its raw text
import re
from langchain.document_loaders import WebBaseLoader

# Load webpage
loader2 = WebBaseLoader("https://en.wikipedia.org/wiki/Michael_Jackson")
raw_documents2 = loader2.load()

# Join the page_content of every loaded document into one string,
# separated by single spaces
text_data = " ".join([doc.page_content for doc in raw_documents2])

# Remove every newline and tab character.
# NOTE(review): the characters are replaced with "" rather than " ", so words
# that were separated only by a line break end up fused (see
# "WikipediaJump to contentMain menu" in the recorded output) — confirm this
# is intended.
cleaned_raw_documents = re.sub(r"[\n\t]", "", text_data)

cleaned_raw_documents[:500]  # show first 500 characters

'Michael Jackson - WikipediaJump to contentMain menuMain menumove to sidebarhideNavigationMain pageContentsCurrent eventsRandom articleAbout WikipediaContact usContributeHelpLearn to editCommunity portalRecent changesUpload fileSpecial pagesSearchSearchAppearanceDonateCreate accountLog inPersonal toolsDonate Create account Log inPages for logged out editors learn moreContributionsTalkContentsmove to sidebarhide(Top)1Life and careerToggle Life and career subsection1.1Early life and the Jackson 5 ('