In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from pydantic import BaseModel

class InstitutionDetails(BaseModel):
    """
    Pydantic model to structure the output data for institution details.

    The five fields correspond to the five items requested by the prompt of
    the first approach: founder, founding date, branches, employee count and
    a short summary.
    """
    # NOTE(review): none of the visible first-approach cells construct this
    # model (the chain result is printed as plain text), and a later cell
    # defines another `InstitutionDetails` with different field types, which
    # rebinds this name once it runs.
    founder: str
    founded: str
    # presumably a count of branches, although the prompt asks for the
    # "Current branches" themselves -- TODO confirm the intended type
    branches: int
    employees: int
    summary: str

In [None]:
# Raw prompt text for the first approach. `{institution_name}` is the only
# placeholder; it is declared as the single input variable when this string is
# wrapped in a PromptTemplate further down.
# NOTE(review): the prompt asks for details "from Wikipedia", but no Wikipedia
# text is passed to the model in this approach -- the answer comes from the
# model alone, so the returned facts should be verified independently.
prompt_template = """
Given the name of an institution, extract the following details from Wikipedia:
1. Founder of the institution
2. When it was founded
3. Current branches of the institution
4. How many employees work in it
5. A 4-line brief summary of the institution

Institution: {institution_name}
"""

In [None]:
import getpass
!pip install langchain-cohere
import os
if not os.environ.get("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass.getpass("Enter API key for Cohere: ")

from langchain_cohere import ChatCohere
model = ChatCohere(model="command-r7b-12-2024")

prompt = PromptTemplate(input_variables=["institution_name"],
template=prompt_template)
chain = LLMChain(llm=model, prompt=prompt)
def fetch_institution_details(institution_name: str) -> str:
    """
    Ask the chat model for details about an institution.

    The name is substituted into the notebook-level prompt and sent through
    the notebook-level ``chain`` (an ``LLMChain`` wrapping the Cohere chat
    model configured above).

    Args:
        institution_name (str): The name of the institution to fetch details for.

    Returns:
        str: The model's free-text answer containing the institution details.
    """
    # `Chain.run` is deprecated in favour of `invoke`. `invoke` returns a dict
    # holding the inputs and the outputs, so the generated text is looked up
    # under the chain's own output key instead of a hard-coded name.
    outputs = chain.invoke({"institution_name": institution_name})
    return outputs[chain.output_key]



In [None]:
# Read the institution to look up; surrounding whitespace is not part of the name.
institution_name = input("Enter the institution name: ").strip()
if not institution_name:
    # Fail before calling the model: an empty name gives the prompt nothing
    # to work with and would only spend an API request.
    raise ValueError("Institution name must not be empty.")

institution_details = fetch_institution_details(institution_name)
print(institution_details)

Enter the institution name: NVIDIA
Here are the details extracted from Wikipedia about NVIDIA:

1. **Founder:** Jensen Huang, Curtis Priem, Chris Malachowsky, David Gan, and Ivan Sutherland.
2. **Founding Date:** NVIDIA was founded in 1993.
3. **Current Branches:**
   - Santa Clara, California, USA (Headquarters)
   - Austin, Texas, USA
   - Seattle, Washington, USA
   - Research Triangle Park, North Carolina, USA
   - Toronto, Ontario, Canada
   - London, United Kingdom
   - Munich, Germany
   - Tokyo, Japan
   - Shanghai, China
   - Bangalore, India

4. **Number of Employees:** As of 2023, NVIDIA employs over 18,000 people worldwide.

5. **Brief Summary:** NVIDIA is a leading American technology company specializing in designing graphics processing units (GPUs) and artificial intelligence (AI) software. Founded in 1993, it has become a pioneer in the field of computer graphics and AI computing. The company's products are widely used in gaming, professional visualization, data centers

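2nd Approach: fetch the institution's Wikipedia text with LangChain's Wikipedia tool and extract the fields with regular expressions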

In [None]:
%pip install --upgrade --quiet  wikipedia
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

from pydantic import BaseModel, Field
import re

# Structured result returned by the regex-based parser below. Every field is
# required: a `Field` without a default marks the attribute as mandatory.
class InstitutionDetails(BaseModel):
    founder: str = Field(description="Founder of the institution")
    founded_year: str = Field(description="Year the institution was founded")
    branches: list[str] = Field(description="Current branches in the institution")
    employees: str = Field(description="Number of employees in the institution")
    summary: str = Field(description="A brief 4-line summary of the institution")

# Placeholder stored in a field whose pattern does not occur in the text.
_NOT_FOUND = "Not Found"
# Number of leading sentences kept for the summary.
_SUMMARY_SENTENCES = 4

# Matching is case-insensitive because these phrases usually appear
# mid-sentence ("... was founded by ...") rather than capitalised.
_FOUNDER_RE = re.compile(r"Founded by\s*([\w\s,]+)", re.IGNORECASE)
_FOUNDED_YEAR_RE = re.compile(r"(?:Established|Founded) in\s*(\d{4})", re.IGNORECASE)
_BRANCH_RE = re.compile(r"(\b[A-Z][a-zA-Z\s]+ Campus\b)")
# Accept thousands separators: a plain digit run would see only the trailing
# "000" of "164,000 employees".
_EMPLOYEES_RE = re.compile(r"\b(\d{1,3}(?:,\d{3})+|\d+)\s*employees", re.IGNORECASE)


def parse_wikipedia_content(content: str) -> InstitutionDetails:
    """
    Extract institution details from free text with regular expressions.

    The extraction is heuristic: each field is taken from the first place its
    pattern occurs, and a field whose pattern never occurs is filled with
    "Not Found" (``["Not Found"]`` for ``branches``).

    Args:
        content (str): Text to scan, e.g. the result of a Wikipedia query.

    Returns:
        InstitutionDetails: The extracted founder, founding year, campus
        names, employee count (as written in the text, separators included)
        and a summary made of the first four sentences.
    """
    founder_match = _FOUNDER_RE.search(content)
    founded_year_match = _FOUNDED_YEAR_RE.search(content)
    branches_match = _BRANCH_RE.findall(content)
    employees_match = _EMPLOYEES_RE.search(content)

    # The founder character class also admits whitespace and commas, so trim
    # them from both ends; a capture that was nothing else counts as no match.
    founder = _NOT_FOUND
    if founder_match:
        founder = founder_match.group(1).strip(" ,\t\r\n") or _NOT_FOUND

    # Splitting on ". " consumes the period of every sentence but the last
    # piece, so put it back when the summary was cut short.
    sentences = content.split(". ")
    summary = ". ".join(sentences[:_SUMMARY_SENTENCES])
    if len(sentences) > _SUMMARY_SENTENCES and not summary.endswith("."):
        summary += "."

    return InstitutionDetails(
        founder=founder,
        founded_year=founded_year_match.group(1) if founded_year_match else _NOT_FOUND,
        branches=branches_match if branches_match else [_NOT_FOUND],
        employees=employees_match.group(1) if employees_match else _NOT_FOUND,
        summary=summary,
    )

# Institution looked up in this demonstration run.
institution_name = "Apple Company"

# Query Wikipedia through the LangChain tool and keep the raw text it returns.
wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
wiki_content = wiki.run(institution_name)

# Turn the raw text into the structured model and show it as indented JSON.
institution_details = parse_wikipedia_content(wiki_content)
print(institution_details.model_dump_json(indent=4))