In [1]:
import os
from dotenv import load_dotenv
import streamlit as st

from langchain_ollama import OllamaLLM
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.globals import set_verbose
from langchain.globals import set_debug

# Keep LangChain quiet: verbose chain logging and debug tracing both off.
set_verbose(False)
set_debug(False)


## Creating the Vector Database from the Course Documents

In [None]:
import pandas as pd

# Location of the cleaned course dataset. The default is the original
# author's local path; set the UOW_COURSES_CSV environment variable to run
# this notebook on another machine without editing the cell.
courses_csv_path = os.environ.get(
    "UOW_COURSES_CSV",
    "D:\\MSC-DATA-SCIENCE\\Msc project\\website_chatbot\\university_data\\Cleaned_dataset.csv",
)

df = pd.read_csv(courses_csv_path)

In [None]:
# Replace every missing value with the "No_data" marker, then preview one row.
df = df.fillna(value="No_data", axis=1)
df.head(n=1)

Unnamed: 0,index,page_title,campus,block_release_duration,distance_learning_duration,full_time_duration,part_time_duration,block_release_international_fees,distance_learning_international_fees,full_time_international_fees,...,full_time_uk_fees,part_time_uk_fees,block_release_url,distance_learning_url,full_time_url,part_time_url,desc_len,description,ai_description,desc_ai_len
0,0,"AI, Data and Communication MA","Harrow, North-West London",No_data,No_data,1 year,2 years,No_data,No_data,"£17,500",...,"£9,700","£1,125",No_data,No_data,https://www.westminster.ac.uk/computer-science...,https://www.westminster.ac.uk/computer-science...,242,It is estimated that there will be 97 million ...,Introducing a cutting-edge educational course...,275


In [None]:
# List the column labels of the loaded frame.
df.columns

Index(['index', 'page_title', 'campus', 'block_release_duration',
       'distance_learning_duration', 'full_time_duration',
       'part_time_duration', 'block_release_international_fees',
       'distance_learning_international_fees', 'full_time_international_fees',
       'part_time_international_fees', 'block_release_uk_fees',
       'distance_learning_uk_fees', 'full_time_uk_fees', 'part_time_uk_fees',
       'block_release_url', 'distance_learning_url', 'full_time_url',
       'part_time_url', 'desc_len', 'description', 'ai_description',
       'desc_ai_len'],
      dtype='object')

In [None]:
# Columns removed before the frame is exported for document loading.
unused_columns = [
    'index',
    'block_release_duration',
    'block_release_international_fees',
    'block_release_uk_fees',
    'desc_len',
    'description',
    'ai_description',
    'desc_ai_len',
]
df = df.drop(columns=unused_columns)

In [None]:
# Swap the terse dataset column names for descriptive labels; these labels
# become the "key: value" text of each loaded document.
readable_labels = {
    "page_title": "Course Name",
    "campus": "The campus in which course is taught",
    "distance_learning_duration": "Duration of the course for the distance learning course",
    "full_time_duration": "Duration of the course for full time course",
    "part_time_duration": "Duration of the course for part time course",
    "distance_learning_international_fees": "Fee for international people for the distance learning course",
    "full_time_international_fees": "Fee for international people for the full time course",
    "part_time_international_fees": "Fee for international people for the part time course",
    "distance_learning_uk_fees": "Fee for UK people for the distance learning course",
    "full_time_uk_fees": "Fee for UK people for the full time course",
    "part_time_uk_fees": "Fee for UK people for the part time course",
    "block_release_url": "Link for the latest relese for the course",
    "distance_learning_url": "Link for the distance learning course",
    "full_time_url": "Link for the full time course",
    "part_time_url": "Link for the part time course",
}
df = df.rename(columns=readable_labels)

In [None]:
# Write the prepared frame to a CSV file in the working directory, omitting the index.
df.to_csv("university_data.csv",index=False)

In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Read the exported CSV back as LangChain documents.
# The encoding is stated explicitly: pandas writes the file as UTF-8 by
# default, whereas the loader otherwise falls back to the platform's default
# encoding, which can garble non-ASCII characters such as the "£" in the fee
# columns (e.g. on Windows).
loader = CSVLoader(file_path='university_data.csv', encoding='utf-8')

data = loader.load()

In [None]:
from langchain_text_splitters import CharacterTextSplitter

# Split each document on newlines, targeting 1200-character chunks with no overlap.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1200,
    separator="\n",
)
final_docs = text_splitter.split_documents(documents=data)

In [None]:
# Number of chunks produced by the splitter.
len(final_docs)

276

In [None]:
# Look at the first chunk to check its text and metadata.
final_docs[0]

Document(metadata={'source': 'university_data.csv', 'row': 0}, page_content='Course Name: AI, Data and Communication MA\nThe campus in which course is taught: Harrow, North-West London\nDuration of the course for the distance learning course: No_data\nDuration of the course for full time course: 1 year\nDuration of the course for part time course: 2 years\nFee for international people for the distance learning course: No_data\nFee for international people for the full time course: £17,500\nFee for international people for the part time course: £1,985\nFee for UK people for the distance learning course: No_data\nFee for UK people for the full time course: £9,700\nFee for UK people for the part time course: £1,125\nLink for the latest relese for the course: No_data\nLink for the distance learning course: No_data\nLink for the full time course: https://www.westminster.ac.uk/computer-science-and-engineering-data-science-and-informatics-media-and-communication-courses/2025-26/september/full

In [None]:
# Embedding model plus a handle on the Chroma store persisted in ./Croma_DB_UOW.
embeddings1 = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = Chroma(
    embedding_function=embeddings1,
    persist_directory="./Croma_DB_UOW",
)

In [None]:
# Give every chunk a stable id derived from its position so that re-running
# this cell writes to the same records instead of appending another full copy
# of the dataset under fresh random ids (which would make the retriever
# return duplicate hits).
# NOTE: a store built before this change still holds its randomly-keyed
# records; delete ./Croma_DB_UOW once to start from a clean state.
chunk_ids = [f"uow-course-chunk-{position}" for position in range(len(final_docs))]
db = Chroma.from_documents(
    final_docs,
    embedding=embeddings1,
    persist_directory="./Croma_DB_UOW",
    ids=chunk_ids,
)

In [None]:
# Re-open the persisted store from disk with the same embedding function.
db = Chroma(
    embedding_function=embeddings1,
    persist_directory="./Croma_DB_UOW",
)

In [None]:
# Display the store object to confirm it was created.
db

<langchain_chroma.vectorstores.Chroma at 0x1a79b3029e0>

## LLM Setup and Querying

In [3]:
# Load variables from a local .env file, then read the Groq API key
# (None when the variable is not set).
load_dotenv()
groq_api_key = os.environ.get("GROQ_API_KEY")


In [4]:
# Query-normalisation prompt: asks the model to rewrite a free-form user
# question into one fixed-format question ("Does the [University name] offer
# a course on ...?"). When no degree type is given, the rewritten question
# carries the literal list "MA, BA, MSc, BSc, PhD, etc", which later cells
# test for when choosing a chain. Expects the input variable `query1`.
prompt1 = ChatPromptTemplate.from_messages(
    [
        ("system",
         '''
         User will ask you a question about a course from the university. You need to do the following things:
          1. Identify the university name from the query.
          2. Identify the subject name from the query.
          3. Identify the type of degree from the query.
          4. If the type of degree is not specified in the query, include all degree types as follows: "MA, BA, MSc, BSc, PhD, etc."
          5. If no university name is given, assume [University name] is "University of Westminster".
          6. Return the question in the following format:
          Does the [University name] offer a course on [Course name] [Type of degree]?
          If the type of degree is not specified, use this format:
          Does the [University name] offer a course on [Course name] MA, BA, MSc, BSc, PhD, etc?
          7. Strictly only return the question as described above. Do not write anything else.
         '''
        ),
        ("user", "Question:{query1}")        
    ]
)



# Answer prompt with the full response template: full-time, part-time,
# block-release and distance-learning sections. Expects the input variables
# `context` (the retrieved documents) and `input` (the question).
# Defined once only: the earlier version of this cell repeated the identical
# assignment three times, each one silently overwriting the previous.
prompt2 = ChatPromptTemplate.from_messages(
    [
        ("system", 
         '''
         You are a helpful University assistant. Your job is to give the diffrent course information asked by the user.Answer the questions based on the provided contexts only in the given format:
         <context>
         {context}
         </context>
         In above context No_data means course is not available so please dont include it.

         response format: 
         Yes, [University Name] provides a course on [Subject name]: 
         Course title: [page_title]. 
         Campus: [campus]. 
         [If full time course details are available]
         
         For full-time course: 
         UK fees:[full_time_uk_fees]. 
         international fees: [full_time_international_fees]. 
         The duration of the course is [full_time_duration]. 
         For more details please visit the official website:[full_time_url] 
         
         [If part time course details are available]
         For Part-time course: 
         UK fees: [part_time_uk_fees]. 
         international fees: [part_time_international_fees]. 
         The duration of the course is [part_time_duration]. 
         For more details please visit the official website: [part_time_url]

         [If block_release_url details are available]
         Check this for new release: [block_release_url]

         [If distance learning course details are available]
         There are distance_learning options are also available: 
         UK fees: [distance_learning_uk_fees]. 
         International fees: [distance_learning_international_fees]. 
         Duration of the course: [distance_learning_duration]. 
         For more details please visit the official website:[distance_learning_url] 
         '''
        ),
        ("user", "Question:{input}")        
    ]
)

# Shorter answer prompt: same instructions and full-time / part-time sections
# as prompt2, but without the block-release and distance-learning sections.
# Expects the input variables `context` and `input`.
prompt3 = ChatPromptTemplate.from_messages(
    [
        ("system", 
         '''
         You are a helpful University assistant. Your job is to give the diffrent course information asked by the user.Answer the questions based on the provided contexts only in the given format:
         <context>
         {context}
         </context>
         In above context No_data means course is not available so please dont include it.

         response format: 
         Yes, [University Name] provides a course on [Subject name]: 
         Course title: [page_title]. 
         Campus: [campus]. 
         [If full time course details are available]
         
         For full-time course: 
         UK fees:[full_time_uk_fees]. 
         international fees: [full_time_international_fees]. 
         The duration of the course is [full_time_duration]. 
         For more details please visit the official website:[full_time_url] 
         
         [If part time course details are available]
         For Part-time course: 
         UK fees: [part_time_uk_fees]. 
         international fees: [part_time_international_fees]. 
         The duration of the course is [part_time_duration]. 
         For more details please visit the official website: [part_time_url]

         '''
        ),
        ("user", "Question:{input}")        
    ]
)


In [5]:
# Sample user question and the two language models used by the chains:
# a Groq-hosted chat model and a local Ollama model.
query = "I am looking for course on music in university of westminster"
llm1 = ChatGroq(groq_api_key=groq_api_key, model_name="llama3-70b-8192")
llm2 = OllamaLLM(model="chatbot3")

# Parser placed at the end of the query-rewriting chain.
output_parser = StrOutputParser()

In [6]:
# Open the persisted Chroma store with the same embedding model it was built with.
embeddings1 = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = Chroma(
    embedding_function=embeddings1,
    persist_directory="./Croma_DB_UOW",
)

In [7]:
# Similarity retrievers over the store: top 3 and top 6 matches respectively.
retriever = db.as_retriever(search_kwargs={'k': 3}, search_type="similarity")
retriever1 = db.as_retriever(search_kwargs={'k': 6}, search_type="similarity")

In [None]:
# Query-rewriting chain: prompt1 -> Groq model -> output parser.
chain1 = prompt1 | llm1 | output_parser

# Document chains: each one stuffs the retrieved documents into an answer
# prompt for one of the two models.
document_chain = create_stuff_documents_chain(prompt=prompt2, llm=llm2)
document_chain1 = create_stuff_documents_chain(prompt=prompt2, llm=llm1)
document_chain2 = create_stuff_documents_chain(prompt=prompt3, llm=llm2)

# Retrieval chains: the k=3 retriever feeding each document chain.
chain2 = create_retrieval_chain(retriever=retriever, combine_docs_chain=document_chain)
chain3 = create_retrieval_chain(retriever=retriever, combine_docs_chain=document_chain1)
chain4 = create_retrieval_chain(retriever=retriever, combine_docs_chain=document_chain2)

In [9]:

if query:
    question =  chain1.invoke({"query1": query})

    if "MA, BA, MSc, BSc, PhD" in question:
        async for chunk in chain3.astream({'input':question}):
            if 'answer' in chunk.keys():
                print(chunk['answer'], end="", flush=True)
    
    else:
        async for chunk in chain2.astream({'input':question}):
            if 'answer' in chunk.keys():print(chunk['answer'], end="", flush=True)


Yes, the University of Westminster provides a course on Music: 

Course title: Music Production, Performance and Business BA Honours. 
Campus: Harrow, North-West London.

For full-time course: 
UK fees: £9,250. 
International fees: £17,000. 
The duration of the course is 3 or 4 years. 
For more details please visit the official website: https://www.westminster.ac.uk/music-courses/2025-26/september/full-time/music-production-performance-and-business-ba-honours

Also, the University of Westminster provides another course on Music: 

Course title: Music Production, Performance and Business with Foundation BA Honours. 
Campus: Harrow, North-West London.

For full-time course: 
UK fees: £9,250. 
International fees: £17,000. 
The duration of the course is 4 or 5 years. 
For more details please visit the official website: https://www.westminster.ac.uk/music-courses/2025-26/september/full-time/music-production-performance-and-business-with-foundation-ba-honours