In [4]:
# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Standard library imports
import os
import glob

# Third-party imports
import gradio as gr
from langchain.document_loaders import DirectoryLoader, TextLoader, UnstructuredPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Constants
MODEL = "gpt-4o"  # OpenAI chat model name
DB_NAME = "vector_db"  # directory handed to Chroma as persist_directory

In [11]:
# Read the .env file into the process environment and verify that an
# OpenAI API key is available before any client object is created.
from dotenv import load_dotenv
import os

# override=True: values from .env replace variables that are already set
load_dotenv(override=True)

# Fail fast when the key is missing or empty
openai_api_key = os.environ.get("OPENAI_API_KEY")
if openai_api_key is None or openai_api_key == "":
    raise EnvironmentError("OPENAI_API_KEY not found in environment variables.")

# Write the validated key back so downstream libraries read it from the environment
os.environ["OPENAI_API_KEY"] = openai_api_key

In [22]:
# One sub-folder of the knowledge base per document type.
# - Only directories are kept: a stray file next to the folders (README,
#   .DS_Store, ...) would otherwise be passed to DirectoryLoader, which
#   expects a directory path.
# - The result is sorted because glob returns entries in an arbitrary,
#   filesystem-dependent order; sorting keeps document/chunk order stable
#   across runs and machines.
folders = sorted(
    path for path in glob.glob('knowledge-base-Amir/*') if os.path.isdir(path)
)

In [23]:

# Load every Markdown file found (recursively) under each folder and tag the
# resulting documents with the folder's base name as their "doc_type".
documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(
        folder,
        glob="**/*.md",
        loader_cls=TextLoader,
        loader_kwargs={'encoding': 'utf-8'},
    )
    folder_docs = loader.load()
    # Tag first, then add the whole batch in one go
    for doc in folder_docs:
        doc.metadata["doc_type"] = doc_type
    documents.extend(folder_docs)


In [24]:
# Display the full list of loaded Document objects (metadata and page content)
documents

[Document(metadata={'source': 'knowledge-base-Amir/publications/CAERS-CF.md', 'doc_type': 'publications'}, page_content="# 📚 CAERS-CF: Enhancing Convolutional Autoencoder Recommendations through Collaborative Filtering\n\n**Published in**: Knowledge and Information Systems (Springer), 2024  \n**Authors**: Amirhossein Ghadami, Thomas Tran  \n**DOI**: [10.1007/s10115-024-02204-5](https://doi.org/10.1007/s10115-024-02204-5)\n\n---\n\n## 🧠 Overview\n\n**CAERS-CF** is a **hybrid recommendation system** that combines:\n- A novel **deep learning-based model (CAERS)** leveraging **convolutional autoencoders**\n- A **collaborative filtering (CF)** approach based on **singular value decomposition (SVD)**\n\nThe model aims to merge content and behavior-based recommendation strategies using **linear regression** to dynamically weigh their outputs, yielding state-of-the-art accuracy on benchmark datasets.\n\n---\n\n## 🧩 Key Contributions\n\n1. **CAERS** (Convolutional Autoencoder Recommendation Sys

In [25]:
# Split the loaded documents into chunks for embedding; consecutive chunks
# share an overlap so text cut at a boundary still appears with some context.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(documents)

In [26]:
# Show the first chunk as a sanity check on the splitter output
print(chunks[0])

page_content='# 📚 CAERS-CF: Enhancing Convolutional Autoencoder Recommendations through Collaborative Filtering

**Published in**: Knowledge and Information Systems (Springer), 2024  
**Authors**: Amirhossein Ghadami, Thomas Tran  
**DOI**: [10.1007/s10115-024-02204-5](https://doi.org/10.1007/s10115-024-02204-5)

---

## 🧠 Overview

**CAERS-CF** is a **hybrid recommendation system** that combines:
- A novel **deep learning-based model (CAERS)** leveraging **convolutional autoencoders**
- A **collaborative filtering (CF)** approach based on **singular value decomposition (SVD)**

The model aims to merge content and behavior-based recommendation strategies using **linear regression** to dynamically weigh their outputs, yielding state-of-the-art accuracy on benchmark datasets.

---

## 🧩 Key Contributions

1. **CAERS** (Convolutional Autoencoder Recommendation System):  
   Captures nonlinear, high-order relationships from users' and items' **content data** via CAE architecture.' metadata

In [27]:
# Total number of chunks produced by the splitter
len(chunks)

25

In [28]:
embeddings = OpenAIEmbeddings()

# Chroma.from_documents adds to whatever collection already exists under
# persist_directory, so re-running this cell would store every chunk a second
# time. Drop the previous collection first so the cell is idempotent.
if os.path.exists(DB_NAME):
    Chroma(persist_directory=DB_NAME, embedding_function=embeddings).delete_collection()

# Embed the chunks and persist them in the Chroma store at DB_NAME.
# NOTE: the misspelled name `vecotrestore` is kept because later cells use it.
vecotrestore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=DB_NAME)


In [29]:
# Number of records in the underlying Chroma collection
# (read through the private `_collection` attribute)
vecotrestore._collection.count()

25

In [30]:
# Dimensionality of a stored embedding: fetch up to four records together with
# their embeddings and measure the length of the fourth vector (index 3)
len(vecotrestore._collection.get(limit=4,include=['embeddings'])['embeddings'][3])

1536

In [31]:
# Chat model. The name comes from the notebook-level MODEL constant so the
# model is configured in exactly one place instead of a repeated literal.
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# Conversation memory: the chain stores and reads prior turns under the
# 'chat_history' key, returned as message objects rather than one string.
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# Retriever over the vector store. k=25 requests up to 25 chunks per query;
# with the 25 chunks currently indexed that means the whole knowledge base is
# passed to the model on every question.
retriever = vecotrestore.as_retriever(search_kwargs={"k": 25})

# Tie the model, retriever and memory together into one conversational RAG chain
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)


In [32]:
# Smoke-test the chain with a question about the publications in the knowledge base
result = conversation_chain.invoke({'question':'what are the papers he wrote and the titles and a short explainations'})

In [33]:
# The chain's reply is stored under the 'answer' key of the result
print(result['answer'])

Amir Ghadami has authored the following papers:

1. **TriDeepRec: A Hybrid Deep Learning Approach to Content and Behaviour-based Recommendation Systems**
   - *Published in*: User Modeling and User-Adapted Interaction, 2024
   - *Explanation*: This paper introduces TriDeepRec, a hybrid recommendation system that combines content-based and behavior-based approaches using deep learning. It addresses challenges like cold-start problems and improves prediction accuracy and ranking performance.

2. **CAERS-CF: Enhancing Convolutional Autoencoder Recommendations through Collaborative Filtering**
   - *Published in*: Knowledge and Information Systems, 2024
   - *Explanation*: This paper presents CAERS-CF, a hybrid recommendation system that integrates a convolutional autoencoder with collaborative filtering. It aims to combine content and behavior-based strategies to achieve superior accuracy on benchmark datasets.

3. **TAVo: Tor Application Detection with Voting Critic**
   - *Published in*

In [34]:
# Follow-up question; "his" relies on the chat history kept in the chain's memory
conversation_chain.invoke({'question':'what is his degree'})['answer']

'Amir Ghadami holds a Master of Science (MSc) in Computer Science from the University of Ottawa and a Bachelor of Science (BSc) in Computer Engineering from Azad Tehran University.'

In [35]:
def chat(message, history):
    """Answer one user message through the conversational retrieval chain.

    Args:
        message: The user's latest message; sent to the chain as the question.
        history: Conversation history supplied by the chat UI. It is not used
            here because the chain was built with its own memory object.

    Returns:
        The value stored under the "answer" key of the chain's result.
    """
    payload = {"question": message}
    return conversation_chain.invoke(payload)["answer"]

In [38]:
# Reset the conversation memory so the chat UI does not start with the
# test questions asked in the cells above
memory.clear()

In [39]:
# Build the chat UI around `chat`, then start the local server and open it in
# the browser. `view` receives whatever `launch()` returns.
chat_ui = gr.ChatInterface(chat, type='messages')
view = chat_ui.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.
