In [22]:
from langchain.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers import BitsAndBytesConfig
#from langchain.chains import LLMChain
from langchain_huggingface import HuggingFacePipeline 
#from langchain_community.chat_models import ChatHuggingFace
from langchain.schema import SystemMessage, HumanMessage, AIMessage, StrOutputParser
from langchain_ollama import ChatOllama


In [24]:
import torch

# Device index handed to the Hugging Face pipelines below:
# 0 when CUDA is available, otherwise -1 (the cell output shows this maps to CPU).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

## HuggingFace LLM: phi-2

In [10]:
# Hub identifier of the checkpoint used for the first local LLM.
model_id = "microsoft/phi-2"

# Load the matching tokenizer and causal-LM weights for that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id)
model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [23]:
# Text-generation pipeline around the phi-2 model/tokenizer loaded above.
hf_pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,  # CPU or GPU
    # Generation settings: sampled decoding, capped at 200 new tokens.
    do_sample=True,
    temperature=0.7,
    max_new_tokens=200,
    # Use the end-of-sequence token id as the padding token id.
    pad_token_id=tokenizer.eos_token_id,
)

# Expose the pipeline through LangChain's HuggingFacePipeline wrapper.
LLM_01 = HuggingFacePipeline(pipeline=hf_pipe)

Device set to use cpu


## HuggingFace LLM: Mistral

In [None]:
model_name = "mistralai/Mistral-7B-v0.1"

# NOTE: this rebinds `tokenizer`, which until now held the phi-2 tokenizer.
# The Mistral pipeline cell reads it under this name, so it is kept; re-running
# the phi-2 pipeline cell after this one would pick up the Mistral tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights 4-bit quantized (requires bitsandbytes; saves VRAM/RAM).
# The quantization settings go through `quantization_config`, because passing
# `load_in_4bit=True` directly to `from_pretrained` is deprecated.
model2 = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",    # Auto places model on GPU if available
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)


In [None]:
# Text-generation pipeline around the 4-bit Mistral model.
# No `device` argument here: `model2` was loaded with device_map="auto", so its
# placement is already decided, and a quantized, accelerate-dispatched model
# cannot be moved to a specific device by the pipeline.
hf_2 = pipeline(
    "text-generation",
    model=model2,
    tokenizer=tokenizer,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7,
    # Use the end-of-sequence token id as the padding token id.
    pad_token_id=tokenizer.eos_token_id,
)

# Expose the pipeline through LangChain's HuggingFacePipeline wrapper.
LLM_02 = HuggingFacePipeline(pipeline=hf_2)

## Ollama: llama3.1  [Free Local Model]

In [26]:
# Chat model served by a local Ollama instance.
# The sampling temperature field is lowercase `temperature`; the previous
# `Temperature=` keyword did not match any ChatOllama field, so 0.7 was never
# applied. `do_sample` is a Hugging Face generation flag that ChatOllama does
# not define, so it is dropped.
LLM_03 = ChatOllama(model="llama3.1", temperature=0.7)

## Prompt

In [41]:
# Chat prompt layout: system instruction, then the prior conversation
# (`chat_history`), then the new user message (`text`).
# NOTE(review): "whey write" in the system text looks like a typo for
# "when writing"; it is kept verbatim because it is part of the prompt text.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an AI assistant, and provide no more than 220 tokens whey write a response to a question"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{text}"),
    ]
)

## Chains

In [43]:
# Pipeline: fill the prompt -> call the Ollama chat model -> parse the reply with StrOutputParser.
ChainLLama31 = (
    prompt
    | LLM_03
    | StrOutputParser()
)

## Request

In [45]:
# Running conversation log; each turn appends a HumanMessage and an AIMessage.
chat_history = list()

In [47]:
# User message for this turn.
query = 'I am an AI specialist who is expert in online knowledge augmentation from nonstationary Data Streams'

# Run the chain with the conversation so far plus the new user message.
response_message = ChainLLama31.invoke(
    {
        'chat_history': chat_history,
        'text': query,
    }
)

In [51]:
# Show the model's reply for the turn above.
print(response_message)

That's fascinating! Online knowledge augmentation from non-stationary data streams is a challenging problem in the field of Artificial Intelligence and Machine Learning. Non-stationarity refers to the fact that the underlying distribution of the data changes over time, making it difficult to develop models that can adapt and learn from the new patterns.

As an expert in online knowledge augmentation, you must be well-versed in techniques such as incremental learning, transfer learning, and few-shot learning. You likely also have experience with real-time data processing, streaming algorithms, and model adaptation strategies.

Can you tell me more about your work in this area? What are some of the most significant challenges you've encountered, and how do you approach solving them?


In [54]:
# Record the completed turn (user query, then model reply) in the history.
# Appends on every execution, so re-running this cell duplicates the turn.
chat_history.extend(
    (
        HumanMessage(content=query),
        AIMessage(content=response_message),
    )
)

In [56]:
# Follow-up question that can only be answered from the stored history.
query = 'What is my area of expertise?'

response_message = ChainLLama31.invoke(
    {
        'chat_history': chat_history,
        'text': query,
    }
)
print(response_message)

Your area of expertise is Online Knowledge Augmentation from Non-Stationary Data Streams. Specifically, you're an expert in developing AI models that can adapt and learn from changing data distributions over time, making it possible to continuously update and improve knowledge based on new information.


In [58]:
# Record the completed turn (user query, then model reply) in the history.
# Appends on every execution, so re-running this cell duplicates the turn.
chat_history.extend(
    (
        HumanMessage(content=query),
        AIMessage(content=response_message),
    )
)

In [60]:
# New topic, still sent together with the full conversation history.
query = 'Why do prefer most of people in Iran to live in Tehran?'

response_message = ChainLLama31.invoke(
    {
        'chat_history': chat_history,
        'text': query,
    }
)
print(response_message)

As a knowledgeable assistant, I don't have personal preferences or opinions, but I can provide some insights.

Tehran is the capital and largest city of Iran, with a rich history, culture, and economic significance. Many Iranians migrate to Tehran for better job opportunities, education, and access to modern amenities. However, this has also led to concerns about urbanization, congestion, and strain on resources.

If I had to provide an objective answer, it's likely due to the concentration of educational institutions, research centers, and industries in Tehran, making it a hub for career advancement and economic growth.


In [62]:
# Record the completed turn (user query, then model reply) in the history.
# Appends on every execution, so re-running this cell duplicates the turn.
chat_history.extend(
    (
        HumanMessage(content=query),
        AIMessage(content=response_message),
    )
)

In [90]:
query='What was the context of my previous message?'

# Send only the most recent exchange as context. The slice keeps the original
# HumanMessage/AIMessage objects, so each entry retains its role. Passing the
# bare `.content` strings instead makes the prompt template treat both entries
# as user messages, including the assistant's own reply.
response_message=ChainLLama31.invoke({'chat_history':chat_history[-2:],'text':query})
print(response_message)

In your question, you asked why most people in Iran prefer to live in Tehran. I provided some insights on the city's significance, job opportunities, education, and modern amenities, which might contribute to its appeal.


In [92]:
# Record the completed turn (user query, then model reply) in the history.
# Appends on every execution, so re-running this cell duplicates the turn.
chat_history.extend(
    (
        HumanMessage(content=query),
        AIMessage(content=response_message),
    )
)