In [1]:
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
import base64
import email
import os

from langchain_community.agent_toolkits import GmailToolkit
from langchain_community.tools.gmail.utils import (
    build_resource_service,
)

SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def create_credentials():
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # 사용자 인증이 필요한 경우
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # 다음 번 사용을 위해 인증된 사용자 정보를 저장
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return creds

creds = create_credentials()
api_resource = build_resource_service(credentials=creds)
service = build('gmail', 'v1', credentials=creds)
toolkit = GmailToolkit(api_resource=api_resource)

RefreshError: ('invalid_grant: Token has been expired or revoked.', {'error': 'invalid_grant', 'error_description': 'Token has been expired or revoked.'})

In [None]:
tools = toolkit.get_tools()
tools # toolkit initialize

[GmailCreateDraft(api_resource=<googleapiclient.discovery.Resource object at 0x000001B8D58889D0>),
 GmailSendMessage(api_resource=<googleapiclient.discovery.Resource object at 0x000001B8D58889D0>),
 GmailSearch(api_resource=<googleapiclient.discovery.Resource object at 0x000001B8D58889D0>),
 GmailGetMessage(api_resource=<googleapiclient.discovery.Resource object at 0x000001B8D58889D0>),
 GmailGetThread(api_resource=<googleapiclient.discovery.Resource object at 0x000001B8D58889D0>)]

In [None]:
from langchain import hub
instructions = """You are an assistant. return output only."""
base_prompt = hub.pull("langchain-ai/openai-functions-template")
prompt = base_prompt.partial(instructions=instructions)

In [None]:
from langchain_openai import ChatOpenAI, OpenAI
from langchain.agents import AgentExecutor, create_openai_functions_agent

llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0, streaming=True, max_tokens=2048)
agent = create_openai_functions_agent(llm, toolkit.get_tools(), prompt)

In [None]:
agent_executor = AgentExecutor(
    agent=agent,
    tools=toolkit.get_tools(),
    return_intermediate_steps=True,
    verbose=True
)

In [None]:
search_result = agent_executor.invoke({"input": "`search_gmail` with `{'query': 'from':'Medium Daily Digest', 'max_results': 1}`. Return Most recent matching mail with id ONLY."})
print(search_result["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_gmail` with `{'query': "from:'Medium Daily Digest'", 'max_results': 1, 'resource': 'messages'}`



[1m> Finished chain.[0m
Most recent matching email with ID: 18daeed4aa8dcee0


In [None]:
search_result_output = search_result["output"].split(":")[-1].lstrip(" ") # parse result
print(search_result_output)

18daeed4aa8dcee0


In [None]:
def get_message(service, user_id, message_id):
    try:
        message = service.users().messages().get(userId=user_id, id=message_id, format='raw').execute()
        print('Message snippet: %s' % message['snippet'])

        msg_str = base64.urlsafe_b64decode(message['raw'].encode('ASCII'))
        mime_msg = email.message_from_bytes(msg_str)

        # 메일 본문 찾기
        if mime_msg.is_multipart():
            for part in mime_msg.walk():
                if part.get_content_type() == 'text/html':
                    html_content = part.get_payload(decode=True).decode()
                    break
        else:
            html_content = mime_msg.get_payload(decode=True).decode()

        return html_content
    except Exception as error:
        print('An error occurred: %s' % error)

# 메일 내용 가져오기 및 파싱 예제
user_id = 'me'  # 현재 로그인한 사용자
message_id = search_result_output  # 가져오고자 하는 메시지의 ID
html_content = get_message(service, user_id, message_id)


Message snippet: Rhcp Stories for Rhcp @rhcp1134·Become a member Medium daily digest Today&#39;s highlights DataDrivenInvestor Teemu Sormunen in DataDrivenInvestor· 14 min read Improve RAG performance on custom


In [None]:
from langchain.schema.document import Document
from langchain_community.document_transformers import BeautifulSoupTransformer

doc = Document(page_content=html_content)
bs = BeautifulSoupTransformer()
bs_content = bs.transform_documents(documents=[doc], tags_to_extract=["a"])

In [None]:
from bs4 import BeautifulSoup
from urllib.parse import urlparse

soup = BeautifulSoup(html_content, 'html.parser')
links = soup.find_all('a')

def validation(url):
    parsed_url = urlparse(url)
    if parsed_url.scheme == "https" and parsed_url.netloc == "medium.com":
            # 경로에서 @username 확인
            path_parts = parsed_url.path.split('/')
            if len(path_parts) >= 3 and path_parts[1].startswith('@'):
                return True
    return False

url_dict = {}

for link in links:
    text = link.get_text(strip=True)
    url = link.get('href').split("?")[0]

    if validation(url):
        url_dict[url] = text

In [None]:
from typing import List
from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

#Parsing with Custom Pydantic Object
class URL_TABLE(BaseModel):
    url:str = Field(description="url")
    description:str = Field(description="description that describe url")

class URLTextList(BaseModel):
    url_text_pairs: List[URL_TABLE]

parser = PydanticOutputParser(pydantic_object=URLTextList)
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
# parse result with PydanticOutputParser
chain = llm | parser
result = chain.invoke(f"""get dict {url_dict}. Show value and url if it is related to LLM or Python or Programming.
    Output should be dictionary like this .
    <Example output>                       
                     {{
    "url_text_pairs": [
        {{
            "url": "https://example.com/1",
            "description": "Example description 1"
        }},
        {{
            "url": "https://example.com/2",
            "description": "Example description 2"
        }}
    ]}}""")

In [None]:
for content in result.url_text_pairs:
    print(content.url)

https://medium.com/@teemusormunen/improve-rag-performance-on-custom-vocabulary-e728b7a691e0
https://medium.com/@lee_vaughan/introducing-conda-environments-7e3fcdbda764
https://medium.com/@alcarazanthony1/leveraging-llms-for-causal-reasoning-why-knowledge-and-algorithms-are-key-d1928b7051c7
https://medium.com/@datatec.studio/fine-tune-an-llama2-for-document-q-a-how-to-build-vector-store-b4e8c9c99363


In [None]:
from datetime import date

today = date.today()

In [None]:
prompt_format = f"Here is your Today Daily LLM Digest!: {today}\n\n"
for idx, content in enumerate(result.url_text_pairs, 1):
    prompt_format += f"{idx}. \"{content.description}\" \n\t-url: {content.url}\n"
print(prompt_format)

Here is your Today Daily LLM Digest!: 2024-02-16

1. "Improve RAG performance on custom vocabularyVector search fails with custom data — find out why" 
	-url: https://medium.com/@teemusormunen/improve-rag-performance-on-custom-vocabulary-e728b7a691e0
2. "Introducing Conda EnvironmentsLearn to love the Command Line!" 
	-url: https://medium.com/@lee_vaughan/introducing-conda-environments-7e3fcdbda764
3. "Leveraging LLMs for Causal Reasoning: Why Knowledge and Algorithms are KeyCausal reasoning — the capacity to understand cause-effect relationships and make inferences about…" 
	-url: https://medium.com/@alcarazanthony1/leveraging-llms-for-causal-reasoning-why-knowledge-and-algorithms-are-key-d1928b7051c7
4. "Fine-Tune an LLaMA2 for Document Q&A — How to build vector storeUsing Llama-2–7B-Chat model we can build a Document Q&A Chatbot based on our own pdf file(s)." 
	-url: https://medium.com/@datatec.studio/fine-tune-an-llama2-for-document-q-a-how-to-build-vector-store-b4e8c9c99363

