In [None]:
import datetime

# Epoch timestamp expressed in milliseconds.
timestamp_ms = 1713651015101
# datetime.fromtimestamp() expects seconds, so scale down from milliseconds.
timestamp = timestamp_ms / 1000
# No tz argument is passed, so the result is a naive datetime in local time.
date_time = datetime.datetime.fromtimestamp(timestamp)
print(date_time)  # Prints the string form of the datetime object


In [None]:
# import
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load the exported channel transcript.
# NOTE: this file is produced by the export cell further down
# (`get_last_x_messages` -> f'{channel_id}.txt'); on a fresh kernel that cell
# has to run before this one.
# The export writes UTF-8, so read with the same encoding instead of relying on
# the platform default (which is not UTF-8 on every OS).
loader = TextLoader("1228090209825063002.txt", encoding="utf-8")
documents = loader.load()

# Split the transcript into chunks of at most 1000 characters.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
docs = text_splitter.split_documents(documents)

# Create the open-source embedding function.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed the chunks and load them into a Chroma vector store.
db = Chroma.from_documents(docs, embedding_function)



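## Query function

Retrieve the chunks most similar to a question from the Chroma store and ask the LLM to answer the question from them.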

In [None]:
import os

from langchain_openai import ChatOpenAI

# Chat model served through Groq's OpenAI-compatible endpoint.
# The API key is read from the GROQ_API_KEY environment variable so that no
# secret lives in the notebook; a KeyError here means the variable is not set.
# SECURITY: a literal key was previously committed in this cell. Treat that key
# as compromised and revoke it.
model = ChatOpenAI(
    model="llama3-70b-8192",
    base_url="https://api.groq.com/openai/v1",
    api_key=os.environ["GROQ_API_KEY"],
)

def query_chroma(query):
    """Answer ``query`` with the chat model, using similar chunks as context.

    Runs ``db.similarity_search(query)``, joins the ``page_content`` of the
    returned documents, and sends the question plus that context to ``model``.

    Args:
        query: The user question (string).

    Returns:
        The ``content`` of the model response.
    """
    matches = db.similarity_search(query)
    # Separate chunks with a blank line so the end of one chunk does not run
    # into the start of the next.
    context = "\n\n".join(doc.page_content for doc in matches)

    # Send role-tagged messages rather than hand-written Llama-3 template tokens:
    # the chat-completions endpoint applies the model's chat template itself, so
    # embedding <|start_header_id|>-style markers in the text nests one template
    # inside another.
    response = model.invoke([
        ("system", "Your job is to answer the user question using the context."),
        ("human", f"Here is the question: {query}\nHere is the context: {context}"),
    ])
    return response.content


In [None]:
# Ask a sample question against the indexed transcript and keep the answer text.
query_response = query_chroma("What other bot name did AusBoss like?")

In [None]:
# Display the answer returned by query_chroma.
query_response

In [None]:
import sqlite3
from typing import List, Dict



def get_last_x_messages(db_path: str, channel_id: int, k: int, channel_type: str) -> List[str]:
    """Return the ``k`` most recent messages of a channel, oldest first.

    Each message is formatted as ``"<name>: <clean_content>"``, where the name
    is ``global_name`` when it is not NULL and ``user_name`` otherwise.

    Args:
        db_path: Path to the SQLite database file.
        channel_id: Value matched against the ``channel_id`` column.
        k: Maximum number of messages to return.
        channel_type: ``"dm"`` reads the ``dms`` table, ``"channel"`` reads
            the ``messages`` table.

    Returns:
        Formatted message strings in ascending ``created_timestamp`` order.

    Raises:
        ValueError: If ``channel_type`` is neither ``"dm"`` nor ``"channel"``.
        sqlite3.Error: If the query fails (the error is printed, then re-raised).
    """
    # Validate before connecting so an invalid type cannot leave a connection open.
    if channel_type == "dm":
        table = "dms"
    elif channel_type == "channel":
        table = "messages"
    else:
        raise ValueError("Invalid channel type")

    # `table` is one of the two fixed names above, never caller-supplied text,
    # so interpolating it is safe; the values are still bound as parameters.
    # The inner query picks the newest k rows, the outer one restores
    # chronological order.
    query = f"""
        SELECT name, clean_content FROM (
            SELECT COALESCE(global_name, user_name) AS name, clean_content, created_timestamp
            FROM {table}
            WHERE channel_id = ?
            ORDER BY created_timestamp DESC
            LIMIT ?
        ) sub ORDER BY created_timestamp ASC
    """

    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute(query, (channel_id, k)).fetchall()
        return [f"{name}: {clean_content}" for name, clean_content in rows]
    except sqlite3.Error as e:
        print(f"Error fetching messages for {channel_type} with channel ID {channel_id}: {e}")
        raise
    finally:
        # Ensure the database connection is closed after operation
        conn.close()

# Example usage: export the most recent messages of one channel to a text file.
db_path = 'messages.db'
channel_id=1228090209825063002
messages = get_last_x_messages(db_path, channel_id=channel_id, k=999, channel_type='channel')

# Open with mode 'w' rather than 'a': the export always contains the latest
# messages in full, so appending on a re-run would duplicate every line in the
# transcript file.
with open(f'{channel_id}.txt', encoding='utf-8', mode='w') as f:
    f.writelines(message + '\n' for message in messages)

In [None]:
def get_last_x_messages(db_path: str, channel_id: int, k: int, channel_type: str) -> List[Dict]:
    """Return the ``k`` most recent messages of a channel as dicts, oldest first.

    NOTE: this redefines the ``get_last_x_messages`` from the earlier cell. This
    version returns dicts instead of formatted strings and always uses
    ``user_name`` as the name (no ``global_name`` fallback).

    Args:
        db_path: Path to the SQLite database file.
        channel_id: Value matched against the ``channel_id`` column.
        k: Maximum number of messages to return.
        channel_type: ``"dm"`` reads the ``dms`` table, ``"channel"`` reads
            the ``messages`` table.

    Returns:
        A list of ``{'name': ..., 'clean_content': ...}`` dicts in ascending
        ``created_timestamp`` order.

    Raises:
        ValueError: If ``channel_type`` is neither ``"dm"`` nor ``"channel"``.
        sqlite3.Error: If the query fails.
    """
    # Validate before connecting so an invalid type cannot leave a connection open.
    if channel_type == "dm":
        table = "dms"
    elif channel_type == "channel":
        table = "messages"
    else:
        raise ValueError("Invalid channel type")

    # `table` is one of the two fixed names above, so interpolating it is safe.
    # The inner query picks the newest k rows, the outer one restores
    # chronological order.
    query = f"""
    SELECT user_name AS name, clean_content FROM (
        SELECT user_name, clean_content, created_timestamp
        FROM {table}
        WHERE channel_id = ?
        ORDER BY created_timestamp DESC
        LIMIT ?
    ) sub ORDER BY created_timestamp ASC
    """

    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute(query, (channel_id, k)).fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()
    return [{'name': name, 'clean_content': clean_content} for name, clean_content in rows]



In [None]:
# Fetch the 20 most recent messages of the channel and display them.
db_path = 'messages.db'
channel_id = 1228090209825063002
messages = get_last_x_messages(db_path, channel_id=channel_id, k=20, channel_type='channel')
messages

In [None]:
def get_table_columns(db_path, table_name):
    """Return the column metadata SQLite reports for ``table_name``.

    Args:
        db_path: Path to the SQLite database file.
        table_name: Name of the table to inspect.

    Returns:
        The rows of SQLite's ``table_info`` pragma for the table, one tuple per
        column: ``(cid, name, type, notnull, dflt_value, pk)``. The list is
        empty when the table does not exist.
    """
    conn = sqlite3.connect(db_path)
    try:
        # A PRAGMA statement cannot take bound parameters, but its table-valued
        # form can, which keeps the table name out of the SQL text.
        cursor = conn.execute("SELECT * FROM pragma_table_info(?)", (table_name,))
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

# Example usage: inspect the column definitions of the 'messages' table
# in messages.db and print the raw rows.
columns = get_table_columns('messages.db', 'messages')
print(columns)


In [None]:
import sqlite3
from typing import List, Dict


def summarize_groups(messages: List[Dict], group_size: int = 20) -> List[str]:
    """Summarize a message list in consecutive fixed-size groups.

    The messages are split into runs of ``group_size`` (the last run may be
    shorter). Each run is rendered as ``"<name>: <clean_content>"`` entries
    joined by single spaces and passed to ``generate_summary``.

    Args:
        messages: Dicts with ``'name'`` and ``'clean_content'`` keys.
        group_size: Number of messages per summary; defaults to 20.

    Returns:
        One summary per group, in message order. Empty when ``messages`` is empty.

    Raises:
        ValueError: If ``group_size`` is less than 1.
    """
    if group_size < 1:
        # A negative step would silently produce no groups at all.
        raise ValueError("group_size must be at least 1")

    summaries = []
    for start in range(0, len(messages), group_size):
        group = messages[start:start + group_size]
        context = ' '.join(f"{msg['name']}: {msg['clean_content']}" for msg in group)
        summaries.append(generate_summary(context))
    return summaries

# Assuming you have a function 'generate_summary' that wraps around your model API for summarization
# For instance:
def generate_summary(text: str) -> str:
    """Ask the chat model for a summary of a conversation transcript.

    Args:
        text: The conversation to summarize, as a single string.

    Returns:
        The ``content`` of the model response with surrounding whitespace
        stripped.
    """
    # The original system prompt ended mid-sentence ("...conversation and .");
    # it is replaced with a complete instruction.
    # Role-tagged messages are used instead of hand-written Llama-3 template
    # tokens, because the chat-completions endpoint applies the chat template
    # itself.
    response = model.invoke([
        ("system", "Generate a concise summary of the conversation."),
        ("human", f"Here is the conversation: {text}"),
    ])
    return response.content.strip()

In [None]:
# Example usage: fetch the 20 most recent messages of the channel and
# summarize them in groups.
db_path = 'messages.db'
channel_id = 1228090209825063002
messages = get_last_x_messages(db_path, channel_id=channel_id, k=20, channel_type='channel')
# summarize_groups indexes each message by 'name' and 'clean_content', so this
# relies on the dict-returning definition of get_last_x_messages being active.
summaries = summarize_groups(messages)



In [None]:
# Display the list of summaries produced by summarize_groups.
summaries