# Scratch

---

### Imports & Setup

Installs

In [20]:
# Run this cell to install required packages in your Jupyter notebook environment
!pip install openai sqlalchemy sqlmodel psycopg2-binary pandas

Collecting openai
  Downloading openai-1.26.0-py3-none-any.whl.metadata (21 kB)
Collecting distro<2,>=1.7.0 (from openai)
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.26.0-py3-none-any.whl (314 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.1/314.1 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading distro-1.9.0-py3-none-any.whl (20 kB)
Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2

Connect to DB

In [2]:
from sqlalchemy import create_engine
from sqlmodel import Session, SQLModel, select

# PostgreSQL connection URL
DATABASE_HOST = "db"  # Service name in docker-compose.yml
DATABASE_PORT = "5432"
DATABASE_NAME = "updatesdb"
DATABASE_USER = "postgres"
DATABASE_PASSWORD = "your_password"

ENGINE_URL = f"postgresql://{DATABASE_USER}:{DATABASE_PASSWORD}@{DATABASE_HOST}:{DATABASE_PORT}/{DATABASE_NAME}"

engine = create_engine(ENGINE_URL, echo=True, pool_pre_ping=True)

# Optional: If you need to create the tables in your database (uncomment if necessary)
# SQLModel.metadata.create_all(engine)

Models

In [3]:
from typing import Optional
from sqlmodel import SQLModel, Field, Relationship
from sqlalchemy import Column, BigInteger

class ChatType(SQLModel, table=True):
    id: Optional[int] = Field(default=None, primary_key=True)
    type_name: str = Field(sa_column_kwargs={"unique": False})

class Chat(SQLModel, table=True):
    id: int = Field(primary_key=True)
    chat_id: Optional[int] = Field(default=None, sa_column=Column(BigInteger()))
    all_members_are_administrators: bool = Field()
    title: str = Field()
    type_id: int = Field(foreign_key="chattype.id")
    type: ChatType = Relationship()

class User(SQLModel, table=True):
    id: int = Field(primary_key=True, index=True)
    user_id: Optional[int] = Field(default=None)  # Telegram user ID
    first_name: str = Field()
    last_name: Optional[str] = Field(default=None)
    is_bot: bool = Field()
    language_code: str = Field()
    username: Optional[str] = Field()

class Message(SQLModel, table=True):
    id: int = Field(primary_key=True)
    message_id: int = Field(default=None)
    channel_chat_created: bool = Field()
    chat_id: int = Field(foreign_key="chat.id")
    chat: Chat = Relationship()
    date: int = Field()
    delete_chat_photo: bool = Field()
    from_user_id: int = Field(foreign_key="user.id")
    from_user: User = Relationship()
    group_chat_created: bool = Field()
    reply_to_message_id: Optional[int] = Field(default=None, foreign_key="message.id")
    reply_to_message: 'Message' = Relationship(sa_relationship_kwargs={"remote_side": "Message.id"})
    supergroup_chat_created: bool = Field()
    text: Optional[str] = Field()

class Update(SQLModel, table=True):
    update_id: int = Field(primary_key=True)
    message_id: Optional[int] = Field(default=None, foreign_key="message.id")
    message: Optional[Message] = Relationship()

---

### Display Tables (in pandas)

In [4]:
import pandas as pd

def display_table_data_pandas(model_class):
    with Session(engine) as session:
        statement = select(model_class)
        results = session.exec(statement).all()
        return pd.DataFrame([result.__dict__ for result in results])

In [5]:
# Display data using pandas for better formatting
data = display_table_data_pandas(Message)
data.head()

2024-05-07 09:57:56,464 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2024-05-07 09:57:56,464 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-05-07 09:57:56,464 INFO sqlalchemy.engine.Engine select current_schema()
2024-05-07 09:57:56,465 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-05-07 09:57:56,465 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2024-05-07 09:57:56,465 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-05-07 09:57:56,466 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-05-07 09:57:56,471 INFO sqlalchemy.engine.Engine SELECT message.id, message.message_id, message.channel_chat_created, message.chat_id, message.date, message.delete_chat_photo, message.from_user_id, message.group_chat_created, message.reply_to_message_id, message.supergroup_chat_created, message.text 
FROM message
2024-05-07 09:57:56,471 INFO sqlalchemy.engine.Engine [generated in 0.00027s] {}
2024-05-07 09:57:56,473 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,_sa_instance_state,id,channel_chat_created,delete_chat_photo,reply_to_message_id,supergroup_chat_created,text,message_id,chat_id,date,from_user_id,group_chat_created
0,<sqlalchemy.orm.state.InstanceState object at ...,1,False,False,,False,"Earth in forgetful snow, feeding",180,1,1714930267,1,False
1,<sqlalchemy.orm.state.InstanceState object at ...,2,False,False,,False,"Earth in forgetful snow, feeding",180,3,1714930267,3,False
2,<sqlalchemy.orm.state.InstanceState object at ...,3,False,False,2.0,False,A little life with dried tubers.,181,2,1714930290,2,False
3,<sqlalchemy.orm.state.InstanceState object at ...,4,False,False,,False,"Earth in forgetful snow, feeding",180,5,1714930267,5,False
4,<sqlalchemy.orm.state.InstanceState object at ...,5,False,False,4.0,False,"Summer surprised us, coming over the Starnberg...",182,4,1714930308,4,False


---

### Reconstruct Chat History

In [11]:
from datetime import datetime, timedelta

def reconstruct_chat_as_text(engine, start_time: datetime, end_time: datetime):
    chat_history = []
    with Session(engine) as session:
        # Define the query with a time filter and order by date
        statement = select(Message, User).join(User).where(
            (Message.date >= start_time.timestamp()) & 
            (Message.date <= end_time.timestamp())
        ).order_by(Message.date)

        results = session.exec(statement).all()

        for message, user in results:
            if message.reply_to_message_id:
                # Fetch the original message and user if the current message is a reply
                original_message = session.get(Message, message.reply_to_message_id)
                original_user = session.get(User, original_message.from_user_id)
                chat_line = f"{user.username}: <replying to: {original_user.username}: {original_message.text}> {message.text}"
            else:
                chat_line = f"{user.username}: {message.text}"
            
            chat_history.append(chat_line)

    return "\n".join(chat_history)

In [15]:
# Get dates
today = datetime.now()
previous_day = today - timedelta(days=2)
# Set start_time to the beginning of the previous day (00:00:00)
start_time = datetime(previous_day.year, previous_day.month, previous_day.day, 0, 0, 0)
# Set end_time to the end of the previous day (23:59:59)
end_time = datetime(previous_day.year, previous_day.month, previous_day.day, 23, 59, 59)

# Get chat history text
chat_text = reconstruct_chat_as_text(engine, start_time, end_time)

2024-05-07 10:40:16,851 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-05-07 10:40:16,851 INFO sqlalchemy.engine.Engine SELECT message.id, message.message_id, message.channel_chat_created, message.chat_id, message.date, message.delete_chat_photo, message.from_user_id, message.group_chat_created, message.reply_to_message_id, message.supergroup_chat_created, message.text, "user".id AS id_1, "user".user_id, "user".first_name, "user".last_name, "user".is_bot, "user".language_code, "user".username 
FROM message JOIN "user" ON "user".id = message.from_user_id 
WHERE message.date >= %(date_1)s AND message.date <= %(date_2)s ORDER BY message.date
2024-05-07 10:40:16,852 INFO sqlalchemy.engine.Engine [cached since 158.6s ago] {'date_1': 1714867200.0, 'date_2': 1714953599.0}
2024-05-07 10:40:16,852 INFO sqlalchemy.engine.Engine ROLLBACK


In [21]:
print(chat_text)

Ramshreyas: Earth in forgetful snow, feeding
Ramshreyas: Earth in forgetful snow, feeding
Ramshreyas: Earth in forgetful snow, feeding
Ramshreyas: <replying to: Ramshreyas: Earth in forgetful snow, feeding> A little life with dried tubers.
Ramshreyas: <replying to: Ramshreyas: Earth in forgetful snow, feeding> Summer surprised us, coming over the Starnbergersee
Ramshreyas: Summer surprised us, coming over the Starnbergersee
Ramshreyas: <replying to: Ramshreyas: Summer surprised us, coming over the Starnbergersee> With a shower of rain; we stopped in the colonnade,
Ramshreyas: And went on in sunlight, into the Hofgarten,


---

### Extract topics

In [42]:
from openai import OpenAI

client = OpenAI(api_key="your_api_key_here")

def extract_topics_from_chat(chat_text):
    # Using the chat-specific completion API
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",  # Use an appropriate model, like 'gpt-3.5-turbo'
        messages=[{"role": "system", "content": "Extract the main topics from the following group chat:"},
                  {"role": "user", "content": chat_text}],
        max_tokens=300,
        temperature=0.5,
        stop=["\n\n"]
    )
    
    return response.choices[0].message.content

In [43]:
# Assume the OpenAI client is initialized and `extract_topics_from_chat` is defined
dummy_chat_text = """
Alice: I just got a new kitten yesterday!
Bob: Oh, that's awesome! What breed?
Alice: She's a Maine Coon, absolutely adorable.
Charlie: Speaking of pets, I've been thinking about getting a fish tank.
Alice: Fish are cool, but they seem like a lot of work.
Bob: <replying to: Charlie: Speaking of pets, I've been thinking about getting a fish tank.> It's not too bad, you should definitely go for it. I had one as a kid.
Charlie: Any recommendations for beginner fish?
Bob: Goldfish are pretty straightforward, but bettas have more personality.
Alice: <replying to: Bob: Oh, that's awesome! What breed?> Cats are easier to handle though, don't you think?
Bob: <replying to: Alice: Cats are easier to handle though, don't you think?> Definitely, cats are more independent.
Charlie: I like dogs more though. They're more interactive.
Alice: I'm allergic to dogs unfortunately.
Bob: <replying to: Charlie: I like dogs more though. They're more interactive.> Dogs are great for active people. Always ready to go out.
Charlie: <replying to: Bob: Goldfish are pretty straightforward, but bettas have more personality.> I might consider a betta then.
Alice: Just make sure the tank is big enough, they need space to thrive.
Bob: <replying to: Alice: Just make sure the tank is big enough, they need space to thrive.> Yeah, and keep the water clean!
"""

topics = extract_topics_from_chat(dummy_chat_text)

In [44]:
print(topics)

1. Pets (kitten, fish, cats, dogs)
2. Fish tanks and fish care (beginner fish, tank size, water cleanliness)
3. Different pet preferences (cats, dogs, fish)
4. Allergies to pets
5. Pet ownership responsibilities and considerations
