### RELEVANCE MODEL (RoBERTa)

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

In [2]:
# Load the relevance classifier: build a 2-label RoBERTa sequence classifier
# from the base checkpoint, then overwrite its parameters with the weights
# saved in the project's `brain/model` directory.
model2 = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2) # adjust for your model specifics
# Forward slashes are accepted on Windows, Linux and macOS alike; the previous
# backslash-separated path only resolved on Windows and was inconsistent with
# the tokenizer path below.
model2.load_state_dict(torch.load("brain/model/model_weights.pth", map_location=torch.device('cpu')))

# Tokenizer files saved next to the model weights
tokenizer_save_dir = 'brain/tokenizer'
tokenizer2 = AutoTokenizer.from_pretrained(tokenizer_save_dir)

def relevance_score(message_body):
    """Classify a message with the notebook-level `model2` / `tokenizer2`.

    Args:
        message_body: Text of the message to classify.

    Returns:
        int: Index of the highest-scoring class. The testing cell in this
        notebook labels 0 as "Not Relevant" and 1 as "Relevant".
    """
    encoded = tokenizer2(message_body, return_tensors="pt", truncation=True, padding=True)

    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        class_logits = model2(**encoded).logits

    # `.item()` unwraps the single-element tensor into a plain Python int.
    return torch.argmax(class_logits, dim=1).item()

relevance_score("It is indeed predicting things properly.")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0

### TESTING

In [None]:
# Hand-written evaluation samples for the relevance classifier. The trailing
# comment on each entry is its expected label and mirrors, position by
# position, the corresponding entry in `ground_truth` below.
test_data = [
    # Campus / coursework messages
    "Submit the assignment for Math 101 by Friday",       # Relevant (1)
    "Prepare for the group presentation in English class", # Relevant (1)
    "The campus cafeteria is serving pizza today.",       # Not relevant (0)
    "Sign up for the robotics workshop this weekend",     # Relevant (1)
    "Start working on the history term paper",            # Relevant (1)
    "Check out the new movie playing near campus.",       # Not relevant (0)
    "Register for the upcoming college career fair",      # Relevant (1)
    "The library is open until 9 PM tonight.",            # Not relevant (0)
    "Revise the lecture notes for the chemistry exam",    # Relevant (1)
    "Complete the lab report for the physics practical",  # Relevant (1)

    # Workplace messages
    "Schedule a meeting with the marketing team at 3 PM", # Relevant (1)
    "Review the quarterly financial report before Friday", # Relevant (1)
    "The coffee machine on the second floor is broken.",  # Not relevant (0)
    "Prepare a presentation for the client pitch",        # Relevant (1)
    "Finalize the team for the product launch event",     # Relevant (1)
    "The weather today is sunny and warm.",               # Not relevant (0)
    "Send an email to HR about the leave application",    # Relevant (1)
    "Organize files in the shared drive for better access", # Relevant (1)
    "Plan the agenda for the weekly team meeting",        # Relevant (1)
    "The vending machine snacks are out of stock.",       # Not relevant (0)
]

# Expected labels, in the same order as `test_data` (1 = relevant, 0 = not relevant)
ground_truth = [1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0]

from sklearn.metrics import classification_report

# zip() stops at the shorter sequence, so a mismatch between samples and
# labels would silently drop samples from the evaluation. Fail fast instead.
if len(test_data) != len(ground_truth):
    raise ValueError(
        f"test_data has {len(test_data)} samples but ground_truth has "
        f"{len(ground_truth)} labels"
    )

# Model predictions, collected in the same order as `test_data`
predicted_relevance = []

for test, truth in zip(test_data, ground_truth):
    relevance = relevance_score(test)
    predicted_relevance.append(relevance)

    # Per-sample trace so individual misclassifications are easy to spot
    print(f"Input Text: {test}")
    print(f"Ground Truth: {truth}, Predicted Relevance: {relevance}")
    print("-" * 50)

# Aggregate precision / recall / F1 per class
print("Classification Report:")
print(classification_report(ground_truth, predicted_relevance, target_names=["Not Relevant", "Relevant"]))

Input Text: Submit the assignment for Math 101 by Friday
Ground Truth: 1, Predicted Relevance: 1
--------------------------------------------------
Input Text: Prepare for the group presentation in English class
Ground Truth: 1, Predicted Relevance: 1
--------------------------------------------------
Input Text: The campus cafeteria is serving pizza today.
Ground Truth: 0, Predicted Relevance: 1
--------------------------------------------------
Input Text: Sign up for the robotics workshop this weekend
Ground Truth: 1, Predicted Relevance: 1
--------------------------------------------------
Input Text: Start working on the history term paper
Ground Truth: 1, Predicted Relevance: 1
--------------------------------------------------
Input Text: Check out the new movie playing near campus.
Ground Truth: 0, Predicted Relevance: 1
--------------------------------------------------
Input Text: Register for the upcoming college career fair
Ground Truth: 1, Predicted Relevance: 1
----------

### GEMINI API FOR TODO EXTRACTION

```javascript
const messageSchema = new mongoose.Schema({
    from: String,
    notifyName: String,
    body: String,
    timestamp: Number,
    isForwarded: Boolean,
    mediaPath: String,
    isFromUser: Boolean
});
```

In [3]:
from pymongo import MongoClient
from datetime import datetime

# MongoDB connection
# Connects to the MongoDB server on localhost:27017 and binds notebook-level
# handles to the `messages` collection of the `whatsapp` database.
client = MongoClient('mongodb://localhost:27017/')
db = client['whatsapp']
collection = db['messages']

In [4]:
import json
import google.generativeai  as genai
from datetime import datetime

def create_prompt(formatted_time, catagories_string, message):
    """Build the Gemini prompt that asks for one to-do item as JSON.

    The prompt is the concatenation of three parts: a fixed worked example,
    the task-specific values, and a fixed output template.

    Args:
        formatted_time: Date/time text shown to the model as "Today's Date".
        catagories_string: Text listing the known categories (the parameter
            spelling is kept so existing keyword callers keep working).
        message: The message text the task should be extracted from.

    Returns:
        str: The complete prompt.
    """
    # The worked example is kept self-consistent on purpose:
    #  * the example category is copied verbatim from the example category
    #    list, matching the "don't modify the text" rule stated next to it;
    #  * the example carries its own "Today's Date" so that "tomorrow"
    #    resolving to 16-10-2024 is actually derivable.
    prompt1 = """
    **Prompt**: 

    The provided dataset complies with ethical standards and is safe for research purposes. Your task is to extract specific information from a given message (ACTUAL_MESSAGE) and return it in a JSON format with the following structure.

    **Example**:

    Today's Date: **15-10-2024 09:00**  
    **Categories**: "Data Analytics", "Deep Learning"  
    **Sample Message**: "Guys, try to submit your DA record notebook by tomorrow at 14:00."

    **Expected JSON Output**:
    ```json
    {
        "Category": "Data Analytics", // Choose from the categories given (when choosing don't modify the text) or create a new one if necessary
        "Title": "Data Analytics Record Submission", // Create a suitable title for the task
        "Description": "Submit your DA record notebook by tomorrow.", // Provide a concise and grammatically accurate description
        "Priority": "High", // Estimate priority based on urgency and due date
        "Due": "16-10-2024 14:00" // Convert any due date info to this format; if not present, set to "None": Note: "If time is not present, simply give 00:00 at the end"
    }
    ```
    """
    # Only this middle part depends on the caller's arguments.
    prompt2 = f"""
    **Your Task**:

    Today's Date: **{formatted_time}**  
    **Categories**: {catagories_string}  
    **Message**: "{message}"
    """
    prompt3 = """
    **Output**:  
    Provide only the JSON in the format below:
    ```json
    {
        "Category": "",
        "Title": "",
        "Description": "",
        "Priority": "",
        "Due": ""
    }
    ```
    
    Try to guess or provide an appropriate due datetime based on the context.
    Do not include any additional commentary or text outside of the JSON response.
    """

    return prompt1 + prompt2 + prompt3

def parse_task_string(task_string: str):
    """Parse the model's reply into a task dict with a real `datetime` due date.

    The JSON object is located by its outermost braces, so the reply may be
    wrapped in a Markdown code fence, carry surrounding text, or be bare JSON.

    Args:
        task_string: Raw text returned by the model.

    Returns:
        dict: The decoded JSON object. Its "Due" entry is a `datetime` when a
        due date was given, otherwise `None`.

    Raises:
        ValueError: If no JSON object is present, the JSON is invalid
            (`json.JSONDecodeError` is a `ValueError` subclass), or the due
            date does not match any accepted layout.
    """
    # Cut from the first "{" to the last "}" instead of assuming a fixed-width
    # "```json\n" prefix and "```\n" suffix.
    start = task_string.find('{')
    end = task_string.rfind('}')
    if start == -1 or end < start:
        raise ValueError(f"No JSON object found in model response: {task_string!r}")
    task_dict = json.loads(task_string[start:end + 1])

    due_str = task_dict.get("Due")
    print(due_str)

    # A missing key, JSON null, an empty string, or the literal "None" that
    # the prompt requests all mean "no due date".
    if due_str is None or str(due_str).strip() in ('', 'None', 'null'):
        task_dict["Due"] = None
        return task_dict

    due_text = str(due_str).strip()
    # The prompt asks for "dd-mm-YYYY HH:MM"; a date-only value is accepted as
    # midnight, which is what the prompt asks the model to emit anyway.
    for layout in ('%d-%m-%Y %H:%M', '%d-%m-%Y'):
        try:
            task_dict["Due"] = datetime.strptime(due_text, layout)
            break
        except ValueError:
            continue
    else:
        raise ValueError(
            f"Due date {due_text!r} does not match 'dd-mm-YYYY HH:MM' or 'dd-mm-YYYY'"
        )

    return task_dict

def extract_using_gemini(formatted_time, catagories_string, message, feedback=""):
    """Ask Gemini to turn a message into a structured to-do item.

    Args:
        formatted_time: Date/time text passed to the prompt as "Today's Date".
        catagories_string: Known categories, passed through to `create_prompt`.
        message: The message text to extract the task from.
        feedback: Optional extra guidance appended to the prompt under a
            "Feedback:" heading. Nothing is appended when it is empty.

    Returns:
        dict: The task parsed by `parse_task_string`.

    Raises:
        RuntimeError: If the `GEMINI_API_KEY` environment variable is not set.
    """
    # Local import so this cell does not depend on another cell's imports.
    import os

    # SECURITY: the API key must never be committed with the notebook. The key
    # that was previously hard-coded here should be treated as compromised
    # and rotated.
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GEMINI_API_KEY environment variable is not set; "
            "export it before calling extract_using_gemini()."
        )
    genai.configure(api_key=api_key)

    prompt = create_prompt(
        formatted_time=formatted_time,
        catagories_string=catagories_string,
        message=message,
    )
    # Avoid sending a dangling, empty "Feedback:" section to the model.
    if feedback:
        prompt += "\nFeedback:\n " + feedback

    model = genai.GenerativeModel(model_name="gemini-1.5-flash")
    response = model.generate_content(prompt)

    return parse_task_string(response.text)

# Demo run of the extraction pipeline on one hand-written message. The captured
# output below this cell shows that the guard is true when the cell is executed.
if __name__ == "__main__":

    # "Today's Date" in the same dd-mm-YYYY HH:MM layout the prompt uses for "Due"
    now = datetime.now()
    formatted_time = now.strftime("%d-%m-%Y %H:%M")

    # Known categories, passed to the prompt as the list's string form
    catagory = ["Data Analytics", "Deep Learning", "Machine Learning", "Information Retrieval"]
    catagories_string = str(catagory)

    message = "For IR students, Project Phase 1 submission is postponed to tomorrow 23:59. Please submit your one-page documents."

    # Extra user guidance appended to the prompt under a "Feedback:" heading
    feedback = "I'll sumbit the one page by 3:00 PM"

    result = extract_using_gemini(formatted_time, catagories_string, message, feedback)

    print(result)

02-12-2024 23:59
{'Category': 'Information Retrieval', 'Title': 'IR Project Phase 1 Submission', 'Description': 'Submit your one-page document for Project Phase 1.', 'Priority': 'High', 'Due': datetime.datetime(2024, 12, 2, 23, 59)}


### RELEVANCE AND EXTRACTION

In [9]:
from pymongo import MongoClient
from datetime import datetime

def mark_relavance_and_process():
    """Score stored WhatsApp messages and turn the relevant ones into tasks.

    Steps, in order:
      1. Backfill `isProcessed` / `isRelevant` with False on messages that
         lack those fields.
      2. Run `relevance_score` on every message whose `isRelevant` is False
         and store the boolean result.
      3. Delete every message that is still marked not relevant.
      4. For each remaining message with `isProcessed` False, call
         `extract_using_gemini`, insert the resulting task into
         `TODOBOT.TODOLIST`, and mark the message as processed.

    A message whose model reply cannot be parsed is reported and left
    unprocessed, so the rest of the batch still runs and the message is
    retried on the next call.
    """
    client = MongoClient('mongodb://localhost:27017/')
    collection = client['whatsapp']['messages']
    todo_list_collection = client['TODOBOT']['TODOLIST']

    # Make sure both bookkeeping flags exist on every message.
    collection.update_many(
        {'isProcessed': {'$exists': False}},
        {'$set': {'isProcessed': False}}
    )
    collection.update_many(
        {'isRelevant': {'$exists': False}},
        {'$set': {'isRelevant': False}}
    )

    # --- Relevance pass -----------------------------------------------------
    for doc in collection.find({'isRelevant': False}):
        print(doc['body'])
        boolean = bool(relevance_score(doc['body']))
        print(f"Relevance Score: {boolean}")
        print("-------------------------------")
        collection.update_one(
            {'_id': doc['_id']},
            {'$set': {'isRelevant': boolean}}
        )

    # NOTE: destructive — messages judged not relevant are removed for good.
    collection.delete_many({'isRelevant': False})

    # --- Extraction pass ----------------------------------------------------
    categories = todo_list_collection.distinct("category")

    for doc in collection.find({'isProcessed': False}):
        # Use the same dd-mm-YYYY HH:MM layout that the prompt uses for its
        # "Due" field, so the model sees one consistent date convention.
        readable_time = datetime.fromtimestamp(doc['timestamp']).strftime('%d-%m-%Y %H:%M')

        message = doc['body']

        try:
            result = extract_using_gemini(readable_time, categories, message)
        except ValueError as exc:
            # Malformed JSON or an unparseable due date in the model reply:
            # skip this message only; isProcessed stays False for a retry.
            print(f"Skipping message {doc['_id']}: could not parse extraction result ({exc})")
            print("--------------------")
            continue

        print(result)

        today = datetime.now()
        duedate = result['Due']

        if duedate is None:
            status = "Unknown"
        elif duedate >= today:
            status = "Pending"
        else:
            status = "Past Due"

        TASK = {
            "category": result['Category'],
            "title": result['Title'],
            "description": result['Description'],
            "due_date": duedate,
            "priority": result['Priority'],
            "status": status,
            "created_at": today,
            "remainder_system": "WhatsApp",
            "modified_at": today,
            "is_human": False,
            "is_notified": False
        }

        print("--------------------")

        todo_list_collection.insert_one(TASK)

        # Mark as processed only after the task was stored successfully.
        collection.update_one(
            {'_id': doc['_id']},
            {'$set': {'isProcessed': True}}
        )

mark_relavance_and_process()

Are you alive?
Relevance Score: False
-------------------------------


In [14]:
def delete_all_tasks():
    """Remove every document from the `TODOLIST` collection of `TODOBOT`."""
    mongo = MongoClient('mongodb://localhost:27017/')
    # An empty filter matches all documents.
    mongo['TODOBOT']['TODOLIST'].delete_many({})
    print('All tasks deleted successfully.')

delete_all_tasks()

def delete_all_tasks2():
    """Remove every document from the `messages` collection of `whatsapp`.

    Despite its name, this function clears stored messages, not tasks; the
    confirmation text now says so.
    """
    client = MongoClient('mongodb://localhost:27017/')
    db = client['whatsapp']
    collection = db['messages']

    # An empty filter matches all documents.
    collection.delete_many({})
    print('All messages deleted successfully.')

delete_all_tasks2()

All tasks deleted successfully.
All tasks deleted successfully.


In [2]:
from pymongo import MongoClient
from datetime import datetime


def convert_to_datetime(data):
    """Turn string-valued date fields of a task dict into `datetime` objects.

    Only the keys `due_date`, `created_at` and `modified_at` are considered,
    and only when their value is a `str`; such values are parsed with the
    layout '%Y-%m-%d %H:%M'. The dict is modified in place.

    Args:
        data: Task mapping to convert.

    Returns:
        The same `data` object that was passed in.
    """
    for key in ('due_date', 'created_at', 'modified_at'):
        if key not in data:
            continue
        raw_value = data[key]
        # Values that are already datetimes (or None) are left untouched.
        if isinstance(raw_value, str):
            data[key] = datetime.strptime(raw_value, '%Y-%m-%d %H:%M')
    return data

def get_all_tasks():
    """Return every task in `TODOBOT.TODOLIST` with date strings parsed.

    Tasks lacking an `is_notified` field get it set to False in the database
    first. Each fetched task is then passed through `convert_to_datetime`.

    Returns:
        list: One dict per stored task.
    """
    todo_list_collection = MongoClient('mongodb://localhost:27017/')['TODOBOT']['TODOLIST']

    # Backfill the notification flag on tasks that do not have it yet.
    todo_list_collection.update_many(
        {'is_notified': {'$exists': False}},
        {'$set': {'is_notified': False}},
    )

    return [convert_to_datetime(task) for task in todo_list_collection.find()]

get_all_tasks()

[]

In [23]:
client = MongoClient('mongodb://localhost:27017/')
db = client['whatsapp']
collection = db['messages']

# Dump every stored message, one "field: value" line per field

for doc in collection.find():
    for field, value in doc.items():
        print(f"{field}: {value}")

_id: 675d1b9ba00eecb364041521
messageId: 873CA06237FC7E5D22200CB2FB5688D7
from: 917667634519@c.us
notifyName: User
body: For RLT,
1. I will send a compilation of questions (it will have a large number of questions though), 60% of the question paper is taken from this compilation. 

2. Numerical problems is based on the numericals of Assessment 1 and 2. 

3. For the remaining part of the question paper, we will be having the other remaining theoretical concepts.
timestamp: 1734155168
isForwarded: False
isFromUser: True
feedback: False
__v: 0
isProcessed: True
isRelevant: True
_id: 675d1c0ca00eecb36404153f
messageId: 70F1A004F1A2DCA50979D9E820694987
from: 917667634519@c.us
notifyName: User
body: For the Summer Intern project, each team is expected to be ready with the following on 16th Monday

1. PPT presentation 
2. Project demo 
3. One page abstract hard copy
4. Simple documentation soft copy
timestamp: 1734155282
isForwarded: True
isFromUser: True
feedback: False
__v: 0
isProcessed: T

In [35]:
from pymongo import MongoClient
from pprint import pprint

def verify_todo_db():
    """Print the tasks in `TODOBOT.TODOLIST` whose `is_notified` is False.

    Reports an error and returns early when the `TODOLIST` collection does
    not exist. Any exception raised along the way is caught and printed.
    """
    try:
        todo_db = MongoClient('mongodb://localhost:27017/')['TODOBOT']

        # Guard: nothing to verify if the collection was never created.
        if 'TODOLIST' not in todo_db.list_collection_names():
            print("Error: 'TODOLIST' collection does not exist.")
            return

        # Materialise the cursor so the results can be counted and re-iterated.
        pending = list(todo_db['TODOLIST'].find({'is_notified': False}))

        if not pending:
            print("No tasks found where 'is_notified' is False.")
        else:
            print(f"Found {len(pending)} tasks that haven't been notified:")
            pprint(pending)

        # One summary line per task, showing the flag that was queried on.
        for entry in pending:
            print(f"Task Title: {entry.get('title', 'No Title')} - is_notified: {entry.get('is_notified', 'N/A')}")

    except Exception as e:
        print(f"Error: {e}")

# Run the verification function
verify_todo_db()


Found 1 tasks that haven't been notified:
[{'_id': ObjectId('674c1ed4b4c1dd3a6cdf57dd'),
  'category': 'Work',
  'created_at': '2024-10-15 15:00',
  'description': 'Write a comprehensive report on the recent project.',
  'due_date': '2024-10-25 14:00',
  'is_human': True,
  'is_notified': False,
  'modified_at': '2024-10-15 16:00',
  'priority': 'High',
  'remainder_system': 'Email',
  'status': 'Pending',
  'title': 'Complete project report'}]
Task Title: Complete project report - is_notified: False


In [45]:
from pymongo import MongoClient

# Client for the local MongoDB server, used by the function below
client = MongoClient('mongodb://localhost:27017/')

def list_databases():
    """Print a header followed by the name of every database on the server.

    Any exception raised while fetching the names is caught and printed.
    """
    try:
        names = client.list_database_names()
        # Header and names are emitted together, one per line.
        print('\n'.join(['Databases in MongoDB:', *names]))
    except Exception as e:
        print(f"Error fetching databases: {e}")

# Show the databases
list_databases()


Databases in MongoDB:
RameezEats
TODOBOT
admin
book_tracker
chandru
config
local
mydatabase
ramz
testing
whatsapp
your_database_name


In [46]:
from pymongo import MongoClient

# Client for the local MongoDB server, used by the function below
client = MongoClient('mongodb://localhost:27017/')

def list_databases_and_collections():
    """Print each database on the server together with its collection names.

    Databases without collections are reported as such. Any exception raised
    along the way is caught and printed.
    """
    try:
        for db_name in client.list_database_names():
            print(f"Database: {db_name}")

            collections = client[db_name].list_collection_names()
            summary = (
                f"  Collections: {', '.join(collections)}"
                if collections
                else "  No collections found."
            )
            print(summary)
            print()  # Blank line between databases

    except Exception as e:
        print(f"Error fetching databases and collections: {e}")

# Show every database with its collections
list_databases_and_collections()


Database: RameezEats
  Collections: order_detials, orders, menuItems

Database: TODOBOT
  Collections: TODOLIST, todos, todolists

Database: admin
  Collections: system.version

Database: book_tracker
  Collections: books

Database: chandru
  Collections: ramz

Database: config
  Collections: system.sessions

Database: local
  Collections: startup_log

Database: mydatabase
  Collections: students

Database: ramz
  Collections: mycollection

Database: testing
  Collections: rameez

Database: whatsapp
  Collections: reactions, reacteds, messages, todolists

Database: your_database_name
  Collections: tasks

