# Preliminary Design with Langchain and Ollama

In [4]:
!pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.2.4-py3-none-any.whl.metadata (2.4 kB)
Collecting PyYAML>=5.3 (from langchain_community)
  Using cached PyYAML-6.0.1-cp311-cp311-win_amd64.whl.metadata (2.1 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain_community)
  Using cached SQLAlchemy-2.0.30-cp311-cp311-win_amd64.whl.metadata (9.8 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain_community)
  Using cached aiohttp-3.9.5-cp311-cp311-win_amd64.whl.metadata (7.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Using cached dataclasses_json-0.6.6-py3-none-any.whl.metadata (25 kB)
Collecting langchain<0.3.0,>=0.2.0 (from langchain_community)
  Downloading langchain-0.2.3-py3-none-any.whl.metadata (6.9 kB)
Collecting langchain-core<0.3.0,>=0.2.0 (from langchain_community)
  Downloading langchain_core-0.2.5-py3-none-any.whl.metadata (5.8 kB)
Collecting langsmith<0.2.0,>=0.1.0 (from langchain_community)
  Downloading langsmith-0.1.75-py3-none-

In [1]:
from langchain_community.llms import Ollama

## Testing out the local models
1. Llama3 - 8b

In [1]:
# Smoke test: wrap the "llama3:8b" model tag in the Ollama LLM client and send one prompt.
llm = Ollama(model="llama3:8b")
# `stop` is passed through to invoke(); NOTE(review): '<|eot_id|>' looks like Llama 3's
# end-of-turn marker — confirm against the model's chat template.
# The call is the cell's last expression, so its return value is what the cell displays.
llm.invoke("Tell me a joke",stop=['<|eot_id|>'])

"Here's one:\n\nWhy don't scientists trust atoms?\n\nBecause they make up everything!\n\nHope that made you smile! Do you want to hear another?"

2. Mistral v0.2 - 7b

In [4]:
# Same smoke test against the "mistral:v0.2" model tag, for a side-by-side comparison.
from langchain_community.llms import Ollama
# Rebinds the notebook-level name `llm`; the previous model instance is no longer reachable through it.
llm = Ollama(model="mistral:v0.2")
# NOTE(review): the same '<|eot_id|>' stop sequence is reused here — confirm it is meaningful for Mistral.
llm.invoke("Tell me a joke",stop=['<|eot_id|>'])

" Why don't scientists trust atoms?\n\nBecause they make up everything!"

## Testing model's capabilities for the preliminary design

### Requirements :

1. Code Ability
2. Code Review
3. Context understanding
4. Memory

1. Code Ability 
    - without access to internet resources

In our environment, creating a "Contributor" agent involves several actions: summarizing the GitHub issue, calling the `write_code` method for each required function, and writing tests for the code before merging and submitting it for review. Here I demonstrate a subset of them: summarizing a GitHub issue, planning the approach, calling `write_code` for each step, and summarizing the answers in a pull request.

In [21]:
import re

class CodingAgent:
    """Minimal code-writing agent backed by a local Ollama model.

    The agent sends a single "write a python function" prompt to the model and
    returns the Python code found in the reply.
    """

    def __init__(self, model_name):
        """Create the agent.

        Args:
            model_name: Ollama model tag to load, e.g. ``"llama3:8b"``.
        """
        # NOTE(review): the role text is stored but the prompt built in write_code does not include it.
        self.role = """ You are a senior developer who writes efficient and neat code for problems
         and can teach a junior developer how to code. Your language of choice is Python. Don't explain
         the code, just generate the code block itself. """
        self.llm = Ollama(model=model_name)
        self.context_history = {}

    def reset(self):
        """Forget everything stored in ``context_history``."""
        self.context_history = {}

    def _parse_code(self, ans):
        """Extract the Python code from a model reply.

        Args:
            ans: Raw text returned by the model.

        Returns:
            The content of every ```python fenced block joined with newlines, or
            ``ans`` unchanged when the reply contains no complete fenced block.
        """
        # Non-greedy: each match stops at its own closing fence. A greedy `.*` would run from
        # the first opening fence to the last closing one and drag prose and fence markers
        # into the result when the reply holds more than one block.
        pattern = r"```python(.*?)```"
        blocks = re.findall(pattern, ans, re.DOTALL)
        return "\n".join(blocks) if blocks else ans

    def write_code(self, instruction):
        """Ask the model for a Python function and return the parsed code.

        Args:
            instruction: Description of what the function should do; it is
                interpolated into the prompt.

        Returns:
            The code text extracted from the model's reply (see ``_parse_code``).
        """
        self.prompt= f""" Write a python function that can {instruction}.
    Return ```python your_code_here ``` with NO other texts,
    your code:
    """
        ans = self.llm.invoke(self.prompt,stop=['<|eot_id|>'])
        return self._parse_code(ans)

    def ask(self, instruction):
        """Alias of ``write_code``; the demo cells in this notebook call ``coder.ask(...)``."""
        return self.write_code(instruction)


#### Output from the Llama3 Model

In [18]:
# Generate code with the Llama 3 backed agent. `write_code` is the method CodingAgent defines;
# there is no `ask` method, so calling it fails on a fresh kernel.
coder = CodingAgent("llama3:8b")
code = coder.write_code("A simple script to check the current weather.")
print(code)


```
import requests
import json

class WeatherChecker:
    def __init__(self, api_key):
        self.api_key = api_key

    def get_weather(self, city):
        url = f"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={self.api_key}"
        response = requests.get(url)
        return json.loads(response.text)

    def print_weather(self, weather_data):
        print(f"Weather in {weather_data['name']}:")
        print(f"Description: {weather_data['weather'][0]['description']}")
        print(f"Temperature: {weather_data['main']['temp']}K")

def main():
    api_key = "YOUR_API_KEY_HERE"
    city = "London"
    weather_checker = WeatherChecker(api_key)
    weather_data = weather_checker.get_weather(city)
    weather_checker.print_weather(weather_data)

if __name__ == "__main__":
    main()

import unittest
class TestWeatherChecker(unittest.TestCase):
    def test_get_weather(self):
        api_key = "YOUR_API_KEY_HERE"
        city = "London"
        weather_checker = Weath

#### Output from the Mistralv0.2 model

In [20]:
# Generate code with the Mistral backed agent. `write_code` is the method CodingAgent defines;
# there is no `ask` method, so calling it fails on a fresh kernel.
coder = CodingAgent("mistral:v0.2")
code = coder.write_code("A scientific calculator.")
print(code)


# Calculator class definition
class Calculator:
    def __init__(self):
        self.history = []

    def add(self, a, b):
        result = a + b
        self.history.append((a, '+', b, result))
        return result

    def subtract(self, a, b):
        result = a - b
        self.history.append((a, '-', b, result))
        return result

    def multiply(self, a, b):
        result = a * b
        self.history.append((a, '*', b, result))
        return result

    def divide(self, a, b):
        if b == 0:
            raise ValueError("Division by zero is not allowed.")
        result = a / b
        self.history.append((a, '/', b, result))
        return result

    def display_history(self):
        for entry in reversed(self.history):
            print(f"{entry[0]} {entry[1]} {entry[2]} = {entry[3]}")

# Unit tests
def test_calculator():
    calc = Calculator()

    # Addition test
    assert calc.add(2, 3) == 5
    assert calc.add(-2, -3) == 1

    # Subtraction test
    asser

In [32]:
import re
import json

class CodingAgent:
    """Contributor agent that turns a GitHub issue into code and a pull-request summary.

    Each step is one prompt to a local Ollama model. Intermediate results are kept in
    ``context_history`` under the keys ``"issue_summary"``, ``"approach"`` and
    ``"code_snippets"`` so that later steps can reuse them.
    """

    def __init__(self, model_name):
        """Create the agent.

        Args:
            model_name: Ollama model tag to load, e.g. ``"llama3:8b"``.
        """
        # NOTE(review): the role text is stored but none of the prompts below include it.
        self.role = """You are a senior developer who writes efficient and neat code for problems
        and can teach a junior developer how to code. Your language of choice is Python. Don't explain
        the code, just generate the code block itself."""
        self.llm = Ollama(model=model_name)
        self.context_history = {}

    def reset(self):
        """Forget the stored issue summary, approach and code snippets."""
        self.context_history = {}

    def _parse_code(self, ans):
        """Return the contents of every ```python fenced block in ``ans``.

        Returns:
            A list with one string per fenced block, or ``[ans]`` when the reply
            contains no complete fenced block.
        """
        pattern = r"```python(.*?)```"
        matches = re.findall(pattern, ans, re.DOTALL)
        return matches if matches else [ans]

    def _split_tasks(self, approach):
        """Break a free-text plan into individual task strings.

        The plan is split after a period followed by whitespace and on line breaks.
        Fragments without any letter (for example the "1" left over from "1. Do X")
        are dropped so they do not trigger a model call of their own.
        """
        fragments = re.split(r"\.\s+|\n+", approach)
        return [
            fragment.strip()
            for fragment in fragments
            if any(ch.isalpha() for ch in fragment)
        ]

    def write_code(self, instruction):
        """Ask the model for a Python function, using earlier snippets as context.

        Args:
            instruction: Description of what the function should do.

        Returns:
            List of code strings parsed from the reply (see ``_parse_code``). The
            same strings are appended to ``context_history["code_snippets"]``.
        """
        # Include previous context in the prompt
        context = "\n".join(self.context_history.get("code_snippets", []))
        self.prompt = f"""
        Context:
        {context}
        Write a python function that can {instruction}.
        Return ```python your_code_here ``` with NO other texts,
        your code:
        """
        ans = self.llm.invoke(self.prompt, stop=['<|eot_id|>'])
        code_texts = self._parse_code(ans)

        # Update context history with new code
        self.context_history.setdefault("code_snippets", []).extend(code_texts)
        return code_texts

    def summarize_issue(self, issue_text):
        """Summarize a GitHub issue and store the result under ``"issue_summary"``.

        Args:
            issue_text: Raw issue text.

        Returns:
            The model's summary with surrounding whitespace stripped.
        """
        self.prompt = f""" Summarize the following GitHub issue:
        Issue:
        {issue_text}
        Summary:
        """
        ans = self.llm.invoke(self.prompt, stop=['<|eot_id|>'])
        issue_summary = ans.strip()

        # Update context history with issue summary
        self.context_history["issue_summary"] = issue_summary
        return issue_summary

    def plan_approach(self, issue_summary):
        """Ask the model for a plan and store it under ``"approach"``.

        Args:
            issue_summary: Summary text, typically the output of ``summarize_issue``.

        Returns:
            The model's plan with surrounding whitespace stripped.
        """
        self.prompt = f""" Given the following issue summary, plan out the approach to solve it:
        Summary:
        {issue_summary}
        Approach:
        """
        ans = self.llm.invoke(self.prompt, stop=['<|eot_id|>'])
        approach = ans.strip()

        # Update context history with approach
        self.context_history["approach"] = approach
        return approach

    def create_pull_request_summary(self):
        """Build a pull-request summary from everything stored in ``context_history``.

        Missing entries default to an empty string / empty list.

        Returns:
            The model's reply with surrounding whitespace stripped.
        """
        issue_summary = self.context_history.get("issue_summary", "")
        approach = self.context_history.get("approach", "")
        code_snippets = self.context_history.get("code_snippets", [])

        code_summary = "\n".join(f"Function {i+1}:\n{code}" for i, code in enumerate(code_snippets))
        self.prompt = f""" Create a pull request summary for the following:
        Issue Summary:
        {issue_summary}
        Approach:
        {approach}
        Code:
        {code_summary}
        Pull Request Summary:
        """
        ans = self.llm.invoke(self.prompt, stop=['<|eot_id|>'])
        return ans.strip()

    def contribute_to_issue(self, issue_text):
        """Run the whole workflow: summarize, plan, write code per task, summarize as a PR.

        Progress (issue text and summary, approach, task list) is printed along the way.
        The intermediate artifacts remain available in ``context_history`` afterwards.

        Args:
            issue_text: Raw GitHub issue text.

        Returns:
            The pull-request summary string.
        """
        # Summarize the issue
        issue_summary = self.summarize_issue(issue_text)
        print(issue_text, issue_summary)

        # Plan the approach
        approach = self.plan_approach(issue_summary)
        print(approach)

        # Split the approach into individual tasks/instructions
        tasks = self._split_tasks(approach)
        print(tasks)

        # Write code for each instruction step-by-step; write_code records every
        # snippet in context_history, which is where the PR summary reads them from.
        for task in tasks:
            self.write_code(task)

        # Summarize in a pull request
        return self.create_pull_request_summary()

In [33]:
# Run the contributor workflow end to end on a sample issue, using the Llama 3 model.
agent = CodingAgent(model_name="llama3:8b")
issue_text = """
# Issue: Implement a Simple Blogging Platform

## Description
We need to implement a simple blogging platform with the following features:
1. Users can create an account and log in.
2. Authenticated users can create, edit, and delete their blog posts.
3. Blog posts should support Markdown for formatting.
4. Implement a public-facing page where all blog posts are listed.
5. Each blog post should have a unique URL for sharing.

## Requirements
1. User authentication system (sign up, login, logout).
2. Blog post CRUD operations (create, read, update, delete).
3. Markdown support for blog posts"""
# contribute_to_issue prints its intermediate steps and returns the pull-request summary string.
contribution = agent.contribute_to_issue(issue_text)
print(contribution)
# `contribution` is a plain string, so json.dumps renders it as a single quoted JSON string.
print(json.dumps(contribution, indent=2))


# Issue: Implement a Simple Blogging Platform

## Description
We need to implement a simple blogging platform with the following features:
1. Users can create an account and log in.
2. Authenticated users can create, edit, and delete their blog posts.
3. Blog posts should support Markdown for formatting.
4. Implement a public-facing page where all blog posts are listed.
5. Each blog post should have a unique URL for sharing.

## Requirements
1. User authentication system (sign up, login, logout).
2. Blog post CRUD operations (create, read, update, delete).
3. Markdown support for blog posts The issue is to implement a simple blogging platform with the following features:

* Users can create an account and log in.
* Authenticated users can create, edit, and delete their own blog posts.
* Blog posts support Markdown formatting.
* A public-facing page lists all blog posts.
* Each blog post has a unique URL for sharing.

The requirements include:

1. A user authentication system (sign up,

In [36]:
# Inspect the code snippets stored in the agent's context history (empty list if none were recorded).
code_snippets = agent.context_history.get("code_snippets", [])
print(code_snippets)

['\nfrom flask import Flask, request, jsonify\nfrom flask_sqlalchemy import SQLAlchemy\nfrom flask_marshmallow import Marshmallow\n\napp = Flask(__name__)\napp.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:////tmp/test.db"\ndb = SQLAlchemy(app)\nma = Marshmallow(app)\n\nclass BlogPost(db.Model):\n    id = db.Column(db.Integer, primary_key=True)\n    title = db.Column(db.String(100), nullable=False)\n    content = db.Column(db.Text, nullable=False)\n    created_date = db.Column(db.DateTime, default=db.func.current_timestamp())\n    updated_date = db.Column(db.DateTime, default=db.func.current_timestamp(), onupdate=db.func.current_timestamp())\n    user_id = db.Column(db.Integer, db.ForeignKey("user.id"), nullable=False)\n\nclass BlogPostSchema(ma.Schema):\n    class Meta:\n        fields = ("id", "title", "content", "created_date", "updated_date", "user_id")\n\nblog_post_schema = BlogPostSchema()\nmultiple_blog_posts_schema = BlogPostSchema(many=True)\n\n@app.route("/blog_post", methods=[

## Conversation ability 
1. Understanding context using Retrieval Augmented Generation

In [15]:
!pip install slack-sdk
!pip install pypdf
# !pip install fastembed
!pip install chromadb

Collecting chromadb
  Downloading chromadb-0.5.0-py3-none-any.whl.metadata (7.3 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.1-py3-none-any.whl.metadata (4.3 kB)
Collecting chroma-hnswlib==0.7.3 (from chromadb)
  Downloading chroma_hnswlib-0.7.3-cp311-cp311-win_amd64.whl.metadata (262 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.111.0-py3-none-any.whl.metadata (25 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.30.1-py3-none-any.whl.metadata (6.3 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.5.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Using cached onnxruntime-1.18.0-cp311-cp311-win_amd64.whl.metadata (4.4 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.25.0-py3-none-any.whl.metadata (1.4 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Download

In [2]:
from langchain_community.llms import Ollama
from langchain_community.vectorstores import Chroma
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain.schema.output_parser import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import PromptTemplate
from langchain.vectorstores.utils import filter_complex_metadata
from langchain.document_loaders.json_loader import JSONLoader
from langchain.docstore.document import Document

In [22]:
class ChatPDF:
    """Small retrieval-augmented question answering helper.

    Documents are split into chunks, embedded with an Ollama model and stored in a
    Chroma vector store. Questions are answered by a chain that retrieves chunks and
    passes them, together with the question, to a chat model.
    """

    # Class-level defaults so the attributes exist before ingest() has been called.
    model_name = "llama3:8b"
    vector_store = None
    retriever = None
    chain = None

    def __init__(self, model_name="llama3:8b"):
        """Set up the chat model, text splitter and prompt template.

        Args:
            model_name: Ollama model tag used for both chat and embeddings.
                Defaults to the tag that was previously hard-coded.
        """
        self.model_name = model_name
        self.model = ChatOllama(model=model_name)
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
        self.prompt = PromptTemplate.from_template(
            """
            DOCUMENT:
            {context}

            QUESTION:
            {question}

            INSTRUCTIONS:
            Answer the users QUESTION 
            """
        )

    def ingest(self, docs):
        """Index ``docs`` and build the question answering chain.

        Args:
            docs: Documents accepted by the text splitter's ``split_documents``.
        """
        chunks = self.text_splitter.split_documents(docs)
        chunks = filter_complex_metadata(chunks)
        embeddings = OllamaEmbeddings(model=self.model_name)
        # Keep the store on the instance: clear() resets self.vector_store, so ingest()
        # has to be the place that sets it.
        self.vector_store = Chroma.from_documents(documents=chunks, embedding=embeddings)
        # NOTE(review): a 0.9 score threshold may filter out every chunk for loosely
        # related questions — confirm it suits the chosen embedding model.
        self.retriever = self.vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={
                "k": 3,
                "score_threshold": 0.9,
            },
        )

        self.chain = ({"context": self.retriever, "question": RunnablePassthrough()}
                      | self.prompt
                      | self.model
                      | StrOutputParser())

    def ask(self, query: str):
        """Answer ``query`` with the chain, or return a hint when nothing was ingested yet."""
        if not self.chain:
            return "Please, add a document first."

        return self.chain.invoke(query)

    def clear(self):
        """Drop the vector store, retriever and chain so the instance is back to its empty state."""
        self.vector_store = None
        self.retriever = None
        self.chain = None

In [None]:
# Conversation data

#Later ToDo : Get conversation data from Slack

# from slack_sdk import WebClient
# import os

# client = WebClient(token=os.environ["SLACK_BOT_TOKEN"])
# response = client.conversations_info(
#     channel="C031415926",
#     include_num_members=1
# )

# Current : Load Sample Conversation Data
import json
import os
from pathlib import Path

# The default is a machine-specific absolute path; set SAMPLE_CONVERSATION_PATH to run elsewhere.
file_path = os.environ.get(
    "SAMPLE_CONVERSATION_PATH",
    r"C:\Users\sarrah\data_win\Personal_Projects\GSoC_24\preliminary-design\sample_conversation.json",
)
# Read as UTF-8 explicitly; read_text() otherwise falls back to the platform's default codec.
data = json.loads(Path(file_path).read_text(encoding="utf-8"))

texts = []

for message in data["messages"]:
    # Entries without any text have nothing to embed.
    if "text" not in message:
        continue
    text = message["text"]
    # Not every entry is guaranteed to carry these fields; fall back to a placeholder
    # instead of aborting the whole load with a KeyError.
    user_id = message.get("user", "unknown")
    timestamp = message.get("ts", "unknown")
    team_id = message.get("team", "unknown")

    # Prepare text for embedding
    text_to_embed = f"{user_id} said: {text} at {timestamp} in team {team_id}. Full accompanying message was {message}"
    texts.append(text_to_embed)

documents = [Document(page_content=text) for text in texts]

# Index the conversation so it can be queried through chatter.ask(...).
chatter = ChatPDF()
chatter.ingest(documents)


In [32]:
# Show the first Document to check how a message was flattened into page_content.
print(documents[0])

page_content="U01ABCD2EFG said: Hey team, we need to finalize the project timeline. at 1623948724.000200 in team T01HJKL3MNO. Full accompanying message was {'client_msg_id': 'b0a8c3a5-9330-4e8b-8320-50f84e2cbf77', 'type': 'message', 'text': 'Hey team, we need to finalize the project timeline.', 'user': 'U01ABCD2EFG', 'ts': '1623948724.000200', 'team': 'T01HJKL3MNO', 'blocks': [{'type': 'rich_text', 'block_id': 'kS3', 'elements': [{'type': 'rich_text_section', 'elements': [{'type': 'text', 'text': 'Hey team, we need to finalize the project timeline.'}]}]}]}"


In [30]:
# Query the chain built by chatter.ingest(); ask() returns a hint string instead if nothing was ingested.
ans = chatter.ask("What time is the meeting scheduled to discuss the details of the project timeline?")
print(ans)



According to our schedule, the meeting to discuss the project timeline is set for 2 PM today. Please confirm if this timing works for you before we proceed with any further updates.


In [27]:

# Reset the ChatPDF instance: clear() sets its vector_store, retriever and chain back to None.
chatter.clear()