In [None]:
!pip install llama-index
!pip install llama-index-llms-together
!pip install python-dotenv
!pip install together

In [None]:
# Snapshot the currently installed package versions into requirements.txt.
!pip freeze > requirements.txt

In [1]:
# Load the python-dotenv IPython extension, then read the variables from the .env file
# into the process environment (LLM_API_TOKEN is read from the environment further down).
%load_ext dotenv
%dotenv

In [2]:
import os
from typing import List, Optional
from llama_index.llms.together import TogetherLLM
from llama_index.core.agent.workflow import FunctionAgent
# Together API key taken from the environment; None when LLM_API_TOKEN is not set.
API_TOKEN: Optional[str] = os.environ.get("LLM_API_TOKEN")

### Basic together.ai example

In [None]:
# DONT CHANGE THE MODEL
# Together-hosted LLM client, authenticated with the token read from the environment.
llm = TogetherLLM(model="deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free", api_key=API_TOKEN)

In [None]:
# Minimal example call: send a short prompt to the model and print the completion.
resp = llm.complete("WHAT THE")
print(resp)

# Overall Workflow (i think?)
1. Manifesto data > PDF Data Connector (or any applicable connector)
2. Structured Data Extraction on manifesto to issues/questions
3. Convert extracted data into SQL and then insert into database

Then, augment the model with RAG over the database: give the model access to our issues/stances database so that it can see the current issues/stances and either fill in the gaps or skip anything we already have.

## Pydantic Classes for SDE

In [45]:
from pydantic import BaseModel, Field
from typing import Deque, List, Optional, Tuple

# Need to somehow cross check category
class Issue(BaseModel):
    """A single issue, problem, or event that can be addressed by political parties, typically for them to form policy around."""

    # The length limits mentioned below exist only in the description text;
    # nothing in this model validates them.
    description: str = Field(
        description="A long (max 300 characters) description of the issue at hand, it should not be tied to any political party and simply be a description of the issue discussed",
    )
    summary: str = Field(
        description="A short (max 50 characters), concise, summary of the issue. again, it should not be tied to any political party and simply be a description of the issue discussed",
    )

class Stance(BaseModel):
    """A political party's stance on an issue"""

    # The issue this stance refers to (nested Issue model).
    issue: Issue = Field(description = "The issue that is being discussed")
    # NOTE(review): the description does not say which boolean value means "agrees";
    # presumably True = agrees — confirm and spell it out in the description.
    stand: bool = Field(description = "Whether the party disagrees or agrees with the contents of the issue")
    # Fixed typo in the schema description ("bparty" -> "party").
    reason: str = Field(description = "A description on why the party has this stance")
    party: str = Field(description = "The name of the political party that holds this stance")

class IssueList(BaseModel):
    """A list of issues addressed in the context provided"""

    # Container field: the individual Issue entries, in list order.
    issues: List[Issue]

## Reading WP manifesto data (from .html; the cell below loads `./data/WP-Manifesto.txt`)

In [4]:
from llama_index.core import SimpleDirectoryReader
# Read the manifesto text file from disk.
reader = SimpleDirectoryReader(input_files=["./data/WP-Manifesto.txt"])
data = reader.load_data()
# Only the text of the first loaded item is used from here on.
text = data[0].text

### Pre-process data
TODO: We have to remove this step - it simply truncates the input text to fit. Find another way to make the data fit in the context window (condense it?) or find a way to increase the context window (pay money? lol?)

In [58]:
# Pre-process data
# The free APIs only allow 8193 tokens maximum.
CONTEXT_WINDOW_TOKENS = 8193
# Tokens held back from the input budget for the model's output.
# (The previous comment said 300, but the arithmetic has always reserved 1000.)
RESERVED_TOKENS = 1000
# Rough rule of thumb: one token is about 3/4 of a word.
WORDS_PER_TOKEN = 0.75

approx_words = int((CONTEXT_WINDOW_TOKENS - RESERVED_TOKENS) * WORDS_PER_TOKEN)
# Keep only the first `approx_words` whitespace-separated words. Splitting and re-joining
# also collapses every run of whitespace (including newlines) into a single space.
# Always starts from data[0].text, so re-running this cell gives the same result.
text = ' '.join(data[0].text.split()[:approx_words])

## Instantiating the Structured LLM (a custom wrapper over the together.ai LLM, since the together.ai endpoint via LlamaIndex doesn't work with structured JSON for some reason)

In [55]:
from typing import Optional, List, Mapping, Any

import together
from llama_index.core import SimpleDirectoryReader, SummaryIndex
from llama_index.core.callbacks import CallbackManager
from llama_index.core.llms import (
    CustomLLM,
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.core.llms.callbacks import llm_completion_callback
from llama_index.core import Settings


class CustomTogetherLLM(CustomLLM):
    """Completion-style LLM wrapper that sends each prompt to Together's chat completions API.

    Every prompt is sent as a single user message; the first choice's message
    content is returned as the completion text. Streaming is not supported.
    """

    # Values reported through `metadata`; `num_output` is also sent as `max_tokens`.
    context_window: int = 8193 # check this
    num_output: int = 800 #check this
    model_name: str = "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
    # Created once, when this class body is evaluated, from the module-level API_TOKEN.
    client: together.client.Together = together.Together(api_key=API_TOKEN)

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
            is_chat_model=False
        )

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        """Send `prompt` as one user message and return the first choice's text.

        Args:
            prompt: Full prompt text to send.
            **kwargs: Accepted for interface compatibility; not forwarded to the API.

        Returns:
            CompletionResponse whose `text` is the content of the first choice.

        Warns:
            UserWarning: if the API reports that generation stopped because the
                `max_tokens` limit was reached, i.e. the text is cut off.
        """
        import warnings  # local import: keeps this block self-contained

        # TODO: Error handling
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{"role":"user","content": prompt}],
            max_tokens = self.num_output
        )
        choice = response.choices[0]
        content = choice.message.content

        # A "length" finish reason means the output hit max_tokens and is truncated.
        # Surface that here; otherwise it only shows up later as an opaque
        # "Invalid JSON: EOF while parsing" error from structured parsing.
        finish_reason = getattr(choice, "finish_reason", None)
        # finish_reason may be an enum or a plain string; compare on its value.
        if getattr(finish_reason, "value", finish_reason) == "length":
            warnings.warn(
                f"{self.model_name} stopped at max_tokens={self.num_output}; "
                "the completion is truncated and structured (JSON) parsing will "
                "likely fail. Increase num_output or shorten the prompt.",
                stacklevel=2,
            )

        # Debug aid: echo the raw model output.
        print(content)
        return CompletionResponse(text=content)

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseGen:
        """Streaming is not implemented for this wrapper; always raises NotImplementedError."""
        raise NotImplementedError("no streams.")

    
# Make the custom wrapper the default LLM, and take the global token budgets from the
# LLM's own metadata so they cannot drift apart (num_output was previously hard-coded
# to 256 here while the LLM itself requests up to its own num_output tokens).
Settings.llm = CustomTogetherLLM()
Settings.context_window = Settings.llm.metadata.context_window
Settings.num_output = Settings.llm.metadata.num_output

In [59]:
# Fresh instance of the custom wrapper. Note: this rebinds `llm`, which earlier in the
# notebook referred to the TogetherLLM client.
llm = CustomTogetherLLM()
# Structured wrapper whose completions are validated against the IssueList model.
sllm = llm.as_structured_llm(IssueList)

## Extracting the data

In [60]:
# Run structured extraction over the pre-processed manifesto text.
# NOTE(review): in the recorded run this call raised a pydantic ValidationError
# ("EOF while parsing a list") because the returned JSON ends mid-list,
# presumably cut off by the LLM's output-token limit; confirm.
response = sllm.complete(text)

```json
{
  "issues": [
    {
      "description": "Unemployment insurance to support retrenched workers",
      "summary": "Unemployment Insurance"
    },
    {
      "description": "Legislation to prohibit employment discrimination based on race, gender, age, and nationality",
      "summary": "Anti-Discrimination Law"
    },
    {
      "description": "Mandatory consideration of flexible work arrangements for companies with over 20 employees",
      "summary": "Flexible Work Arrangements"
    },
    {
      "description": "Introduction of a shared parental leave scheme for 24 weeks of government-paid leave",
      "summary": "Shared Parental Leave"
    },
    {
      "description": "Credential assessments for Employment Pass and S Pass job applicants",
      "summary": "Work Pass Credential Assessments"
    },
    {
      "description": "Expansion of the public rental scheme to cater to changing aspirations of Singaporeans",
      "summary": "Public Rental Scheme"
    },
    {
     

ValidationError: 1 validation error for IssueList
  Invalid JSON: EOF while parsing a list at line 98 column 5 [type=json_invalid, input_value='{\n  "issues": [\n    {\... Cost Reduction"\n    }', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid

In [None]:
# Show the extraction result; `response` only exists if the extraction call above
# completed without raising.
print(response)