In [None]:
!pip install PyMySQL SQLAlchemy tidb-vector pydantic pydantic_core dspy-ai langchain-community wikipedia pyvis openai

Collecting PyMySQL
  Downloading PyMySQL-1.1.1-py3-none-any.whl.metadata (4.4 kB)
Collecting tidb-vector
  Downloading tidb_vector-0.0.11-py3-none-any.whl.metadata (9.1 kB)
Collecting dspy-ai
  Downloading dspy_ai-2.4.13-py3-none-any.whl.metadata (39 kB)
Collecting langchain-community
  Downloading langchain_community-0.2.12-py3-none-any.whl.metadata (2.7 kB)
Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyvis
  Downloading pyvis-0.3.2-py3-none-any.whl.metadata (1.7 kB)
Collecting openai
  Downloading openai-1.41.1-py3-none-any.whl.metadata (22 kB)
Collecting backoff (from dspy-ai)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting datasets (from dspy-ai)
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting optuna (from dspy-ai)
  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)
Collecting structlog (from dspy-ai)
  Downloading structlog-24.4.0-py3-n

In [None]:
from google.colab import userdata
import dspy
import openai

# Configure DSPy to use OpenAI's gpt-4o as its default language model.
# NOTE(review): api_key is a blank placeholder here — a real key must be supplied before this cell can call the API.
open_ai_client = dspy.OpenAI(model="gpt-4o", api_key='', max_tokens=4096)
dspy.settings.configure(lm=open_ai_client)

In [None]:
import pandas as pd

# Case table: read it and discard any 'embedding' column already present in
# the file, so the column only ever holds values computed in this notebook.
entities_df = pd.read_csv('entities.csv').drop(columns=['embedding'], errors='ignore')

# Case-to-case links: a missing description becomes an empty string.
relationships_df = pd.read_csv('relationships.csv')
relationships_df = relationships_df.assign(
    relationship_description=relationships_df['relationship_description'].fillna('')
)



In [None]:
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, BigInteger, String
from sqlalchemy.dialects.mysql import LONGTEXT
from tidb_vector.sqlalchemy import VectorType

# Shared declarative base; the ORM models in this cell register their tables on Base.metadata.
Base = declarative_base()

class DatabaseEntity(Base):
    """ORM model for one row of the ``entities`` table.

    Each row carries a case's identifying fields, its opinion text and a
    vector embedding used for similarity search.
    """
    __tablename__ = 'entities'

    # Primary key of the table.
    case_id = Column(BigInteger, primary_key=True)
    # Stored as text (up to 50 characters), not as a DATE column.
    decision_date = Column(String(50))
    citation = Column(String(512))
    case_name = Column(LONGTEXT)
    category = Column(String(512))
    opinion = Column(LONGTEXT)
    # 1536-dimensional vector column.
    # NOTE(review): presumably sized to match the vectors returned by get_query_embedding — confirm.
    embedding = Column(VectorType(1536))

class DatabaseRelationship(Base):
    """ORM model for one row of the ``relationships`` table.

    A row links a source case to a target case and carries a free-text
    description of how the two are related.
    """
    __tablename__ = 'relationships'

    # The (source, target) pair forms a composite primary key, so each
    # ordered pair of cases can appear at most once.
    source_case_id = Column(BigInteger, primary_key=True)
    target_case_id = Column(BigInteger, primary_key=True)
    relationship_description = Column(String(512))

# NOTE: Base must not be re-created after the models are declared. A fresh
# declarative_base() has an empty metadata registry, so a later
# Base.metadata.create_all(engine) would silently create no tables.


In [None]:
def get_query_embedding(query: str):
    """Return the embedding vector OpenAI produces for ``query``.

    The text is sent as a one-element batch to the
    ``text-embedding-3-small`` model and the vector of that single item is
    returned. Any error raised by the client propagates to the caller.
    """
    client = openai.OpenAI(api_key='')
    embedding_request = {"input": [query], "model": "text-embedding-3-small"}
    result = client.embeddings.create(**embedding_request)
    first_item = result.data[0]
    return first_item.embedding

In [None]:
# Embed every case (name + opinion text). `embeddings` stays positionally
# aligned with the rows of entities_df: a failed row contributes None.
# `successful_cases` collects the case_ids that ended up with a usable vector.
embeddings = []
successful_cases = []

for i, (index, row) in enumerate(entities_df.iterrows()):
    print(i)  # progress counter
    combined_text = f"{row['case_name']} {row['opinion']}"
    try:
        embedding = get_query_embedding(combined_text)
    except Exception as e:
        # Best-effort: a failing row (e.g. text longer than the model's
        # context limit) is reported and skipped rather than aborting the run.
        print(e)
        embedding = None

    if embedding is not None and len(embedding) > 0:
        embeddings.append(embedding)
        # Record success only once the vector is known to be non-empty, so
        # this list always matches the rows that keep a non-null embedding.
        successful_cases.append(row['case_id'])
    else:
        embeddings.append(None)

0
1
2
3
4
Error code: 400 - {'error': {'message': "This model's maximum context length is 8192 tokens, however you requested 9898 tokens (9898 in your prompt; 0 for the completion). Please reduce your prompt; or completion length.", 'type': 'invalid_request_error', 'param': None, 'code': None}}
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
Error code: 400 - {'error': {'message': "This model's maximum context length is 8192 tokens, however you requested 9227 tokens (9227 in your prompt; 0 for the completion). Please reduce your prompt; or completion length.", 'type': 'invalid_request_error', 'param': None, 'code': None}}
21
Error code: 400 - {'error': {'message': "This model's maximum context length is 8192 tokens, however you requested 8804 tokens (8804 in your prompt; 0 for the completion). Please reduce your prompt; or completion length.", 'type': 'invalid_request_error', 'param': None, 'code': None}}
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50

In [None]:
# Attach embeddings by position, then drop the rows whose embedding failed.
# Positional assignment is only valid while the frame still has one row per
# entry in `embeddings`. Once this cell has filtered the frame, a re-run must
# not re-assign, or pandas raises "Length of values does not match length of
# index".
if len(embeddings) == len(entities_df):
    # assign() returns a new frame, avoiding chained-assignment warnings.
    entities_df = entities_df.assign(embedding=embeddings)
elif 'embedding' not in entities_df.columns:
    raise ValueError(
        f"Cannot attach embeddings: {len(embeddings)} embeddings for "
        f"{len(entities_df)} rows. Re-run the embedding loop on the current entities_df."
    )
# Otherwise the column is already attached and filtered; the line below is a no-op.
entities_df = entities_df[entities_df['embedding'].notnull()]

ValueError: Length of values (237) does not match length of index (221)

In [None]:
# Show the first rows of the filtered case table as plain text.
print(entities_df.head())

   case_id decision_date          citation  \
0  1155802    2023-09-20       335 S.E 271   
1  8552047    1979-06-19   42 N.C. App. 43   
2  8548935    1980-03-04  45 N.C. App. 367   
3  8549229    1980-03-04  45 N.C. App. 444   
5  8626817    1937-03-17      211 N.C. 354   

                                           case_name  \
0               JANE DOE v. GREENWOOD MEDICAL CENTER   
1   EMMA L. JONES v. NATIONWIDE MUTUAL INSURANCE CO.   
2  DONALD A. KAHAN and JACK S. JACOBS, Plaintiffs...   
3  NANCY CAROL LOVE, Plaintiff v. NATIONWIDE MUTU...   
5       ROSS L. VAUGHAN v. MRS. ELIZABETH S. VAUGHAN   

                   category  \
0  Medical Malpractice Case   
1          Contract Dispute   
2                Civil Case   
3        Insurance Law Case   
5           Family Law Case   

                                             opinion  \
0  SMITH, Justice.\nOn September 10, 2021, plaint...   
1  ARNOLD, Judge.\nDefendant contends that this a...   
2  VAUGHN, Judge.\nThe trial co

In [None]:
# Number of case_ids recorded in successful_cases by the embedding loop.
print(len(successful_cases))

221


In [None]:
# Keep a relationship only when both its source and its target case_id
# appear in successful_cases.
relationships_df = relationships_df.loc[
    relationships_df[['source_case_id', 'target_case_id']]
    .isin(successful_cases)
    .all(axis=1)
]

In [None]:
# Report how many relationships are left in relationships_df.
print(len(relationships_df))

# Write the current relationship and case tables to CSV, without the index column.
relationships_df.to_csv('relationships_latest.csv', index=False)
entities_df.to_csv('entities_latest.csv', index=False)

483


In [None]:
from sqlalchemy import create_engine, or_
from sqlalchemy.orm import sessionmaker, Session
import numpy as np
from numpy.linalg import norm
from sqlalchemy.ext.declarative import declarative_base
from tidb_vector.sqlalchemy import VectorType
from sqlalchemy import Column, BigInteger, String, URL
from sqlalchemy.dialects.mysql import LONGTEXT


def get_db_url():
    """Assemble the SQLAlchemy URL for the TiDB Cloud gateway.

    Uses the ``mysql+pymysql`` driver on port 4000. Username, password and
    database are blank placeholders in this notebook. The two TLS
    verification flags are passed as booleans in the URL query.
    """
    connection_settings = dict(
        drivername="mysql+pymysql",
        username="",
        password="",
        host='gateway01.us-east-1.prod.aws.tidbcloud.com',
        port=4000,
        database="",
    )
    # Certificate and host-identity verification are both switched on.
    connection_settings["query"] = {"ssl_verify_cert": True, "ssl_verify_identity": True}
    return URL(**connection_settings)

# Establish connection to TiDB serverless
# pool_recycle=300 is passed to create_engine so pooled connections are recycled after 300 seconds.
engine = create_engine(get_db_url(), pool_recycle=300)
# NOTE: this rebinds the name `Session` (imported above from sqlalchemy.orm)
# to a session factory bound to `engine`; later cells call Session() to open sessions.
Session = sessionmaker(bind=engine)
# Module-level session shared by the insert and listing cells below.
session = Session()

# Create the tables registered on Base.metadata.
Base.metadata.create_all(engine)

In [None]:
# Build one ORM object per DataFrame row, for cases and for relationships.
entity_rows = [
    DatabaseEntity(
        case_id=row['case_id'],
        decision_date=row['decision_date'],
        citation=row['citation'],
        case_name=row['case_name'],
        category=row['category'],
        opinion=row['opinion'],
        embedding=row['embedding']
    )
    for _, row in entities_df.iterrows()
]

relationship_rows = [
    DatabaseRelationship(
        source_case_id=row['source_case_id'],
        target_case_id=row['target_case_id'],
        relationship_description=row['relationship_description']
    )
    for _, row in relationships_df.iterrows()
]

try:
    # Cases are committed first, relationships second, as two transactions.
    session.add_all(entity_rows)
    session.commit()

    session.add_all(relationship_rows)
    session.commit()
except Exception:
    # A failed flush/commit (e.g. a duplicate primary key when this cell is
    # re-run) leaves the shared session unusable until it is rolled back.
    # Reset it so later cells can keep using `session`, then re-raise.
    session.rollback()
    raise

In [None]:
# Read every stored case back through the shared session and print its name.
entities = session.query(DatabaseEntity).all()
for entity in entities:
    print(entity.case_name)

# Read every stored relationship back and print it as "source -> description -> target".
relationships = session.query(DatabaseRelationship).all()
for relationship in relationships:
    print(f"{relationship.source_case_id} -> {relationship.relationship_description} -> {relationship.target_case_id}")

DALLAS L. ISENHOUR, and wife, SANDRA K. ISENHOUR v. UNIVERSAL UNDERWRITERS INSURANCE COMPANY, and UNIVERSAL UNDERWRITERS GROUP
GLORIA ANN EVANS v. JUDITH R. COWAN, Individually and in her official capacity as Director of Student Health Services, UNC-CH; BRUCE VUKOSON, Individually and in his official capacity as Director of the AfterHours Program at Student Health Services, UNC-CH; and JANE M. HOGAN, Individually and in her official capacity as Associate Director of Student Health Services, UNC-CH
STATE OF NORTH CAROLINA v. DANNY DEAN FROGGE
STATE OF NORTH CAROLINA v. ROGER SCOTT COLLINS
ROBERT A. STEINGRESS v. THERESA D. STEINGRESS
WILLIAM W. CARRIKER, JR., ELIZABETH C. CARRIKER, THOMAS E. CARRIKER, JR., and ROBERT T. CARRIKER v. CASPER O. CARRIKER, JR., NANCY CARRIKER BLACKWELDER, SAMUEL L. CARRIKER, BETTY JO CARRIKER EARLY, JANE CARRIKER FURR, JAMES EDWARD CARRIKER, JERRY L. CARRIKER, KENNETH CARRIKER, and RENA CARRIKER O'DANIEL
STATE OF NORTH CAROLINA v. SHAWN DELAMAR TRUESDALE
STA

In [None]:
from sqlalchemy.orm import sessionmaker
from sqlalchemy import or_
from typing import List

def retrieve_entities_relationships(question_embedding) -> tuple[List[DatabaseEntity], List[DatabaseRelationship]]:
    """Fetch the case closest to a query embedding plus its direct neighbours.

    Args:
        question_embedding: Vector compared against ``DatabaseEntity.embedding``
            by cosine distance.

    Returns:
        A ``(entities, relationships)`` pair. ``relationships`` holds every
        relationship in which the closest case is the source or the target.
        ``entities`` starts with the closest case, followed by the cases at
        the ends of those relationships (source before target, in
        relationship order, without duplicates). Both lists are empty when
        the ``entities`` table has no rows.

    The session opened here is always closed before returning.
    """
    session = Session()

    try:
        # first() fetches a single row, so no separate limit(1) is needed.
        entity = session.query(DatabaseEntity) \
            .order_by(DatabaseEntity.embedding.cosine_distance(question_embedding)) \
            .first()

        if not entity:
            return [], []

        relationships = session.query(DatabaseRelationship).filter(
            or_(
                DatabaseRelationship.source_case_id == entity.case_id,
                DatabaseRelationship.target_case_id == entity.case_id
            )
        ).all()

        # Endpoint ids in visiting order: source, then target, per relationship.
        endpoint_ids = [
            case_id
            for r in relationships
            for case_id in (r.source_case_id, r.target_case_id)
        ]

        # Load all neighbouring cases with one IN query instead of two
        # single-row queries per relationship.
        missing_ids = set(endpoint_ids) - {entity.case_id}
        fetched = {}
        if missing_ids:
            fetched = {
                e.case_id: e
                for e in session.query(DatabaseEntity)
                .filter(DatabaseEntity.case_id.in_(list(missing_ids)))
                .all()
            }

        # Dict insertion order gives the result order; ids with no stored
        # case are skipped.
        entities = {entity.case_id: entity}
        for case_id in endpoint_ids:
            if case_id in fetched:
                entities.setdefault(case_id, fetched[case_id])

        return list(entities.values()), relationships

    finally:
        session.close()


In [None]:

# Embed a sample question, then fetch the closest case and its related cases.
question_embedding = get_query_embedding("What are my options if I get injured at work in a hazardous job?")
entities, relationships = retrieve_entities_relationships(question_embedding)

# Print the name of each retrieved case.
print("Entities:")
for entity in entities:
    print(entity.case_name)

# Print each retrieved relationship as "source -> description -> target".
print("\nRelationships:")
for relationship in relationships:
    print(f"{relationship.source_case_id} -> {relationship.relationship_description} -> {relationship.target_case_id}")


Entities:
Francisco FAGUNDES and Desiree Fagundes, Plaintiffs, v. AMMONS DEVELOPMENT GROUP, INC. ; East Coast Drilling & Blasting, Inc.; Scott Carle; and Juan Albino, Defendants.
CORA VEAZEY v. CITY OF DURHAM

Relationships:
12647232 -> The first legal opinion addresses the issue of jurisdiction and the effect of appeals on trial proceedings, emphasizing the importance of administering justice without delay. -> 8629835


In [None]:
def generate_result(query: str, entities, relationships):
    """Answer ``query`` with gpt-4o, using only the retrieved cases as context.

    Args:
        query: The user's question, sent as the user message.
        entities: Iterable of objects exposing ``case_id``, ``case_name`` and
            ``opinion``; each becomes one line of the context.
        relationships: Iterable of objects exposing
            ``relationship_description``; each becomes one quoted line.

    Returns:
        The text content of the first choice in the chat completion.
    """
    open_ai_client = openai.OpenAI(api_key='')

    # One parenthesised record per case. Every value is wrapped in its own
    # pair of double quotes so the fields are unambiguous to the model.
    entities_prompt = '\n'.join(
        f'(Case ID: "{e.case_id}", Name: "{e.case_name}", Case Opinion: "{e.opinion}")'
        for e in entities
    )
    relationships_prompt = '\n'.join(
        f'"{r.relationship_description}"' for r in relationships
    )

    response = open_ai_client.chat.completions.create(
        model="gpt-4o",
        messages=[
            # The system message carries the instruction and all retrieved context.
            {"role": "system", "content": "Please carefully think the user's " +
             "question and ONLY use the content below to generate answer:\n" +
             f"Entities: {entities_prompt}, Relationships: {relationships_prompt}"},
            {"role": "user", "content": query}
        ])

    return response.choices[0].message.content

In [None]:
# Ask the model a sample question, passing the `entities` and `relationships`
# variables currently in the notebook namespace as context.
question = "What are my options if I get injured at work in a hazardous job? Please give me a reference case"
result = generate_result(question, entities, relationships)
# A bare last expression makes the notebook display the answer string.
result

'If you get injured at work while performing a hazardous job, your primary option for seeking compensation is usually through your state\'s workers\' compensation system. This system is designed to provide you with assured, albeit limited, remedies for your injuries without the need to prove fault.\n\nA reference case that examined related issues is **"Francisco FAGUNDES and Desiree Fagundes, Plaintiffs, v. AMMONS DEVELOPMENT GROUP, INC.; East Coast Drilling & Blasting, Inc.; Scott Carle; and Juan Albino, Defendants."** In this case, Francisco Fagundes was injured during a blasting operation, which is considered an "ultrahazardous" activity. He attempted to sue his employer in court, arguing that due to the inherent risks of his job, he should be able to pursue a strict liability claim outside of the workers\' compensation system. \n\nHowever, the court ruled against Fagundes, emphasizing that the workers\' compensation system provides the exclusive remedy for workplace injuries, inclu