<a href="https://colab.research.google.com/github/LuckyMan26/RAG/blob/main/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install git+https://github.com/tantanchen/dspy.git
!pip install groq
!pip install colbert

In [None]:
!pip install chromadb

In [None]:
!pip install -qU \
    transformers==4.30.2 \
    torch==2.0.1 \
    einops==0.6.1 \
    accelerate==0.20.3 \
    datasets==2.14.5 \
    chromadb \
    sentence-transformers==2.2.2

In [None]:
!pip install langchain

In [None]:
!pip uninstall lark --yes

[0m

In [None]:
!pip install lark-parser

In [None]:
!pip install lark

In [None]:
import dspy
import groq
import colbert

In [9]:
import chromadb
from sentence_transformers import SentenceTransformer

In [None]:
from langchain.text_splitter import TextSplitter
class CaseSplitter(TextSplitter):
    """Text splitter that cuts an exported case dump into one chunk per case.

    Records are delimited by a blank line followed by the literal prefix
    "Row " (the delimiter itself is not kept in the chunks).
    """

    def __init__(self):
        # No custom options: rely entirely on the TextSplitter defaults.
        super().__init__()

    def split_text(self, file):
        """Return the list of case strings found in ``file``.

        ``file`` is the full text of the dump (a string, not a path or handle).
        Leading/trailing whitespace is removed before splitting.
        """
        trimmed = file.strip()
        return trimmed.split("\n\nRow ")




In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Generic recursive splitter configured with a 1500 chunk size and a 150 overlap
# between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)

In [None]:
from google.colab import files
files.upload()

In [None]:
from langchain.document_loaders import TextLoader
# Load the uploaded text export through LangChain's TextLoader, decoding it as cp1252.
loader = TextLoader("processed_csv.txt", encoding="cp1252")
documents = loader.load()

In [None]:
# Read the whole export into memory (cp1252-decoded) and cut it into one string per case.
# NOTE(review): `state_of_the_union` holds the raw case dump, not a speech — the name
# looks like a leftover from an example; consider renaming once all uses are known.
with open("processed_csv.txt", encoding="cp1252") as f:
    state_of_the_union = f.read()
case_splitter = CaseSplitter()
cases = case_splitter.split_text(state_of_the_union)

In [None]:
cases[1]

'2:\ncaseId: 1946-002\ndocketId: 1946-002-01\ncaseIssuesId: 1946-002-01-01\nvoteId: 1946-002-01-01-01\ndateDecision: 11/18/1946\ndecisionType: opinion of the court (orally argued)\nusCite: 329 U.S. 14\nsctCite: 67 S. Ct. 13\nledCite: 91 L. Ed. 12\nlexisCite: 1946 U.S. LEXIS 1725\nterm: 1946\nnaturalCourt: Vinson 1 \tJune 24, 1946 - August 23, 1949\nchief: Vinson\ndocket: 12\ncaseName: CLEVELAND v. UNITED STATES\ndateArgument: 10/10/1945\ndateRearg: 10/17/1946\npetitioner: person accused, indicted, or suspected of crime\npetitionerState: \nrespondent: United States\nrespondentState: \njurisdiction: cert\nadminAction: \nadminActionState: \nthreeJudgeFdc: no mention that a 3-judge ct heard case\ncaseOrigin: Utah U.S. District Court\ncaseOriginState: 52\ncaseSource: U.S. Court of Appeals, Tenth Circuit\ncaseSourceState: \nlcDisagreement: no mention that dissent occurred\ncertReason: putative conflict\nlcDisposition: affirmed\nlcDispositionDirection: conservative\ndeclarationUncon: no decla

In [None]:
# Chunk the loaded documents with the recursive character splitter.
# NOTE(review): `docs` is not referenced by any later cell visible here — the vector
# store below is built from `cases` instead.
docs = text_splitter.split_documents(documents)

In [None]:
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)

In [None]:
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

In [None]:
from langchain.vectorstores import Chroma

In [None]:
persist_directory = 'docs/chroma/'

In [None]:
import os

In [None]:
# Build a Chroma vector store from the per-case strings using `embedding_function`.
# NOTE(review): `persist_directory` defined above is not passed here, so presumably
# nothing is persisted to that directory — confirm.
db = Chroma.from_texts(cases, embedding_function)

In [None]:
# Keep whatever `db.get()` returns for the store in `coll` before the collection is dropped.
coll = db.get()

# NOTE(review): this drops the collection behind `db`, yet the RAG module defined
# further down still retrieves from `db`. Presumably scratch/cleanup code — confirm
# before relying on a top-to-bottom run.
db.delete_collection()

In [None]:
# Configure DSPy to use Groq-hosted Mixtral as the default language model.
# The API key is taken from the GROQ_API_KEY environment variable, falling back to
# an interactive prompt, so it is never stored in the notebook. Any key that was
# previously committed in this cell must be treated as leaked and revoked.
import os
from getpass import getpass

GROQ_API_KEY = os.environ.get("GROQ_API_KEY") or getpass("Groq API key: ")
lm = dspy.GROQ(model='mixtral-8x7b-32768', api_key=GROQ_API_KEY, max_tokens=1000)
dspy.configure(lm=lm)

In [None]:
class RAG(dspy.Module):
    """Retrieve-then-answer pipeline over the notebook-level Chroma store ``db``.

    Args:
        num_passages: Number of passages to retrieve per question (default 3).
    """

    def __init__(self, num_passages=3):
        super().__init__()
        # This argument used to be accepted but ignored (k was hard-coded to 3).
        self.num_passages = num_passages
        self.retrieve = db
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Answer ``question`` from passages fetched with an MMR search.

        Returns:
            The prediction produced by the chain-of-thought module; callers read
            the generated text from its ``answer`` attribute.
        """
        context = self.retrieve.max_marginal_relevance_search(question, k=self.num_passages)
        answer = self.generate_answer(context=context, question=question)
        return answer

In [None]:
from dspy.teleprompt import BootstrapFewShot

In [None]:
uncompiled_rag = RAG()

In [None]:
print(uncompiled_rag("Give me an example of case where chief was Warren").answer)

In [None]:
lm.inspect_history(n=1)

In [None]:
import pandas as pd

In [None]:
from google.colab import files
files.upload()

In [None]:
import numpy as np

In [None]:
df = pd.read_csv("output.csv", encoding="cp1252")

In [None]:
df.head()

In [None]:
# Split the dataframe into len(df) pieces, i.e. one single-row frame per case.
cases_from_df = np.array_split(df, len(df))

In [None]:
!pip install chromadb datasets

In [None]:
from datasets import load_dataset

In [10]:
# Chroma client plus the collection that will hold one entry per case.
# get_or_create_collection keeps this cell re-runnable (create_collection fails when
# the collection already exists) and matches what app.py does.
client = chromadb.Client()
collection = client.get_or_create_collection("Supreme_court_decisions")


In [11]:
# NOTE(review): this deletes the collection created in the previous cell, while the
# ingestion loop further down still adds to the `collection` handle obtained there.
# Presumably a manual reset step — re-create the collection after running this cell.
client.delete_collection("Supreme_court_decisions")

In [None]:
cases_from_df[1]

In [None]:
from langchain.chains.query_constructor.base import AttributeInfo

In [None]:
metadata_field_info = [
    AttributeInfo(
        name="caseId",
        description="This is the first of four unique internal identification numbers. The first four digits are the term. The next four are the case within the term (starting at 001 and counting up).",
        type="string",
    ),
    AttributeInfo(
        name="docketId",
        description=" This is the second of four unique internal identification numbers.The first four digits are the term. The next four are the case within the term (starting at 001 and counting up). The last two are the number of dockets consolidated under the U.S. Reports citation (starting at 01 and counting up).  ",
        type="string",
    ),
    AttributeInfo(
        name="caseIssuesId",
        description="This is the third of four unique internal identification numbers. The first four digits are the term. The next four are the case within the term (starting at 001 and counting up). The next two are the number of dockets consolidated under the U.S. Reports citation (starting at 01 and counting up). The last two are the number of issues and legal provisions within the case (starting at 01 and counting up). ",
        type="string",
    ),
    AttributeInfo(
        name="voteId",
        description="This is the fourth of four unique internal identification numbers. The first four digits are the term. The next four are the case within the term (starting at 001 and counting up). The next two are the number of dockets consolidated under the U.S. Reports citation (starting at 01 and counting up). The next two are the number of issues and legal provisions within the case (starting at 01 and counting up). The next two indicate a split vote within an issue or legal provision (01 for only one vote; 02 if a split vote). The final two represent the vote in the case (usually runs 01 to 09, but fewer if less than all justices participated). ",
        type="string",
    ),
    AttributeInfo(
        name="usCite",
        description=" Provides the citation to each case from the official United States Reports (US) and the three major unofficial Reports",
        type="string",
    ),
    AttributeInfo(
        name="sctCite",
        description="Provides the citation to each case from theSupreme Court Reporter",
        type="string",
    ),
    AttributeInfo(
        name="ledCite",
        description="Provides the citation to each case from the Lawyers' Edition of the United States Reports(LEd)",
        type="string",
    ),
     AttributeInfo(
        name="lexisCite",
        description="Provides the citation to each case from the LEXIS cite",
        type="string",
    ),
     AttributeInfo(
        name="docket",
        description="This variable contains the docket number that the Supreme Court has assigned to the case. Prior to the first two terms of the Burger Court (1969-1970), different cases coming to the Court in different terms could have the same docket number. The Court eliminated the possibility of such duplication by including the last two digits of the appropriate term before the assigned docket number. Since the 1971 Term, the Court has also operated with a single docket. Cases filed pursuant to the Court's appellate jurisdiction have a two-digit number corresponding to the term in which they were filed, followed by a hyphen and a number varying from one to five digits. Cases invoking the Court's original jurisdiction have a number followed by the abbreviation, `Orig` ",
        type="string",
    ),
     AttributeInfo(
        name="dateRearg",
        description="On those infrequent occasions when the Court orders that a case be reargued, this variable specifies the date of such argumen",
        type="date",
    ),
      AttributeInfo(
        name="jurisdiction",
        description="The Court uses a variety of means whereby it undertakes to consider cases that it has been petitioned to review. These are listed below. The most important ones are the writ of certiorari, the writ of appeal, and for legacy cases the writ of error, appeal, and certification. ",
        type="string",
    ),
      AttributeInfo(
        name="adminAction",
        description="This variable pertains to administrative agency activity occurring prior to the onset of litigation. Note that the activity may involve an administrative official as well as that of an agency. The general rule for an entry in this variable is whether administrative action occurred in the context of the case. Note too that this variable identifies the specific federal agency. If the action occurred in a state agency, adminAction is coded as 117 (State Agency). See the variable adminActionState for the identity of the state. ",
        type="string",
    ),
      AttributeInfo(
        name="adminActionState",
        description="Administrative action may be either state or federal. If administrative action was taken by a state or a subdivision thereof, this variable identifies the state",
        type="string",
    ),
      AttributeInfo(
        name="threeJudgeFdc",
        description="This variable will be checked if the case was heard by a three-judge federal district court (occasionally called “as specially constituted district court”). Beginning in the early 1900s, Congress required three-judge district courts to hear certain kinds of cases. More modern-day legislation has reduced the kinds of lawsuits that must be heard by such a court.",
        type="string",
    ),
      AttributeInfo(
        name="caseOriginState",
        description="If the case originated in a state court, this variable identifies the state",
        type="string",
    ),
      AttributeInfo(
        name="caseSourceState",
        description="If the source of the case (i.e., the court whose decision the Supreme Court reviewed) is a state court, this variable identifies the state",
        type="string",
    ),
     AttributeInfo(
        name="lcDisagreement",
        description="An entry in this variable indicates that the Supreme Court's majority opinion mentioned that one or more of the members of the court whose decision the Supreme Court reviewed dissented. The presence of such disagreement is limited to a statement to this effect somewhere in the majority opinion. I.e, `divided,` `dissented,` `disagreed,` `split.` A reference, without more, to the `majority` or `plurality` does not necessarily evidence dissent. The other judges may have concurred. ",
        type="string",
    ),
       AttributeInfo(
        name="certReason",
        description="This variable provides the reason, if any, that the Court gives for granting the petition for certiorari. If the case did not arise on certiorari, this variable will be so coded even if the Court provides a reason why it agreed to hear the case. The Court, however, rarely provides a reason for taking jurisdiction by writs other than certiorari. ",
        type="string",
    ),
        AttributeInfo(
        name="lcDisposition",
        description="This variable specifies the treatment the court whose decision the Supreme Court reviewed accorded the decision of the court it reviewed; e.g., whether the court below the Supreme Court---typically a federal court of appeals or a state supreme court---affirmed, reversed, remanded, etc. the decision of the court it reviewed---typically a trial court. lcDisposition will not contain an entry if the decision the Supreme Court reviewed is that of a trial court or if the case arose under the Supreme Court's original jurisdiction (see the jurisdiction variable). The former occurs frequently in the legacy data. ",
        type="string",
    ),

      AttributeInfo(
        name="lcDispositionDirection",
        description="lcDispositionDirection permits determination of whether the Supreme Court's disposition of the case upheld or overturned a liberal or a conservative lower court decision. ",
        type="string",
    ),
      AttributeInfo(
        name="declarationUncon",
        description="An entry in this variable indicates that the Court either declared unconstitutional an act of Congress; a state or territorial statute, regulation, or constitutional provision; or a municipal or other local ordinance. In coding this variable we consulted several sources. Most helpful was the Congressional Research Service's Constitution of the United States of America: Analysis and Interpretation (CONAN) (https://www.congress.gov/constitution-annotated) and the appendix to volume 131 of the U.S. Reports. ",
        type="string",
    ),
      AttributeInfo(
        name="caseDisposition",
        description="""The treatment the Supreme Court accorded the court whose decision it reviewed is contained in this variable; e.g., affirmed, vacated, reversed and remanded, etc. The values here are the same as those for lcDisposition (how the court whose decision the Supreme Court reviewed disposed of the case). For original jurisdiction cases, this variable will be empty unless the Court's disposition falls under 1 or 9 below (stay, petition, or motion granted; petition denied or appeal dismissed). For cases in which the Court granted a motion to dismiss, caseDisposition is coded as 9 (petition denied or appeal dismissed). There is "no disposition" if the Court denied a motion to dismiss. """,
        type="string",
    ),
       AttributeInfo(
        name="caseDispositionUnusual",
        description="An entry (1) will appear in this variable to signify that the Court made an unusual disposition of the cited case which does not match the coding scheme of the preceding variable. The disposition that appears closest to the unusual one made by the Court should be selected for inclusion in the preceding variable, caseDisposition. ",
        type="string",
    ),
      AttributeInfo(
        name="partyWinning",
        description="""This variable indicates whether the petitioning party (i.e., the plaintiff or the appellant) emerged victorious. The victory the Supreme Court provided the petitioning party may not have been total and complete (e.g., by vacating and remanding the matter rather than an unequivocal reversal), but the disposition is nonetheless a favorable one.
With some adjustments, we coded this variable according to the following rules:
The petitioning party lost if the Supreme Court affirmed (caseDisposition=2) or dismissed the case/denied the petition (caseDisposition=9).
The petitioning party won in part or in full if the Supreme Court reversed (caseDisposition=3), reversed and remanded (caseDisposition= 4), vacated and remanded (caseDisposition=5), affirmed and reversed in part (caseDisposition=6), affirmed and reverse in part and remanded (caseDisposition=7), or vacated (caseDisposition=8)
The petitioning party won or lost may be unclear if the Court certified to/from a lower court. """,
        type="integer",
    ),
    AttributeInfo(
        name="precedentAlteration",
        description="""A "1" will appear in this variable if the majority opinion effectively says that the decision in this case "overruled" one or more of the Court's own precedents. Occasionally, in the absence of language in the prevailing opinion, the dissent will state clearly and persuasively that precedents have been formally altered: e.g., the two landmark reapportionment cases: Baker v. Carr, 369 U.S. 186 (1962), and Gray v. Sanders, 372 U.S. 368 (1963). Once in a great while the majority opinion will state--again in so many words--that an earlier decision overruled one of the Court's own precedents, even though that earlier decision nowhere says so. E.g, Patterson v. McLean Credit Union, 485 U.S. 617 (1988), in which the majority said that Braden v. 30th Judicial Circuit of Kentucky, 410 U.S. 484, 35 L Ed 2d 443 (1973) overruled a 1948 decision. On the basis of this later language, the earlier decision will contain a "1" in this variable. Alteration also extends to language in the majority opinion that states that a precedent of the Supreme Court has been "disapproved," or is "no longer good law." """,
        type="string",
    ),

     AttributeInfo(
        name="voteUnclear",
        description="""The votes in a case are those specified in the opinions.
Do note, however, that the majority opinion in a number of Marshall Court decisions reports that unnamed justices were in disagreement about the resolution of the case. These do not identify who the dissenters were. We, therefore, look to the majority opinion itself to specify who voted how. """,
        type="string",
    ),
    AttributeInfo(
        name="decisionDirectionDissent",
        description="""Once in a great while the majority as well as the dissenting opinion in a case will both support or, conversely, oppose the issue to which the case pertains. For example, the majority and the dissent may both assert that the rights of a person accused of crime have been violated. The only difference between them is that the majority votes to reverse the accused's conviction and remand the case for a new trial, while the dissent holds that the accused's conviction should be reversed, period. In such cases, the entry in the decisionDirection variable should be determined relative to whether the majority or the dissent more substantially supported the issue to which the case pertains, and an entry should appear in this variable. In the foregoing example, the direction of decision variable (decisionDirection) should show a 0(conservative) because the majority provided the person accused of crime with less relief than does the dissent, and direction based on dissent should show a 1 (liberal) The person accused of crime actually won the case, but won less of a victory than the dissent would have provided. """,
        type="string",
    ),
     AttributeInfo(
        name="authorityDecision1",
        description="""This variable and the next one (authorityDecision2) specify the bases on which the Supreme Court rested its decision with regard to each legal provision that the Court considered in the case (see variable lawType).

Neither of them lends itself to objectivity. Many cases arguably rest on more than two bases for decision. Given
that the Court's citation of its precedents also qualifies as a common law decision and that most every case can be considered as at least partially based thereon, common law is the default basis for the Court's decisions. With the exception of decrees and brief non-orally argued decisions you may safely add common law to those cases lacking a second basis for decision. """,
        type="string",
    ),
     AttributeInfo(
        name="authorityDecision2",
        description="See variable Authority for Decision 1 (authorityDecision1). ",
        type="string",
    ),
      AttributeInfo(
        name="lawType",
        description="This variable and its components identify the constitutional provision(s), statute(s), or court rule(s) that the Court considered in the case",
        type="string",
    ),
    AttributeInfo(
        name="lawSupp",
        description=" The difference between them is that lawSupp and lawMinor are coded finely; they identify the specific law, constitutional provision or rule at issue (e.g., Article I, Section 1; the Federal Election Campaign Act; the Federal Rules of Evidence). lawType is coded more broadly (e.g., constitution, federal statute, court rules).",
        type="string",
    ),
    AttributeInfo(
        name="lawMinor",
        description="This variable, lawMinor, is reserved for infrequently litigated statutes. Statutes substantially absent from the decision making of the modern Courts will be found in this variable. For these, lawMinor identifies the law at issue. Note: This is a string variable. ",
        type="string",
    ),
    AttributeInfo(
        name="majOpinWriter",
        description="This variable identifies the author of the Court's opinion or judgment, as the case may be.",
        type="string",
    ),
    AttributeInfo(
        name="majOpinAssigner",
        description="This variable identifies the assigner of the opinion or judgment of the Court, as the case may be. These data are drawn from the membership in the final (report vote) coalition and from the rules governing opinion assignment: If the chief justice is a member of the majority vote coalition at the conference vote, he assigns the opinion; if not, the senior associate justice who is a member of the majority at the conference vote does so. According to several scholarly studies, considerable voting shifts occur between the final conference vote (where the assignment is made) and the vote that appears in the Reports. As a result, in approximately 16 percent of the cases, a person other than the one identified by the database actually assigned the opinion. ",
        type="string",
    ),
    AttributeInfo(
        name="splitVote",
        description="This variable indicates whether the vote variables (e.g., majVotes, minVotes) pertain to the vote on the first or second issue (or legal provision). Because split votes are so rare over 99 percent of the votes are on the first issue. ",
        type="string",
    ),
     AttributeInfo(
        name="majVotes",
        description="This variable specifies the number of justices voting in the majority; minVotes indicates the number of justices voting in dissent. ",
        type="integer",
    ),
     AttributeInfo(
        name="minVotes",
        description="This variable specifies the number of votes in dissent. Only dissents on the merits are specified in this variable.Justices who dissent from a denial or dismissal of certiorari or who disagree with the Court's assertion of jurisdiction count as not participating in the decision. ",
        type="integer",
    ),
]

In [None]:
# Columns whose values make up the searchable document text of each case.
important_columns = [
    "decisionType", "dateDecision", "term", "naturalCourt",
    "caseName", "chief", "dateArgument",
    "petitioner", "petitionerState", "respondent", "respondentState",
    "caseOrigin", "caseSource",
    "issue", "issueArea", "decisionDirection",
]
# Every other dataframe column (in dataframe order) is stored as metadata instead.
metadata_columns = [col for col in df.columns.tolist() if col not in important_columns]

In [None]:
# Ingest every case into the Chroma collection.
#
# For each single-row frame in `cases_from_df`:
#   * the document text is "<column>: <value>\n" for every important column;
#   * the remaining columns are rendered the same way into one string that is
#     stored under the "documents" metadata key;
#   * the id is the row position as a string.
#
# Rows are sent to Chroma in batches rather than one add() call per row, which
# avoids one embedding/insert round-trip per case. Ids, documents and metadata are
# the same as with per-row inserts.
INGEST_BATCH_SIZE = 500  # rows per collection.add() call

for batch_start in range(0, len(cases_from_df), INGEST_BATCH_SIZE):
    batch_rows = range(batch_start, min(batch_start + INGEST_BATCH_SIZE, len(cases_from_df)))
    batch_documents = []
    batch_metadatas = []
    for i in batch_rows:
        case = cases_from_df[i]
        res = "".join(
            column_name + ": " + str(case[column_name].item()) + "\n"
            for column_name in important_columns
        )
        metadata = "".join(
            column_name + ": " + str(case[column_name].item()) + "\n"
            for column_name in metadata_columns
        )
        batch_documents.append(res)
        batch_metadatas.append({"documents": metadata})
    collection.add(
        ids=[str(i) for i in batch_rows],
        documents=batch_documents,
        metadatas=batch_metadatas,
    )


In [None]:
# LangChain view over the "Supreme_court_decisions" collection held by `client`,
# used below for its retrieval helpers (MMR search).
langchain_chroma = Chroma(client=client, collection_name="Supreme_court_decisions", embedding_function=embedding_function)

In [None]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever

In [None]:
document_content_description = "Case details"

In [None]:
!pip install langchain-openai

In [None]:
from langchain.chains.query_constructor.base import (
    StructuredQueryOutputParser,
    get_query_constructor_prompt,
    AttributeInfo
)

In [None]:
question = "Can you give me a details about case where chief was Warren."

In [None]:
class RAG2(dspy.Module):
    """Single-passage RAG module backed by the ``langchain_chroma`` store."""

    def __init__(self):
        super().__init__()
        self.retrieve = langchain_chroma
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Retrieve one case via MMR search and answer ``question`` from it.

        The retrieved context is printed before generation. The returned
        ``dspy.Prediction`` carries only the answer text.
        """
        retrieved = self.retrieve.max_marginal_relevance_search(question, k=1)
        print(retrieved)
        generated = self.generate_answer(context=retrieved, question=question)
        return dspy.Prediction(answer=generated.answer)

In [None]:
rag2 = RAG2()

In [None]:
print(rag2("Can you give me a details about case where chief was Warren.").answer)

The case name is UNITED STATES v. BRAMBLETT, and it was decided on 4/4/1955. The case was argued on 2/7/1955, and it originated from the District Of Columbia U.S. District Court. The decision direction of the case was conservative, and Earl Warren was the chief justice at the time.


In [None]:
print(rag2("In which court this case originated").answer)

This case originated in the State Trial Court.


In [None]:
print(rag2("When this case happened").answer)

The case KAWAKITA v. UNITED STATES was decided on 6/2/1952.


In [None]:
lm.inspect_history(n=1)





Given the fields `context`, `question`, produce the fields `answer`.

---

Follow the following format.

Context: ${context}

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context: «page_content='decisionType: opinion of the court (orally argued)\ndateDecision: 4/4/1955\nterm: 1954\nnaturalCourt: Warren 3 \tMarch 28, 1955 - October 15, 1956\ncaseName: UNITED STATES v. BRAMBLETT\nchief: Warren\ndateArgument: 2/7/1955\npetitioner: United States\npetitionerState: nan\nrespondent: person accused, indicted, or suspected of crime\nrespondentState: nan\ncaseOrigin: District Of Columbia U.S. District Court\ncaseSource: District Of Columbia U.S. District Court\nissue: statutory construction of criminal laws: false statements (cf. statutory construction of criminal laws: perjury)\nissueArea: Criminal Procedure\ndecisionDirection: conservative\n' metadata={'adminAction': 'nan', 'adminActionState': 'nan', 'authority

'\n\n\nGiven the fields `context`, `question`, produce the fields `answer`.\n\n---\n\nFollow the following format.\n\nContext: ${context}\n\nQuestion: ${question}\n\nReasoning: Let\'s think step by step in order to ${produce the answer}. We ...\n\nAnswer: ${answer}\n\n---\n\nContext: «page_content=\'decisionType: opinion of the court (orally argued)\\ndateDecision: 4/4/1955\\nterm: 1954\\nnaturalCourt: Warren 3 \\tMarch 28, 1955 - October 15, 1956\\ncaseName: UNITED STATES v. BRAMBLETT\\nchief: Warren\\ndateArgument: 2/7/1955\\npetitioner: United States\\npetitionerState: nan\\nrespondent: person accused, indicted, or suspected of crime\\nrespondentState: nan\\ncaseOrigin: District Of Columbia U.S. District Court\\ncaseSource: District Of Columbia U.S. District Court\\nissue: statutory construction of criminal laws: false statements (cf. statutory construction of criminal laws: perjury)\\nissueArea: Criminal Procedure\\ndecisionDirection: conservative\\n\' metadata={\'adminAction\': \'

In [None]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [None]:
class RagWithMemory():
    """Conversational wrapper around ``RAG2`` that remembers earlier turns."""

    def __init__(self):
        self.rag = RAG2()
        # One "Question: ... \nAnswer: ..." string per completed turn.
        self.chat_history = []

    def forward(self, question):
        """Ask ``question`` with all prior turns prepended; return the answer text.

        The question/answer pair is appended to ``chat_history`` once the answer
        has been produced.
        """
        prompt_with_history = (
            f"Consider previous chat history:{self.chat_history} \n"
            "Consider this information in your following answers\n"
            f" Question: {question}"
        )
        reply = self.rag(prompt_with_history).answer
        self.chat_history.append(f"Question: {question} \nAnswer: {reply}")
        return reply

In [None]:
rag_with_memory = RagWithMemory()

In [None]:
print(rag_with_memory.forward("Give me example of a case where chief was Warren."))

The case "BROWNELL, ATTORNEY GENERAL, SUCCESSOR TO THE ALIEN PROPERTY CUSTODIAN, v. SINGER" is an example of a case where the chief was Warren.


In [None]:
print(rag_with_memory.forward("Give me details about this case"))

The case "BROWNELL, ATTORNEY GENERAL, SUCCESSOR TO THE ALIEN PROPERTY CUSTODIAN, v. SINGER" was decided on April 5, 1954, during the 1953 term of court. The chief justice was Warren. The case originated from the State Supreme Court, and the issue was the priority of federal fiscal claims over those of states or private entities. The decision direction was liberal, and the case was reversed, with the petitioning party receiving a favorable disposition. The vote was split, with 5 votes in favor and 3 votes against. The case did not result in a declaration of unconstitutionality and involved federal common law.


In [None]:
print(rag_with_memory.forward("When this has happened"))

The case was decided on April 5, 1954.


In [None]:
print(rag_with_memory.forward("What was the issue area of this case"))

The issue area of the case "BROWNELL, ATTORNEY GENERAL, SUCCESSOR TO THE ALIEN PROPERTY CUSTODIAN, v. SINGER" was Federal Taxation.


In [None]:
!pip install -q streamlit

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m33.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.0/83.0 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [12]:
%%writefile app.py
import dspy
import groq
import colbert
import chromadb
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import TextSplitter
import streamlit as st
import pandas as pd
from langchain.vectorstores import Chroma
import numpy as np
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
import streamlit as st
class CaseSplitter(TextSplitter):
    """Text splitter that cuts an exported case dump into one chunk per case.

    Records are delimited by a blank line followed by the literal prefix
    "Row " (the delimiter itself is not kept in the chunks).
    """

    def __init__(self):
        # No custom options: rely entirely on the TextSplitter defaults.
        super().__init__()

    def split_text(self, file):
        """Return the list of case strings found in the text ``file``."""
        trimmed = file.strip()
        return trimmed.split("\n\nRow ")


# Groq-hosted Mixtral is the default LM for every DSPy module in this app.
# The API key is read from the GROQ_API_KEY environment variable (set it before
# `streamlit run`); it must not be hardcoded. Any key that was previously committed
# here should be treated as leaked and revoked.
import os

lm = dspy.GROQ(model='mixtral-8x7b-32768', api_key=os.environ["GROQ_API_KEY"],
               max_tokens=1000)
dspy.configure(lm=lm)
# Case table that create_collection() turns into Chroma documents.
df = pd.read_csv("output.csv", encoding="cp1252")


def create_collection(client, batch_size=500):
    """Fill the ``Supreme_court_decisions`` collection from the module-level ``df``.

    For every row of ``df`` one entry is added:
      * id: the row position as a string;
      * document: ``"<column>: <value>\\n"`` for each "important" column;
      * metadata: the remaining columns rendered the same way into a single
        string stored under the ``"documents"`` key.

    Args:
        client: Chroma client used to get or create the collection.
        batch_size: Number of rows sent per ``collection.add`` call. Batching
            avoids one embedding/insert round-trip per row.

    An empty ``df`` results in an empty collection rather than an error.
    """
    collection = client.get_or_create_collection("Supreme_court_decisions")
    important_columns = ["decisionType", "dateDecision", "term", "naturalCourt", "caseName", "chief", "dateArgument",
                         "petitioner", "petitionerState", "respondent", "respondentState", "caseOrigin", "caseSource",
                         "issue", "issueArea", "decisionDirection"]
    metadata_columns = [item for item in df.columns.tolist() if item not in important_columns]

    # Column values as plain Python scalars (what `.item()` on a one-row slice yields).
    values_by_column = {name: df[name].tolist() for name in important_columns + metadata_columns}

    def _render(columns, row):
        # "<column>: <value>\n" for each requested column of the given row.
        return "".join(name + ": " + str(values_by_column[name][row]) + "\n" for name in columns)

    total_rows = len(df)
    for start in range(0, total_rows, batch_size):
        rows = range(start, min(start + batch_size, total_rows))
        collection.add(
            ids=[str(row) for row in rows],
            documents=[_render(important_columns, row) for row in rows],
            metadatas=[{"documents": _render(metadata_columns, row)} for row in rows])


# Default the `button_clicked` flag to False when it is not yet in the session state.
# NOTE(review): nothing else in the visible file reads this flag — possibly leftover; confirm.
if 'button_clicked' not in st.session_state:
    st.session_state['button_clicked'] = False


class RAG2(dspy.Module):
    """Single-passage RAG module over the ``Supreme_court_decisions`` collection.

    On construction the collection is populated via ``create_collection`` if the
    Chroma client does not list it yet.
    """

    def __init__(self):
        super().__init__()
        chroma_client = chromadb.Client()
        print(chroma_client.list_collections())
        existing_names = {c.name for c in chroma_client.list_collections()}
        if "Supreme_court_decisions" not in existing_names:
            create_collection(chroma_client)
        self.retrieve = Chroma(
            client=chroma_client,
            collection_name="Supreme_court_decisions",
            embedding_function=SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2"),
        )
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Retrieve one case via MMR search and answer ``question`` from it.

        The retrieved context is printed after generation. The returned
        ``dspy.Prediction`` carries only the answer text.
        """
        passages = self.retrieve.max_marginal_relevance_search(question, k=1)
        prediction = self.generate_answer(context=passages, question=question)
        print(passages)
        return dspy.Prediction(answer=prediction.answer)


class RagWithMemory():
    """Wraps ``RAG2`` and injects caller-supplied chat history into each prompt."""

    def __init__(self):
        print("RagWithMemory")
        self.rag = RAG2()

    def forward(self, question, history):
        """Return the answer text for ``question`` given the prior ``history``.

        ``history`` is interpolated into the prompt as-is (its string form); this
        class keeps no history of its own.
        """
        prompt_with_history = (
            "You are an AI assistant, which gives details about already existing Supreme Court decisions. "
            f"Consider previous chat history:{history} \n"
            "Consider this information in your following answers\n"
            f" Question: {question}"
        )
        return self.rag(prompt_with_history).answer


def get_llm_response(question, rag, history=None):
    """Ask ``rag`` to answer ``question`` and return its answer.

    Args:
        question: The user's question.
        rag: Object exposing ``forward(question, history)``.
        history: Prior chat messages to pass along; defaults to an empty list.
            (Previously no history was passed at all, so the call failed against
            a ``forward`` that requires it.)
    """
    if history is None:
        history = []
    return rag.forward(question, history)


# --- Chat UI -----------------------------------------------------------------
st.title("💬 Chatbot")
# The model is configured above as Groq-hosted Mixtral, not an OpenAI model.
st.caption("🚀 A streamlit chatbot powered by Groq-hosted Mixtral")
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]

# Replay the conversation so far.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input():
    # Build the RAG pipeline once and keep it in the session state instead of
    # constructing it (embedding model, collection check) again for every prompt.
    if "rag_with_memory" not in st.session_state:
        st.session_state["rag_with_memory"] = RagWithMemory()
    rag_with_memory = st.session_state["rag_with_memory"]

    st.chat_message("user").write(prompt)
    # The history passed to the model is everything before the current prompt.
    msg = rag_with_memory.forward(prompt, st.session_state.messages)
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.session_state.messages.append({"role": "assistant", "content": msg})
    st.chat_message("assistant").write(msg)


Overwriting app.py


In [None]:
!pip install -U langchain-community

In [13]:
!wget -q -O - ipv4.icanhazip.com

34.148.101.162


In [15]:
# Start the Streamlit app in the background and open a localtunnel on port 8501.
! streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.148.101.162:8501[0m
[0m
[K[?25hnpx: installed 22 in 2.904s
your url is: https://orange-cobras-remain.loca.lt

`from langchain_community.vectorstores import Chroma`.

To install langchain-community run `pip install -U langchain-community`.

`from langchain_community.vectorstores import Chroma`.

To install langchain-community run `pip install -U langchain-community`.
RagWithMemory
[]
2024-05-07 09:58:50.698228: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-07 09:58:50.698306: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting t