In [1]:
# Install required packages
# !pip install langgraph langchain chromadb sentence-transformers datasets pandas

In [2]:
# ------------------ Step 1: Load CUAD & Filter Risky Clauses ------------------
import pandas as pd
from datasets import load_dataset


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load from HuggingFace directly
url = "https://huggingface.co/datasets/theatticusproject/cuad/resolve/main/CUAD_v1/master_clauses.csv"
cuad_df = pd.read_csv(url)

In [4]:
# Risky clause columns
risky_cols = [
    "Indemnification", "Termination For Convenience", "Change Of Control",
    "Anti-Assignment", "Revenue/Profit Sharing", "Price Restrictions",
    "Minimum Commitment", "Volume Restriction", "Uncapped Liability",
    "Cap On Liability", "Liquidated Damages", "Irrevocable Or Perpetual License",
    "Source Code Escrow", "Post-Termination Services", "Audit Rights", "Non-Compete",
    "Exclusivity", "Most Favored Nation", "No-Solicit Of Customers",
    "No-Solicit Of Employees", "Non-Disparagement"
]

existing_cols = [c for c in risky_cols if c in cuad_df.columns]
before_count = len(cuad_df)
filtered_df = cuad_df[existing_cols].dropna(how="all", subset=existing_cols)

In [5]:

filtered_df["text"] = filtered_df[existing_cols].apply(
    lambda row: " ".join(row.dropna().astype(str)), axis=1
)
after_count = len(filtered_df)

print(f"Rows before filter: {before_count}")
print(f"Rows after filter:  {after_count}")
print(f"Rows removed:       {before_count - after_count}")

Rows before filter: 510
Rows after filter:  510
Rows removed:       0


In [6]:
# !pip install langchain_community langchain_huggingface


In [7]:
# ------------------ Step 3: LangGraph Agents ------------------
from langgraph.graph import StateGraph, END
from langchain.llms import HuggingFaceHub



In [None]:
# import os
# os.environ["HUGGINGFACEHUB_API_TOKEN"] = ""

HF_API_KEY = ""


In [9]:
# from langchain_community.llms import HuggingFaceHub

# llm = HuggingFaceHub(
#     repo_id="HuggingFaceH4/zephyr-7b-beta",
#     model_kwargs={"temperature": 0.2, "max_length": 512}
# )

In [10]:
# !pip install -U langchain-huggingface


In [11]:
from langchain_huggingface import HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    huggingfacehub_api_token=HF_API_KEY,
    temperature=0.2,        # must be here
    max_new_tokens=512      # must be here
)

In [None]:
# ------------------ Step 4: LangGraph ------------------
# def analyze_clause(state):
#     text = state["clause"]
#     risk_assessment = llm(f"Analyze the legal risk in this clause:\n\n{text}")
#     state["analysis"] = risk_assessment
#     return state

# def analyze_clause(inputs):
#     clause_text = inputs["clause"]
#     # You can use your LLM here, e.g., llm(clause_text)
#     analysis = llm(clause_text)
#     return {"analysis": analysis}

# 3️⃣ Define your node function
# def analyze_clause(state):
#     clause_text = state["clause"]
#     # call your LLM here
#     analysis = llm(clause_text)  # make sure llm is defined
#     state["analysis"] = analysis
#     return state

In [13]:
# # result = app.invoke({"clause": filtered_df.iloc[0]["text"]})
# # print(result["analysis"])
# # 1️⃣ Define the schema of your state
# state_schema = {
#     "clause": str,      # input clause text
#     "analysis": str     # output analysis
# }

# # 2️⃣ Create the graph with schema
# graph = StateGraph(state_schema=state_schema)
# # graph = StateGraph()
# #  4️⃣ Add node and set entry/exit
# graph.add_node("AnalyzeClause", analyze_clause)
# graph.set_entry_point("AnalyzeClause")
# graph.set_finish_point("AnalyzeClause", END)

# # 5️⃣ Compile graph
# app = graph.compile()

# # 6️⃣ Run test
# result = app.invoke({"clause": filtered_df.iloc[0]["text"]})
# print(result["analysis"])

In [14]:
# # ------------------ Step 1: Imports ------------------
# from langgraph.graph import StateGraph, END

# # ------------------ Step 2: Define schema as a Python class ------------------
# class ClauseState:
#     clause: str        # input clause text
#     analysis: str = "" # output analysis

# # ------------------ Step 3: Create the graph ------------------
# graph = StateGraph(state_schema=ClauseState)

# # ------------------ Step 4: Define node function ------------------
# def analyze_clause(state: ClauseState):
#     clause_text = state.clause
#     # Call your LLM here (make sure llm is already defined)
#     analysis = llm(clause_text)
#     state.analysis = analysis
#     return state

# # ------------------ Step 5: Add node and set entry/exit ------------------
# # graph.add_node("AnalyzeClause", analyze_clause)
# # graph.set_entry_point("AnalyzeClause")
# # graph.set_finish_point("AnalyzeClause", END)
# # ------------------ Step 5: Add node and set entry/exit ------------------
# graph.add_node("AnalyzeClause", analyze_clause)
# graph.set_entry_point("AnalyzeClause")
# graph.set_finish_point("AnalyzeClause")  # ✅ just the node name


# # ------------------ Step 6: Compile ------------------
# app = graph.compile()

# # ------------------ Step 7: Test ------------------
# result = app.invoke({"clause": filtered_df.iloc[0]["text"]})
# print(result.analysis)


In [15]:
filtered_df

Unnamed: 0,Termination For Convenience,Change Of Control,Anti-Assignment,Revenue/Profit Sharing,Price Restrictions,Minimum Commitment,Volume Restriction,Uncapped Liability,Cap On Liability,Liquidated Damages,...,Source Code Escrow,Post-Termination Services,Audit Rights,Non-Compete,Exclusivity,Most Favored Nation,No-Solicit Of Customers,No-Solicit Of Employees,Non-Disparagement,text
0,[],[],"['MA may not assign, sell, lease or otherwise ...",[],[],['INITIAL ORDER COMMITMENT - MA commits to pur...,[],[],['The foregoing states the entire liability of...,[],...,[],[],['MA shall keep accurate records of the sales ...,[],[],[],[],[],['Company shall not specify the business pract...,"[] [] ['MA may not assign, sell, lease or othe..."
1,"[""Notwithstanding any other provision of this ...",[],"['This Agreement may not be assigned, sold or ...",['For so long as Rogers is required by Applica...,[],"['Licensor shall make available to Rogers, on ...",[],['Except with respect to any claim or liabilit...,['Except with respect to any claim or liabilit...,[],...,[],[],"[""During the Term, and for a period of twelve ...",[],[],['In the event that Licensor grants to another...,[],[],[],"[""Notwithstanding any other provision of this ..."
2,[],[],[],['Revenue Share as specified in this CONTENT D...,[],[],[],[],[],[],...,[],[],"[""Each of the Parties may, at its own expense,...",[],['During the License Term (which is identified...,[],[],[],[],[] [] [] ['Revenue Share as specified in this ...
3,[],"[""For purposes of the preceding sentence, and ...","['Any purported assignment, delegation or tran...",['In addition to the License Fee payable in ac...,[],[],[],"[""The provisions of Section 9.1 and Section 9....",['NEITHER PARTY SHALL BE LIABLE UNDER OR IN CO...,[],...,[],[],['Licensor will pay the cost of such audits un...,[],"[""Subject to Licensee's on\xadgoing compliance...",[],[],[],[],"[] [""For purposes of the preceding sentence, a..."
4,[],[],"[""Except as otherwise specified in the previou...",['For content listed in Schedule A6 of Schedul...,[],[],[],['EXCEPT FOR THE ABOVE INDEMNIFICATION OBLIGAT...,['EXCEPT FOR THE ABOVE INDEMNIFICATION OBLIGAT...,[],...,[],[],['Until one (1) year after the expiration of t...,[],[],[],[],[],[],"[] [] [""Except as otherwise specified in the p..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505,[],"[""Licensor may terminate this Agreement:<omitt...",['Neither this Agreement nor any of the rights...,['Licensee shall pay to Licensor for each Lice...,[],['Licensor may terminate this Agreement:\n\n ...,[],[],"[""If such error or discrepancy is not resolved...",[],...,[],"[""Upon the termination or expiration of the te...","[""Any such audit shall be\n\n\n\n\n\nconducted...","[""Each of Licensor and Licensee understands an...",['Licensor hereby grants to Licensee an exclus...,[],[],[],[],"[] [""Licensor may terminate this Agreement:<om..."
506,[],[],['This Agreement is not assignable by the Prof...,['If the Company elects to create and market t...,[],"[""The Professional agrees to participate in a ...",['The sum of the Quarterly Grant Numbers in ea...,[],[],[],...,[],['The Professional hereby grants to the Compan...,[],"[""To avoid any possibility of confusion of the...",['The Professional hereby grants to the Compan...,[],[],[],['acts or omissions reasonably determined by t...,[] [] ['This Agreement is not assignable by th...
507,[],[],[],[],[],[],[],[],[],[],...,[],[],[],[],[],[],[],[],[],[] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [...
508,[],[],[],"[""Commencing on March 1, 2020 and thereafter, ...",[],[],[],[],[],[],...,[],[],[],[],[],[],['During the Term and any renewal terms of the...,"[""During the Term and any renewal terms of the...",[],"[] [] [] [""Commencing on March 1, 2020 and the..."


In [18]:
# ---------------- Step 1: Imports ----------------
import pandas as pd
from langgraph.graph import StateGraph, END
from transformers import pipeline

# ---------------- Step 2: Load CUAD filtered data ----------------
# url = "https://huggingface.co/datasets/theatticusproject/cuad/resolve/main/CUAD_v1/master_clauses.csv"
# cuad_df = pd.read_csv(url)

# risky_cols = [
#     "Termination For Convenience",
#     "Change Of Control",
#     "Anti-Assignment",
#     "Revenue/Profit Sharing",
#     "Price Restrictions",
#     "Minimum Commitment",
#     "Volume Restriction",
#     "Uncapped Liability",
#     "Cap On Liability",
#     "Liquidated Damages",
#     "Irrevocable Or Perpetual License",
#     "Source Code Escrow",
#     "Post-Termination Services",
#     "Audit Rights",
#     "Non-Compete",
#     "Exclusivity",
#     "Most Favored Nation",
#     "No-Solicit Of Customers",
#     "No-Solicit Of Employees",
#     "Non-Disparagement"
# ]

# existing_cols = [col for col in risky_cols if col in cuad_df.columns]
# filtered_df = cuad_df.dropna(how="all", subset=existing_cols)
# filtered_df["text"] = filtered_df[existing_cols].apply(
#     lambda row: " ".join(row.dropna().astype(str)), axis=1
# )

# ---------------- Step 3: Free HuggingFace LLM ----------------

from dataclasses import dataclass
llm = pipeline("text-generation", model="tiiuae/falcon-7b-instruct", device=-1)

def analyze_clause(inputs):
    """Analyze a contract clause using the free HuggingFace LLM"""
    clause_text = inputs["clause"]
    prompt = f"Analyze the following contract clause and identify potential risks:\n\n{clause_text}\n\nSummary of risks:"
    output = llm(prompt, max_new_tokens=256, do_sample=False)[0]["generated_text"]
    return {"analysis": output}

# ---------------- Step 4: LangGraph with class-based schema ----------------
@dataclass
class ClauseInput:
    clause: str

@dataclass
class ClauseOutput:
    analysis: str

graph = StateGraph(state_schema=ClauseInput)
graph.add_node("AnalyzeClause", analyze_clause)
graph.set_entry_point("AnalyzeClause")
graph.set_finish_point("AnalyzeClause")  # finish at same node
app = graph.compile()

# ---------------- Step 5: Test ----------------
result = app.invoke({"clause": filtered_df.iloc[0]["text"]})
print(result["analysis"])


Loading checkpoint shards: 100%|██████████| 2/2 [03:45<00:00, 112.65s/it]


TypeError: 'ClauseInput' object is not subscriptable