In [74]:
from langgraph.graph import StateGraph,START,END
from typing import TypedDict
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from typing import Literal
from typing import TypedDict, Literal, Annotated
from langchain_core.messages import SystemMessage, HumanMessage
import time

In [75]:
import pandas as pd
import operator

In [76]:
from typing import TypedDict
from typing_extensions import Annotated

class classification_state(TypedDict):
    description: str
    class_category: str
    class_category_reason: str
    response: Annotated[list[str], operator.add]
    reason: str

In [89]:
model = ChatGoogleGenerativeAI(model='gemini-2.5-flash',temperature=0.3)

In [90]:
from pydantic import BaseModel, Field

class DescriptionEvaluation(BaseModel):
    evaluation: Literal['Pathogenic', 'Benign','Both'] = Field(..., description="ACMG Attribute class only accepted values are Pathogenic, Benign, Both")
    reason: str = Field(..., description="Reason for the classification.")

class ACMGEvaluation(BaseModel):
    evaluation: list[str] = Field(..., description="ACMG Attribute")
    reason: str = Field(..., description="Reason for the classification.")

class Prompt(BaseModel):
    prompt: str = Field(..., description="Attribute Prompt")

In [91]:
classification_model = model.with_structured_output(DescriptionEvaluation)
attribute_model = model.with_structured_output(ACMGEvaluation)
prompt_model = model.with_structured_output(Prompt)

In [92]:
def description_classification(state: classification_state):
    description= state['description']
    with open('classification_prompt.txt','r') as file:
        prompt = file.readlines()
    messages = [
    SystemMessage(content=
    f"""{prompt}"""
     ),
    HumanMessage(content=f""" Here is the given description:
     {description}, 
        ########### respond only in the structure format:
        evaluation: description="ACMG Attribute class only accepted values are Pathogenic, Benign, Both
        feedback: Reason for the classification.
    """)
    ]
    response = classification_model.invoke(messages)
    return {"class_category":response.evaluation,"class_category_reason":response.reason}

In [93]:
def benign_classification(state: classification_state):
    description= state['description']
    with open('benign_prompt.txt','r') as file:
        prompt = file.readlines()
    messages = [
    SystemMessage(content=f"{prompt}"),
    HumanMessage(content=f""" Here is the given description:
     {description}, 
        ########### respond only in the structure format:
        evaluation: list[str] = Field(..., description="ACMG Attribute")
    reason: str = Field(..., description="Reason for the classification.")
    """)
    ]
    response = attribute_model.invoke(messages)
    return {"response":response.evaluation,"reason":response.reason}

In [94]:
def pathogenic_classification(state: classification_state):
    description= state['description']
    with open('pathogenic_prompt.txt','r') as file:
        prompt = file.readlines()
    messages = [
    SystemMessage(content=f"{prompt}"),
    HumanMessage(content=f""" Here is the given description:
     {description}, 
        ########### respond only in the structure format:
        evaluation: list[str] = Field(..., description="ACMG Attribute")
    reason: str = Field(..., description="Reason for the classification.")
    """)
    ]
    response = attribute_model.invoke(messages)
    return {"response":response.evaluation,"reason":response.reason}

In [95]:
def both_classification(state: classification_state):
    description= state['description']
    with open('both_prompt.txt','r') as file:
        prompt = file.readlines()
    messages = [
    SystemMessage(content=f"{prompt}"),
    HumanMessage(content=f""" Here is the given description:
     {description}, 
        ########### respond only in the structure format:
        evaluation: list[str] = Field(..., description="ACMG Attribute")
    reason: str = Field(..., description="Reason for the classification.")
    """)
    ]
    response = attribute_model.invoke(messages)
    return {"response":response.evaluation,"reason":response.reason}

In [96]:
def workflow_selection(state: classification_state):
    workflow = {"next":""}
    if state['class_category'] == 'Benign':
        workflow['next'] = "benign_classification"
    elif state['class_category'] == 'Pathogenic':
        workflow['next'] = 'pathogenic_classification'
    elif state['class_category'] == 'Both':
        workflow['next'] = "both_classification"
    else:
        workflow['next'] = "end"
    return workflow

In [97]:
graph = StateGraph(classification_state)

graph.add_node('description_classification', description_classification)
graph.add_node('workflow_selection', workflow_selection)

graph.add_node('benign_classification', benign_classification)
graph.add_node('pathogenic_classification', pathogenic_classification)
graph.add_node('both_classification', both_classification)

graph.add_edge(START, 'description_classification')
graph.add_edge('description_classification','workflow_selection')

graph.add_conditional_edges('workflow_selection',lambda x: x["next"],
    {"end": END, "benign_classification": "benign_classification","pathogenic_classification":"pathogenic_classification","both_classification":"both_classification"})

graph.add_edge('both_classification',END)
graph.add_edge('benign_classification',END)
graph.add_edge('pathogenic_classification',END)

workflow = graph.compile()


In [98]:
df=pd.read_csv('final_classification.csv')

df_balanced = df.groupby('class', group_keys=False).apply(
    lambda x: x.sample(n=33, random_state=42)
)
df_random = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

  df_balanced = df.groupby('class', group_keys=False).apply(


In [99]:
output_class = []
output_response = []
for i in df_random.values[:10]:
    initial_state = {'description': i[2] }
    res=workflow.invoke(initial_state)
    output_response.append(res['response'])
    output_class.append(res['class_category'])
    


In [100]:
output_class

['Both',
 'Both',
 'Both',
 'Pathogenic',
 'Pathogenic',
 'Pathogenic',
 'Pathogenic',
 'Benign',
 'Pathogenic',
 'Benign']

In [69]:
class_=df_random.iloc[:10,:]

In [70]:
class_['class'].values

array(['Both', 'Both', 'Pathogenic', 'Benign', 'Pathogenic', 'Pathogenic',
       'Both', 'Both', 'Benign', 'Benign'], dtype=object)

In [71]:
output_class

['Both',
 'Benign',
 'Both',
 'Both',
 'Pathogenic',
 'Pathogenic',
 'Pathogenic',
 'Benign',
 'Both',
 'Benign']

In [72]:
from sklearn.metrics import classification_report

In [101]:
print(classification_report(class_['class'].values, output_class))

              precision    recall  f1-score   support

      Benign       0.50      0.33      0.40         3
        Both       0.67      0.50      0.57         4
  Pathogenic       0.40      0.67      0.50         3

    accuracy                           0.50        10
   macro avg       0.52      0.50      0.49        10
weighted avg       0.54      0.50      0.50        10



In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X = df.iloc[:, :-1]
y = df['class'] 

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.33,
    stratify=y,
    random_state=42
)

In [None]:
X_train['class']=y_train

In [None]:
df_balanced['class'].value_counts()

In [None]:
df