<a href="https://colab.research.google.com/github/Harshal292004/KGPDSH/blob/master/StormArchitecture.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [52]:
from langchain.prompts.chat import ChatPromptTemplate
from pydantic import BaseModel, Field, validator, PrivateAttr
from langchain_groq import ChatGroq
from langchain.chains import LLMChain
from langchain.chains.combine_documents import create_stuff_documents_chain
from google.colab import userdata
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import PathwayVectorClient

import json
import re
from typing import List, Dict

class ScoreDetails(BaseModel):
    score: float = Field(description="Score of the aspect (0-10)")
    justification: str = Field(description="Explanation for the score")

    @validator('score')
    def validate_score(cls, v: float) -> float:
        if v < 0 or v > 10:
            raise ValueError('Score must be between 0 and 10')
        return round(v, 1)

# Evaluation Categories
class Significance(BaseModel):
    theoretical_contribution: ScoreDetails = Field(description="Assessment of theoretical contribution to the field")
    practical_impact: ScoreDetails = Field(description="Evaluation of practical applications and real-world impact")
    novelty: ScoreDetails = Field(description="Originality of the research and its contribution")

class MethodologyEvaluation(BaseModel):
    research_design: ScoreDetails = Field(description="Appropriateness and quality of research design")
    reproducibility: ScoreDetails = Field(description="Clarity and completeness of methods for reproduction")

class PresentationQuality(BaseModel):
    writing_clarity: ScoreDetails = Field(description="Clarity and effectiveness of writing")
    organization: ScoreDetails = Field(description="Logical flow and structure of the paper")
    literature_review: ScoreDetails = Field(description="Comprehensiveness of literature review")

class PaperEvaluation(BaseModel):
    paper_id: str = Field(description="Unique identifier for the paper")
    significance: Significance = Field(description="Evaluation of research significance")
    methodology: MethodologyEvaluation = Field(description="Detailed methodology assessment")
    presentation: PresentationQuality = Field(description="Quality of presentation")
    confidence_level: float = Field(description="Overall confidence in this review (0-10)")
    _overall_score: float = PrivateAttr(default=0.0)
    major_strengths: List[str] = Field(description="Major strengths of the paper")
    major_weaknesses: List[str] = Field(description="Major weaknesses of the paper")
    detailed_feedback: str = Field(description="Comprehensive feedback and suggestions")
    summary_of_paper: str = Field(description="Relevant summary for retrieval tasks")

    @property
    def overall_score(self) -> float:
        if self._overall_score == 0.0:
            weights = {
                'significance': 0.3,
                'methodology': 0.5,
                'presentation': 0.2,
            }

            sig_score = (self.significance.theoretical_contribution.score +
                         self.significance.practical_impact.score +
                         self.significance.novelty.score) / 3

            meth_score = (self.methodology.research_design.score +
                          self.methodology.reproducibility.score) / 2

            pres_score = (self.presentation.writing_clarity.score +
                          self.presentation.organization.score +
                          self.presentation.literature_review.score) / 3

            self._overall_score = round(
                weights['significance'] * sig_score +
                weights['methodology'] * meth_score +
                weights['presentation'] * pres_score ,
                3
            )
        return self._overall_score

    @validator('confidence_level')
    def validate_confidence(cls, v: float) -> float:
        if v < 0 or v > 10:
            raise ValueError('Confidence level must be between 0 and 10')
        return round(v, 1)

    def generate_summary_report(self) -> str:
        string = f"""
        Paper Review Summary (ID: {self.paper_id})
        ==========================================
        Review Confidence: {self.confidence_level}/10
        Overall Score: {self.overall_score}/10
        Key Strengths:
        {chr(10).join(f"- {strength}" for strength in self.major_strengths)}

        Areas for Improvement:
        {chr(10).join(f"- {weakness}" for weakness in self.major_weaknesses)}

        Detailed Feedback:
        {self.detailed_feedback}

        Summary of the Paper:
        {self.summary_of_paper}
        """
        return string

# Implementing STORM Architecture with LangGraph
class ConferenceAgent:
    def __init__(self, name: str, chat_model: ChatGroq, target_conference: str, conference_themes: str, conference_context:str):
        self.name = name
        self.chat_model = chat_model
        self.target_conference = target_conference
        self.conference_themes = conference_themes
        self.conference_context = conference_context

    def evaluate_paper(self, paper_summary: str) -> Dict:
        prompt = ChatPromptTemplate.from_messages([
            ('system',  f"""
            You are a representative for the conference {self.target_conference}. Strictly Focus Specially on the conference themes: {self.conference_themes}
            Use the following additional context to guide your evaluation:
            {self.conference_context}

            Evaluate the research paper based on:
            - Relevance to conference themes
            - Quality of methodology
            - Novelty of contribution
            Provide a score (0-10) and a justification for why the paper fits this conference.
            Respond is json format with the following keys
            - score: float
            - justification: str
            """),
            ('user', "Paper summary: {paper_summary}")
        ])
        chain = LLMChain(llm = self.chat_model, prompt=prompt)
        response = chain.run({"paper_summary": paper_summary})
        try:
            # Preprocess the response to clean up invalid characters
            cleaned_response = re.sub(r'[\x00-\x1F\x7F]', '', response)  # Remove control characters
            json_match = re.search(r'\{.*\}', cleaned_response, re.DOTALL)
            if json_match:
                json_response = json_match.group(0)
                response_dict = json.loads(json_response)
                return {
                    "score": float(response_dict["score"]),
                    "justification": response_dict["justification"],
                }
            else:
                raise ValueError(f"No JSON object found in the response: {response}")
        except json.JSONDecodeError as e:
            raise ValueError(f"Failed to parse response as JSON: {response}") from e

class STORMSystem:
    def __init__(self, agents: List[ConferenceAgent]):
        self.agents = agents

    def discuss_and_decide(self, paper_summary: str) -> Dict:
        evaluations = []
        for agent in self.agents:
            evaluation = agent.evaluate_paper(paper_summary)
            evaluations.append({
                "conference": agent.target_conference,
                "score": evaluation["score"],
                "justification": evaluation["justification"],
            })

        # Determine the best conference based on scores
        best_conference = max(evaluations, key=lambda x: x["score"])
        return {
            "best_conference": best_conference["conference"],
            "justification": best_conference["justification"],
        }

# Example Usage
if __name__ == "__main__":
    chat_model = ChatGroq(groq_api_key = userdata.get("GROQ_API_KEY").strip(), model="llama3-8b-8192")

    # Define agents for each conference
    agents = [
        ConferenceAgent(name="Agent CVPR", chat_model=chat_model, target_conference="CVPR",
                        conference_themes = """
                        CVPR focuses on the field of computer vision, image processing, and pattern recognition. Key themes include:
                          Object detection and recognition.
                          Image segmentation and scene understanding.
                          Visual tracking and motion analysis.
                          3D vision, stereo vision, and depth estimation.
                          Deep learning for vision tasks (CNNs, Vision Transformers, etc.).
                          Applications in medical imaging, autonomous vehicles, and robotics.
                          Video processing and understanding.
                          Computational photography and imaging.
                          Low-level vision (denoising, super-resolution).
                          Vision-based augmented reality and virtual reality.
                        """,
                        conference_context = """
                        Examples of accepted papers:
                          - YOLO: Real-Time Object Detection
                              Summary: Proposes an efficient deep learning model for real-time object detection with state-of-the-art accuracy.
                          - SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation
                              Summary: Focuses on pixel-wise image segmentation using deep neural networks, with applications in autonomous systems.
                          - 3D Object Detection and Localization Using RGB and Depth Data
                              Summary: Combines RGB and depth information for accurate 3D object detection in indoor environments.
                        Evaluation Criteria:
                         - Relevance to core computer vision topics.
                         - Advances in methodology or application.
                         - Practical implications for industries like healthcare, robotics, and transportation.
                        """
                        ),
        ConferenceAgent(name="Agent NeurIPS", chat_model=chat_model, target_conference="NeurIPS"
                        ,conference_themes ="""
                        NeurIPS emphasizes machine learning, computational neuroscience, and AI-related topics. Key themes include:
                            Deep learning (e.g., architectures, generative models, optimization for deep networks, foundation models, LLMs)
                            Reinforcement learning and decision-making.
                            Probabilistic models and Bayesian learning.
                            Optimization techniques for machine learning.
                            Representation learning and embeddings.
                            AI for healthcare, climate science, and sustainability.
                            Neuroscience-inspired algorithms and theories.
                            Ethical AI, fairness, and explainability.
                        """,
                        conference_context = """
                        Examples of accepted papers:
                          - Attention Is All You Need
                             Summary: Introduces the transformer architecture, which revolutionized NLP and other domains by using self-attention mechanisms.
                          - Generative Adversarial Networks (GANs)
                             Summary: Proposes a novel framework for training generative models using adversarial networks.
                          - Adam: A Method for Stochastic Optimization
                             Summary: Proposes a new optimization algorithm that balances convergence speed and stability in deep learning.
                        Evaluation Criteria:
                          - Novelty and theoretical contributions.
                          - Experimental rigor and reproducibility.
                          - Broader implications for machine learning and interdisciplinary applications.
                        """
        ),
        ConferenceAgent(name="Agent EMNLP", chat_model=chat_model, target_conference="EMNLP"
                        ,conference_themes = """
                        EMNLP specializes in NLP and computational linguistics. Key themes include:
                          Machine translation and cross-lingual NLP.
                          Large language models and foundational models.
                          Sentiment analysis and opinion mining.
                          Dialogue systems and conversational AI.
                          Question answering and information retrieval.
                          Text summarization and abstraction.
                          Morphology, syntax, and semantics.
                          Multimodal NLP (text + image/audio fusion).
                          Ethical concerns in NLP (bias, toxicity detection).
                          Low-resource and multilingual NLP.
                        """,
                        conference_context = """
                        Examples of accepted papers:

Paper Title: Pre-trained Language Models for Text-to-Text Generation
Summary: This paper explores pre-trained models (e.g., T5) for a variety of text-to-text NLP tasks, showcasing improvements across summarization, question answering, and translation.

Paper Title: Adversarial Training for Robust Text Classification
Summary: Proposes an adversarial training framework that enhances the robustness of text classification models against noisy and adversarial inputs.

Paper Title: Knowledge-Enhanced Contextual Representations for Entity Linking
Summary: Combines external knowledge sources with contextual embeddings to improve entity linking in complex domains.

Paper Title: Multimodal Sentiment Analysis with Attention Mechanisms
Summary: Introduces a framework for combining visual and textual inputs to improve sentiment detection in videos.

Evaluation Criteria:
Relevance to NLP:

Papers should directly address core NLP tasks, methodologies, or applications.
Emphasis on empirical evaluation and innovation in language processing methods.
Quality of Methodology:

Rigorous experiments, proper baselines, and thorough ablation studies.
Use of diverse and large-scale datasets to validate results.
Novelty of Contribution:

New architectures, algorithms, or findings that advance the field.
Extensions of existing methods to novel tasks or domains.
Broader Impacts:

Ethical implications of the research (e.g., bias in models, data privacy).
Potential for cross-disciplinary applications (e.g., in healthcare, education, or social media).
Themes Breakdown with Examples:
Natural Language Understanding:

Semantic role labeling, coreference resolution, and discourse parsing.
Example: A New Framework for Semantic Parsing Using Pre-Trained Transformers.
Language Generation:

Dialogue systems, machine translation, and creative text generation.
Example: Controlled Text Generation with Discrete and Continuous Latent Variables.
Information Extraction:

Named entity recognition, relation extraction, and knowledge graph construction.
Example: Joint Entity and Relation Extraction with Transformer-Based Models.
Multimodal NLP:

Integrating vision, audio, and textual modalities.
Example: Aligning Text and Vision for Multimodal Machine Translation.
Social and Ethical Considerations:

Bias, fairness, interpretability, and environmental impacts of NLP models.
Example: Bias Mitigation in Pre-trained Models: A Data Augmentation Approach.

                        """

        ),
        ConferenceAgent(name="Agent KDD", chat_model=chat_model, target_conference="KDD"
                        ,conference_themes = """
                        KDD centers on data mining, big data, and applied AI. Key themes include:
                            Scalable data mining algorithms.
                            Graph data and network analysis.
                            Temporal and sequential data mining.
                            Anomaly detection and predictive modeling.
                            Recommender systems and personalization.
                            Causal inference and counterfactual reasoning.
                            Applications in finance, e-commerce, and marketing.
                            Data visualization and interpretability.
                            Data ethics and privacy-preserving techniques.
                            AI and data-driven solutions for social good.
                        """,

                        conference_context="""
                        Examples of accepted papers:

Paper Title: Scalable Graph Neural Networks for Large-Scale Social Network Analysis
Summary: Introduces a scalable GNN framework that handles billion-scale graphs with high efficiency, showcasing applications in social network analysis.

Paper Title: Causal Discovery in High-Dimensional Data Using Deep Learning
Summary: Proposes a deep learning-based approach for identifying causal relationships in high-dimensional data.

Paper Title: Fair Representation Learning with Adversarial Networks
Summary: Develops a framework for learning fair data representations while minimizing demographic bias in downstream tasks.

Paper Title: Adaptive Online Learning for Real-Time Recommender Systems
Summary: Presents an adaptive online learning algorithm that updates recommender system models in real time based on user interactions.

Evaluation Criteria:
Relevance to Knowledge Discovery and Data Mining:

Papers must address core challenges in data mining, machine learning, or related applications.
Emphasis on practical applications and scalability to real-world data.
Methodological Rigor:

Strong theoretical foundations or innovative empirical methodologies.
Comprehensive experiments with real-world datasets and benchmarks.
Scalability and Efficiency:

Solutions must handle large-scale data effectively, both in computation and memory.
Novelty and Impact:

New techniques, algorithms, or applications that significantly advance the state-of-the-art.
Real-world relevance and potential societal or economic impact.
Broader Implications:

Ethical considerations, such as fairness, transparency, and responsible use of data.
Long-term applicability across industries and domains.
                        """
        ),
    ]

    storm_system = STORMSystem(agents=agents)

    # Input a research paper summary
    paper_summary = """
    Advancements in 3D Food Modeling: A Review of the
MetaFood Challenge Techniques and Outcomes
The growing focus on leveraging computer vision for dietary oversight and nutri-
tion tracking has spurred the creation of sophisticated 3D reconstruction methods

for food. The lack of comprehensive, high-fidelity data, coupled with limited
collaborative efforts between academic and industrial sectors, has significantly
hindered advancements in this domain. This study addresses these obstacles by
introducing the MetaFood Challenge, aimed at generating precise, volumetrically

accurate 3D food models from 2D images, utilizing a checkerboard for size cal-
ibration. The challenge was structured around 20 food items across three levels

of complexity: easy (200 images), medium (30 images), and hard (1 image). A
total of 16 teams participated in the final assessment phase. The methodologies
developed during this challenge have yielded highly encouraging outcomes in
3D food reconstruction, showing great promise for refining portion estimation in
dietary evaluations and nutritional tracking. Further information on this workshop
challenge and the dataset is accessible via the provided URL.
    """
    decision = storm_system.discuss_and_decide(paper_summary)
    print(f"Best Conference: {decision['best_conference']}")
    print(f"Justification: {decision['justification']}")



<ipython-input-52-57665ae7619a>:18: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  @validator('score')
<ipython-input-52-57665ae7619a>:79: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  @validator('confidence_level')


Best Conference: CVPR
Justification: This paper fits the CVPR conference themes as it focuses on 3D vision and pattern recognition, specifically in the application of computer vision for dietary oversight and nutrition tracking. The paper presents a comprehensive review of 3D food modeling techniques and introduces the MetaFood Challenge, which aims to generate precise and volumetrically accurate 3D food models from 2D images. The challenge involves 3D reconstruction methods, which aligns with the conference themes. The paper also presents encouraging outcomes in 3D food reconstruction, showing great promise for refining portion estimation in dietary evaluations and nutritional tracking. However, the paper may not have significant advances in methodology or novel contributions, which is why it scores 7.5 rather than higher. Nevertheless, it is a relevant and practical contribution to the field of computer vision.


In [50]:
pip install langchain langchain_community langgraph langchain_groq langchain_huggingface pathway

[31mERROR: Could not find a version that satisfies the requirement pathway_client (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for pathway_client[0m[31m
[0m

In [None]:
pip install --upgrade langgraph



In [9]:
# Pathway connector
from langchain_community.vectorstores import PathwayVectorClient

client = PathwayVectorClient(url="https://demo-document-indexing.pathway.stream")


In [12]:
query = "What is Pathway?"
docs = client.similarity_search(query, timeout=0)



In [14]:
docs[0].page_content

'3 2 0 2\nl u J\n2 1\n]\nG L . s c [\n1 v 6 1 1 3 1 . 7 0 3 2 : v i X r a\nPathway: a fast and flexible unified stream data processing framework for analytical and Machine Learning applications\nMichał Bartoszkiewicz\nJan Chorowski∗\nAdrian Kosowski\nJakub Kowalski\nSergey Kulik\nMateusz Lewandowski\nKrzysztof Nowicki\nKamil Piechowiak\nOlivier Ruas\nZuzanna Stamirowska\nPrzemysław Uznański\n{firstname.lastname}@pathway.com Pathway.com Paris, France\nABSTRACT We present Pathway, a new unified data processing framework that can run workloads on both bounded and unbounded data streams. The framework was created with the original motivation of re- solving challenges faced when analyzing and processing data from the physical economy, including streams of data generated by IoT and enterprise systems. These required rapid reaction while calling for the application of advanced computation paradigms (machine- learning-powered analytics, contextual analysis, and other elements of complex event 

In [66]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from pathway.xpacks.llm.vector_store import VectorStoreServer
import os

os.environ['HF_TOKEN'] = userdata.get("HF_TOKEN")

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
embeddings = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')


import pathway as pw

# Example data for table creation
data = [
    {"text": "This is a test justification.", "id": "1"},
    {"text": "Another piece of text.", "id": "2"}
]

try:
    # Create a Pathway table from records
    pathway_table = pw.Table.from_records(
        data,
        schema=pw.Schema(columns={
            "text": pw.Column(type=str),
            "id": pw.Column(type=str)
        })
    )
    print("Pathway Table created successfully!")
except Exception as e:
    print(f"Error creating Pathway Table: {e}")

Error creating Pathway Table: 'super' object has no attribute '__getattr__'


In [64]:
pip uninstall pathway

Found existing installation: pathway 0.16.3
Uninstalling pathway-0.16.3:
  Would remove:
    /usr/local/bin/pathway
    /usr/local/lib/python3.10/dist-packages/pathway-0.16.3.dist-info/*
    /usr/local/lib/python3.10/dist-packages/pathway/*
Proceed (Y/n)? y
  Successfully uninstalled pathway-0.16.3


In [65]:
pip install pathway

Y
Collecting pathway
  Using cached pathway-0.16.3-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
Using cached pathway-0.16.3-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (60.6 MB)
Installing collected packages: pathway
Successfully installed pathway-0.16.3
