In [7]:
import django
django.setup()
from sefaria.model import *
import pickle
import os
from typing import List
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

# Models used in analyze_sources
class Commentary(BaseModel):
    # One text linked to a base text.  BaseText.from_ref builds these from each
    # linked Ref: its normalized ref string, its index title, the primary
    # English title of the first author (or "An Unknown Author"), and its
    # English text as a single string.
    # Documented with `#` comments rather than a docstring so the pydantic
    # schema generated from this class is left exactly as it was.
    ref: str = Field(description="Identifier for the commentary text")
    index_title: str = Field(description="Title of the book")
    author: str = Field(description="Author of the commentary")
    en_text: str = Field(description="English translation of the commentary")


class BaseText(BaseModel):
    # A base text identified by one Ref, with its English and Hebrew text and
    # the linked texts that are fully available in English.
    # Documented with `#` comments rather than a docstring so the pydantic
    # schema generated from this class is left exactly as it was.
    ref: str = Field(description="Identifier for the text")
    english_version: str = Field(description=" English translation of the text")
    hebrew_version: str = Field(description="Original Hebrew version of the text")
    num_linked: int = Field(description="Number of linked commentaries")
    linked_commentaries: List[Commentary] = Field(description="Commentaries linked to this text")

    @classmethod
    def from_ref(cls, r: Ref, exclude_refs=None):
        """
        Build an instance for ``r`` together with its English-available linked texts.

        :param r: Ref of the base text
        :param exclude_refs: optional list of Refs to exclude from the linked_commentaries.
            Can be Index level or lower; a linked ref is dropped when any of these contains it.
        :return: a single ``cls`` instance.  The linked refs are not returned separately;
            they are carried in ``linked_commentaries`` and counted in ``num_linked``.
        """
        # English and Hebrew text of the base ref, run through the same cleanup helper.
        tc = r.text('en')
        en_text = tc.remove_html_and_make_presentable(tc.text)
        he_text = tc.remove_html_and_make_presentable(r.text("he").text)

        # Every ref linked to r (the link set is not filtered to commentaries)
        # whose English text is fully available.
        eng_linked_refs = [lr for lr in r.linkset().refs_from(r) if lr.is_text_fully_available('en')]
        if exclude_refs:
            eng_linked_refs = [lr for lr in eng_linked_refs
                               if not any(e.contains(lr) for e in exclude_refs)]

        commentaries = []
        for lr in eng_linked_refs:
            # Look the authors up once per ref; only the first one is reported.
            authors = lr.index.author_objects()
            commentaries.append(Commentary(
                ref=lr.normal(),
                index_title=lr.index.title,
                author=authors[0].get_primary_title("en") if authors else "An Unknown Author",
                en_text=lr.text('en').as_string(),
            ))

        return cls(
            ref=r.normal(),
            english_version=en_text,
            hebrew_version=he_text,
            num_linked=len(eng_linked_refs),
            linked_commentaries=commentaries
        )


class QuestionInterest(BaseModel):
    # A single question attributed to one commentary ref, plus an interest score.
    # The 1-10 range is stated only in the field description; nothing in this
    # class validates it.
    ref: str = Field(description="Identifier for the commentary text")
    question: str = Field(description="Questions that the source text seeks to answer")
    interest_rating: int = Field(description="Rating, from 1-10 for how interesting the question is")


class QuestionsInterest(BaseModel):
    # All the questions for a given commentary.
    # `ref` names the commentary; each QuestionInterest in the list carries its
    # own `ref` field as well.
    ref: str = Field(description="Identifier for the commentary text")
    questions_interest: List[QuestionInterest] = Field(description="Questions that the source text seeks to answer, and how interesting they are.")


class AllSourceQuestions(BaseModel):
    # Top-level container for one base text: `ref` identifies the base text and
    # `all_questions` holds one QuestionsInterest entry per commentary.
    ref: str = Field(description="Identifier for the base text")
    all_questions: List[QuestionsInterest] = Field(description="Questions that the commentaries on the source text seek to answer, and how interesting they are.")


# Models used in find_top_questions, choose_sources
class QuestionGrouping(BaseModel):
    # One merged question together with the ref strings of the original
    # questions it covers.
    # NOTE(review): "indentifiers" in the description below is a typo for
    # "identifiers".  It is left untouched here because the description is part
    # of the schema generated from this model; fix it deliberately if wanted.
    question: str = Field(description="Grouped question")
    included_commentaries: List[str] = Field(description="Ref indentifiers for the original questions that this one includes")


class QuestionGroupings(BaseModel):
    # Wrapper holding a list of QuestionGrouping items under a single key.
    question_groupings: List[QuestionGrouping] = Field(description="Grouped questions")


class CommentSummary(BaseModel):
    # A summary of one commentary, keyed by that commentary's ref string.
    # The camelCase field names are the keys that appear in serialized output,
    # so renaming them would change the data format.
    commentaryRef: str = Field(description="Ref Identifier for the commentary text")
    summaryText: str = Field(description="Summary of the commentary")


class AnsweredQuestion(BaseModel):
    # One question paired with the commentary summaries that address it.
    question: str = Field(description="Question being addressed")
    commentaries: List[CommentSummary] = Field(description="Commentaries that address the question")


class AnsweredQuestions(BaseModel):
    # Top-level container for one base text: `ref` identifies the base text and
    # `questions` lists each AnsweredQuestion with its commentary summaries.
    ref: str = Field(description="Identifier for the base text")
    questions: List[AnsweredQuestion] = Field(description="Questions that the commentaries seek to answer")



In [12]:
# Load a previously pickled object into the notebook-global `c`.
# The path is relative, so it resolves against the kernel's working directory.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load this file if it was produced by a trusted run of this project.
# NOTE(review): presumably the file holds an AllSourceQuestions instance, in
# which case the model classes must already be defined in the kernel - confirm.
with open("questions-Pirkei_Avot.3.18.pkl", 'rb') as f:
    c = pickle.load(f)


In [18]:
# Pretty-print the notebook-global `c` as indented JSON.
# NOTE(review): relies on `c` still being the object loaded from the pickle
# file; another cell in this notebook rebinds `c` as a loop variable.
print(c.json(indent=2))

{
  "ref": "Pirkei Avot 3:18",
  "all_questions": [
    {
      "ref": "Sforno on Exodus 9:19:1",
      "questions_interest": [
        {
          "ref": "Sforno on Exodus 9:19:1",
          "question": "Why does God warn the Egyptians to bring their cattle and servants inside before the hail strikes?",
          "interest_rating": 6
        },
        {
          "ref": "Sforno on Exodus 9:19:1",
          "question": "Does God care more about people or animals?",
          "interest_rating": 8
        },
        {
          "ref": "Sforno on Exodus 9:19:1",
          "question": "What is the basis for saying that people are more beloved to God than animals?",
          "interest_rating": 7
        }
      ]
    },
    {
      "ref": "Sforno on Deuteronomy 33:3:1",
      "questions_interest": [
        {
          "ref": "Sforno on Deuteronomy 33:3:1",
          "question": "If the Jewish people are God's most precious nation, does that mean God does not care about other nations?",
 

In [10]:
# Display the notebook-global `c` as a plain dict.
# NOTE(review): the output recorded for this cell is for "Pirkei Avot 1:3" with
# `questions`/`commentaries` keys, which is not the object loaded from
# questions-Pirkei_Avot.3.18.pkl.  The cell was evidently run against an earlier
# binding of `c`, so a fresh top-to-bottom run will show something different.
c.dict()

{'ref': 'Pirkei Avot 1:3',
 'questions': [{'question': 'What is the ideal motivation and mindset for serving God?',
   'commentaries': [{'commentaryRef': 'Duties of the Heart, Introduction of the Author 9',
     'summaryText': 'Bachya ibn Pekuda: Study theology to understand our religion, not for worldly benefits. Learn out of love, not for honor or personal gain.'},
    {'commentaryRef': 'Duties of the Heart, Fourth Treatise on Trust 4:114',
     'summaryText': "He also says: Don't serve God expecting reward, but rather strive to thank Him for His kindnesses out of gratitude, not hope for future reward."},
    {'commentaryRef': 'Mesilat Yesharim 16:8',
     'summaryText': 'Moses Chaim Luzzatto (Ramchal): Serving God for the sake of reward is not ideal. One should strive to serve God for its own sake to reach perfection.'}]},
  {'question': 'What is the difference between serving God for reward vs. serving without expectation of reward?',
   'commentaries': [{'commentaryRef': 'Tosafot 

In [23]:
# Build the base text for one mishnah and show what was linked to it.
b = BaseText.from_ref(Ref("Pirkei Avot 3:18"))
print(b.linked_commentaries[0])

# Render every linked commentary as "<ref> - <English text>" followed by a
# "###" separator line.  A generator inside join() is used instead of a
# `for c in ...` loop so that the notebook-global `c` (used by other cells)
# is not overwritten, and the string is assembled in one pass.
msg = "".join(
    f"{commentary.ref} - {commentary.en_text}\n###\n"
    for commentary in b.linked_commentaries
)
print(msg)

ref='Sforno on Exodus 9:19:1' index_title='Sforno on Exodus' author='Ovadiah Seforno' en_text='ועתה שלח העז את מקנך, in order that also the servants supervising the cattle would escape the hail together with their herds.. If G’d cared about the beasts, He most certainly cared about the human beings, as we know from Avot 3,18 חביב אדם כי נברא בצלם, “man is beloved as he (alone) has been created in G’d’s image.”'
Sforno on Exodus 9:19:1 - ועתה שלח העז את מקנך, in order that also the servants supervising the cattle would escape the hail together with their herds.. If G’d cared about the beasts, He most certainly cared about the human beings, as we know from Avot 3,18 חביב אדם כי נברא בצלם, “man is beloved as he (alone) has been created in G’d’s image.”
###
Sforno on Deuteronomy 33:3:1 - אף חובב עמים, he says to G’d that: “I am aware that You are fond of other peoples also, as when You said that the Jewish people are the most precious מכל העמים, from among all the nations, You meant that t

In [24]:
# Display the cleaned English text of the base ref.
# NOTE(review): the recorded output is the Rabbi Eliezer Hisma passage, while
# the linked Sforno comments printed by the previous cell cite "Avot 3,18" for
# "man is beloved...".  The links may follow a different mishnah numbering than
# this text version - confirm the ref points at the intended passage.
b.english_version

'Rabbi Eliezer Hisma said: the laws of mixed bird offerings and the key to the calculations of menstruation days, these are the body of the halakhah. The calculation of the equinoxes and gematria are the desserts of wisdom.'

In [25]:
# Display the cleaned Hebrew text of the same base ref, for comparison with
# the English version.
b.hebrew_version


'רַבִּי אֱלִיעֶזֶר בֶּן חִסְמָא אוֹמֵר, קִנִּין וּפִתְחֵי נִדָּה, הֵן הֵן גּוּפֵי הֲלָכוֹת. תְּקוּפוֹת וְגִימַטְרִיאוֹת, פַּרְפְּרָאוֹת לַחָכְמָה:'

In [5]:
import os

from langchain_google_genai import ChatGoogleGenerativeAI

# Smoke test: confirm the Gemini chat model is reachable and returns text.
#
# The API key is read from the environment instead of being written into the
# notebook.  A missing variable raises KeyError here, before any request is made.
# SECURITY: a literal key used to be committed in this cell; treat that key as
# compromised and revoke/rotate it in the Google Cloud console.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro-latest",
    google_api_key=os.environ["GOOGLE_API_KEY"],
)
result = llm.invoke("Tell me what you know about Sefaria, but do it in Yeshivish English")
print(result.content)


Nu, Sefaria? It's a gevaldige online resource, a digitized treasure trove of Torah, Tanakh, and all sorts of Jewish texts. You've got Gemara, Mishnah, Halacha seforim, commentaries - a whole shmorgishbord of learning, all for free. 

It's a real maaseh nissim for anyone looking to learn Torah, from the yeshiva bochur to the baal habus looking to chap arein a blatt Gemara. You can search by topic, keyword, or even specific line, and it'll bring up all the relevant sources. Plus, it's got translations and commentaries in English and other languages, so even if your Loshon Kodesh is a bissel rusty, you can still get a geshmak of the sugya.

It's a real matana for the Jewish people, Sefaria. A real kiddush Hashem. 


In [12]:
class GeminiQuestion(BaseModel):
    # One question together with the ref strings of the sources that address it.
    # NOTE(review): "indentifiers" in the description below is a typo for
    # "identifiers".  It is left untouched here because the description is
    # embedded in the parser's format instructions; fix it deliberately if wanted.
    question: str = Field(description="Question being addressed")
    sources: List[str] = Field(description="Ref indentifiers for the sources that address this question")


class GeminiQuestions(BaseModel):
    # Top-level output schema for one base text: `ref` identifies the base text
    # and `questions` lists each GeminiQuestion.  This is the model handed to
    # PydanticOutputParser in this cell.
    ref: str = Field(description="Identifier for the base text")
    questions: List[GeminiQuestion] = Field(description="Questions that the commentaries seek to answer")

# Parser that targets the GeminiQuestions schema.
parser = PydanticOutputParser(pydantic_object=GeminiQuestions)
# Show the instruction text the parser generates; it embeds the JSON schema of
# GeminiQuestions, including every Field description.
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"ref": {"title": "Ref", "description": "Identifier for the base text", "type": "string"}, "questions": {"title": "Questions", "description": "Questions that the commentaries seek to answer", "type": "array", "items": {"$ref": "#/definitions/GeminiQuestion"}}}, "required": ["ref", "questions"], "definitions": {"GeminiQuestion": {"title": "GeminiQuestion", "type": "object", "properties": {"question": {"title": "Question", "description": "Question being addressed", "type": "string"}, "sources": {"title": "Sources", "descri