In [21]:
# Smoke test: confirm the Gemini chat model answers before building the RAG pipeline.

from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the Google API key the client needs — confirm.
load_dotenv()

model = ChatGoogleGenerativeAI(model='gemini-2.0-flash')

# Send one trivial prompt; a printed answer means the model call succeeded.
result = model.invoke('What is the capital of India')

print(result.content)

The capital of India is **New Delhi**.


In [22]:
%pip install langchain openai faiss-cpu tiktoken
%pip install sentence-transformers

Note: you may need to restart the kernel to use updated packages.




Note: you may need to restart the kernel to use updated packages.




In [None]:
# %pip install "unstructured[pdf]"
# %pip install unstructured

Collecting effdet (from unstructured[pdf])
  Using cached effdet-0.4.1-py3-none-any.whl.metadata (33 kB)
Using cached effdet-0.4.1-py3-none-any.whl (112 kB)
Installing collected packages: effdet
Successfully installed effdet-0.4.1
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [None]:
# %pip install -U langchain-community

Note: you may need to restart the kernel to use updated packages.


In [16]:
import os
import json
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings 
from langchain.document_loaders import TextLoader, UnstructuredPDFLoader
from langchain.schema import Document

In [17]:
# Input locations for the scraped OpenRewrite documentation (JSON Lines files).
# Built with os.path.join instead of hard-coded backslashes so the notebook also
# runs on macOS/Linux, where "\" is an ordinary filename character, not a separator.
SCRAPER_DIR = "openrewrite_scraper"
RECIPES_PATH = os.path.join(SCRAPER_DIR, "java_recipes.jsonl")
RUNNING_RECIPES_PATH = os.path.join(SCRAPER_DIR, "running_recipes.jsonl")
POPULAR_RECIPES_PATH = os.path.join(SCRAPER_DIR, "popular_recipe_guides.jsonl")
AUTHORING_RECIPES_PATH = os.path.join(SCRAPER_DIR, "authoring_recipes.jsonl")

# Migration guide PDF and the folder the FAISS index is saved to / loaded from.
MIGRATION_GUIDE_PDF = "jdk-migration-guide.pdf"
VECTOR_DB_PATH = "faiss_index"

In [18]:
def load_jsonl(file_path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        file_path: Path to a UTF-8 encoded file holding one JSON value per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        # Blank or whitespace-only lines (e.g. an extra newline at the end of
        # the file) carry no record and would make json.loads raise, so skip them.
        return [json.loads(line) for line in f if line.strip()]

def convert_to_documents(data):
    """Wrap raw scraped records as LangChain ``Document`` objects.

    Args:
        data: Iterable of mappings, each with "url", "title" and "content" keys.

    Returns:
        A list of ``Document`` objects, one per record and in the same order.
        "content" becomes the page text; "url" and "title" become the metadata.
    """
    def _as_document(record):
        # Only url and title are kept as metadata; other keys are ignored.
        meta = {"url": record["url"], "title": record["title"]}
        return Document(page_content=record["content"], metadata=meta)

    return [_as_document(record) for record in data]

def load_pdf(path):
    """Load a PDF (here, the migration guide) through ``UnstructuredPDFLoader``.

    Args:
        path: Location of the PDF file.

    Returns:
        Whatever ``UnstructuredPDFLoader(path).load()`` returns.
    """
    return UnstructuredPDFLoader(path).load()

def split_documents(documents, chunk_size=500, chunk_overlap=100):
    """Split documents into smaller chunks with a recursive character splitter.

    Args:
        documents: Documents to split.
        chunk_size: ``chunk_size`` handed to the splitter (default 500).
        chunk_overlap: ``chunk_overlap`` handed to the splitter (default 100).

    Returns:
        The result of the splitter's ``split_documents`` call.
    """
    # Separators listed from coarsest (blank line) to finest (single space).
    boundaries = ["\n\n", "\n", ".", " "]
    chunker = RecursiveCharacterTextSplitter(
        separators=boundaries,
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    return chunker.split_documents(documents)

def create_embeddings_model():
    """Return the Google Generative AI embeddings client used for the index.

    Swap the returned object (e.g. for HuggingFaceEmbeddings()) to use a
    different embedding backend.
    """
    embedding_model_name = "models/embedding-001"
    return GoogleGenerativeAIEmbeddings(model=embedding_model_name)

def store_to_faiss(chunks, embeddings, save_path="faiss_index"):
    """Build a fresh FAISS index from ``chunks`` and save it to ``save_path``.

    Args:
        chunks: Documents to index.
        embeddings: Embedding model passed to ``FAISS.from_documents``.
        save_path: Location passed to ``save_local`` (default "faiss_index").

    Returns:
        The newly built FAISS vector store.
    """
    index = FAISS.from_documents(chunks, embeddings)
    index.save_local(save_path)
    return index

def update_vector_db(chunks, embeddings, vector_path="faiss_index"):
    """Create the FAISS index at ``vector_path``, or append ``chunks`` to it.

    If ``vector_path`` does not exist, a new index is built from ``chunks`` and
    saved there. If it exists, the saved index is loaded and ``chunks`` (when
    non-empty) are added before it is saved again.

    Args:
        chunks: Documents to index. May be empty only when an index already exists.
        embeddings: Embedding model used to build or load the index.
        vector_path: Location of the saved index (default "faiss_index").

    Returns:
        The FAISS vector store that was created or loaded, so callers can use it
        directly (the same kind of object ``store_to_faiss`` returns).

    Raises:
        ValueError: If no index exists yet and ``chunks`` is empty.
    """
    if not os.path.exists(vector_path):
        # Nothing on disk yet: a new index can only be built from real chunks.
        if len(chunks) == 0:
            raise ValueError("No documents to add to the new vector store.")
        vectorstore = FAISS.from_documents(chunks, embeddings)
        print("🆕 Created new vector store.")
        vectorstore.save_local(vector_path)
        print(f"📦 Vector DB created at: {vector_path}")
        return vectorstore

    # NOTE(security): allow_dangerous_deserialization=True opts in to loading
    # pickled data from disk; only point vector_path at folders you created yourself.
    vectorstore = FAISS.load_local(vector_path, embeddings, allow_dangerous_deserialization=True)
    print("✅ Loaded existing vector store.")

    if len(chunks) > 0:
        # Chunks are appended as-is: passing the same chunks on a later run
        # stores them a second time.
        vectorstore.add_documents(chunks)
        vectorstore.save_local(vector_path)
        print(f"📦 Vector DB updated at: {vector_path}")
    else:
        print("No new chunks to add.")
    return vectorstore

def build_rag_index(jsonl_path, output_path="faiss_index"):
    """Build a FAISS index from a single JSON Lines file and save it.

    Runs the whole pipeline for one file: load records, convert them to
    documents, split into chunks, embed, and store.

    Args:
        jsonl_path: Path to the JSON Lines file to index.
        output_path: Location the index is saved to (default "faiss_index").

    Returns:
        The vector store returned by ``store_to_faiss``. Previously the store was
        bound to an unused local and discarded.
    """
    data = load_jsonl(jsonl_path)
    docs = convert_to_documents(data)
    chunks = split_documents(docs)
    embeddings = create_embeddings_model()
    vectorstore = store_to_faiss(chunks, embeddings, output_path)
    print(f"✅ Vector store saved to: {output_path}")
    return vectorstore

In [19]:
def build_knowledge_base():
    """Index every documentation source into the FAISS store at VECTOR_DB_PATH.

    Loads the four OpenRewrite JSON Lines exports and the migration guide PDF,
    splits each into chunks, and passes all chunks to ``update_vector_db``.
    Returns None.
    """
    print("📥 Loading recipe docs...")
    jsonl_sources = (
        RECIPES_PATH,
        RUNNING_RECIPES_PATH,
        POPULAR_RECIPES_PATH,
        AUTHORING_RECIPES_PATH,
    )
    # Same three stages as before, applied to each source in turn:
    # load everything, then convert everything, then split everything.
    raw_records = [load_jsonl(source) for source in jsonl_sources]
    documents = [convert_to_documents(records) for records in raw_records]
    recipe_chunks, running_chunks, popular_chunks, authoring_chunks = (
        split_documents(docs) for docs in documents
    )

    print("📥 Loading migration guide PDF...")
    migration_docs = load_pdf(MIGRATION_GUIDE_PDF)
    # Give the PDF documents the same title/url metadata keys the JSONL documents carry.
    pdf_metadata = {
        "title": "Java Migration Guide",
        "url": "https://www.oracle.com/java/technologies/javase/migration-guide.html",
    }
    for pdf_doc in migration_docs:
        pdf_doc.metadata.update(pdf_metadata)

    # The migration guide is split into larger chunks than the recipe pages.
    migration_chunks = split_documents(migration_docs, chunk_size=1000, chunk_overlap=200)

    print("🧠 Updating vector DB with all chunks...")
    all_chunks = [
        *recipe_chunks,
        *migration_chunks,
        *running_chunks,
        *popular_chunks,
        *authoring_chunks,
    ]
    embedding_model = create_embeddings_model()
    update_vector_db(all_chunks, embedding_model, VECTOR_DB_PATH)

In [20]:
# Build the index on the first run, or append to it on later runs.
# NOTE: update_vector_db adds every chunk again when the index folder already
# exists, so re-running this cell stores duplicate entries.
build_knowledge_base()

📥 Loading recipe docs...
📥 Loading migration guide PDF...
🧠 Updating vector DB with all chunks...
🆕 Created new vector store.
📦 Vector DB created at: faiss_index


In [21]:
# build_rag_index(r"openrewrite_scraper\cleaned_recipes.jsonl")  # raw string: "\c" is not a valid escape

In [45]:
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Reload the saved FAISS index, using the same embedding model name
# ("models/embedding-001") that create_embeddings_model uses when building it.
# NOTE(security): allow_dangerous_deserialization=True opts in to loading pickled
# data from disk; only load index folders that this notebook created itself.
db = FAISS.load_local(
    "faiss_index",
    GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
    allow_dangerous_deserialization=True
)

In [46]:
# Similarity-search retriever over the loaded index; k=5 is the number of chunks requested per query.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})

In [47]:
# from langchain.prompts import PromptTemplate

# template = """
# You are an expert OpenRewrite assistant helping developers perform Java code migrations.

# You are given some documentation chunks as context. Follow these rules:

# 1. If the user's request matches a known recipe (described in the context), suggest the recipe name(s) and explain briefly how to use it.
# 2. If no exact recipe exists, check if the request could be fulfilled by creating a custom recipe.
#    - If yes, explain how to create a custom recipe using `rewrite.yml`, Java DSL, or by extending OpenRewrite base classes.
#    - Use examples from the context if available.
# 3. If neither is possible, say:
#    "There is no known recipe or migration method available for this case."

# Always cite the source URL(s) if you use any documentation.

# --------------------
# Context:
# {context}

# Question:
# {question}
# """

# prompt = PromptTemplate(
#     input_variables=["context", "question"],
#     template=template
# )


In [48]:
from langchain.prompts import PromptTemplate

# Prompt for the RAG chain. The model is told to work through four steps in order:
# migration plan -> existing recipe -> generated custom recipe -> explicit "no match".
# {context} and {question} are the two placeholders declared in input_variables below.
template = """
You are a highly reliable Java migration and refactoring assistant powered by OpenRewrite documentation and Oracle migration guides.

You are provided with contextual documentation chunks related to:
- Java version upgrades and migration steps (from Oracle migration guide and OpenRewrite popular recipe guides)
- Prebuilt OpenRewrite recipes and how to use them
- Authoring custom OpenRewrite recipes when needed

Follow this reasoning process **strictly**:

---

**STEP 1: Migration Plan Requested**
- If the user asks how to migrate from one version or framework to another (e.g., Java 8 to 17, Spring Boot 2 to 3, JUnit 4 to 5):
  - First search for popular migration guides or recipes in the context (e.g., "popular-recipe-guides").
  - If found, return that migration plan clearly with examples and steps.
  - If not found, continue to STEP 2.

---

**STEP 2: Code Transformation / Refactoring Requested**
- If the user asks about specific code changes (e.g., update import, change class usage, rename method), or mentions recipes:
  - First try to find existing OpenRewrite recipes in the context.
  - If found, return the recipe name, a short description, and `rewrite.yml` usage if available.
  - If no exact recipe found, continue to STEP 3.

---

**STEP 3: Recipe Not Found → Generate Custom Recipe**
- If no prebuilt recipe is available, try to generate a custom OpenRewrite recipe using knowledge from the authoring-recipes documentation in the context.
- Output the custom `rewrite.yml` example and Java implementation if possible.
- Mention that it's a generated solution based on the available best practices.

---

**STEP 4: No Match**
- If none of the above steps succeed, say:
  **"No matching recipe or migration plan was found for this request."**

---

Always:
- Provide clear instructions or YAML snippets
- Include source URLs or context info if available
- Keep answers practical and focused

--------------------
Context:
{context}

Question:
{question}
"""

# Wrap the text in a PromptTemplate so it can be passed to the QA chain as its prompt.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template
)


In [49]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model that answers the questions; same model name as the smoke-test cell at the top.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

In [50]:
from langchain.chains import RetrievalQA

# Retrieval-augmented QA chain: wires the retriever, the custom prompt and the LLM together.
# return_source_documents=True makes the response include "source_documents",
# which the cells below read to print the source URLs.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [51]:
# Example 1: ask for a framework migration plan (STEP 1 of the prompt).
query = "Give me recipe to convert spring 2 to spring 3"

# The response is a dict; 'result' holds the generated answer text.
response = qa_chain.invoke(query)
print(response['result'])

Here's a plan to migrate from Spring Boot 2.x to Spring Boot 3.x using OpenRewrite:

**Recipe: Migrate to Spring Boot 3.x**

This composite recipe automates the migration from Spring Boot 2.x to Spring Boot 3.x. It updates build files, modifies deprecated APIs, and migrates configuration settings.

**Steps:**

1.  **Configure OpenRewrite:** Add the OpenRewrite plugin to your project.  You can configure it using Maven or Gradle.
2.  **Run the Migration:** Execute the `rewrite:run` goal in Maven or the `rewriteRun` task in Gradle.
3.  **Review Changes:** Inspect the changes made by OpenRewrite using `git diff`.

**Example:**

*   **Maven:**

```
mvn rewrite:run
```

*   **Gradle:**

```
gradlew rewriteRun
```

This recipe includes other recipes that handle specific parts of the migration, such as property migrations and API changes.


In [52]:
# Dump the raw response dict (query, result, source_documents) for inspection.
print(response)

{'query': 'Give me recipe to convert spring 2 to spring 3', 'result': "Here's a plan to migrate from Spring Boot 2.x to Spring Boot 3.x using OpenRewrite:\n\n**Recipe: Migrate to Spring Boot 3.x**\n\nThis composite recipe automates the migration from Spring Boot 2.x to Spring Boot 3.x. It updates build files, modifies deprecated APIs, and migrates configuration settings.\n\n**Steps:**\n\n1.  **Configure OpenRewrite:** Add the OpenRewrite plugin to your project.  You can configure it using Maven or Gradle.\n2.  **Run the Migration:** Execute the `rewrite:run` goal in Maven or the `rewriteRun` task in Gradle.\n3.  **Review Changes:** Inspect the changes made by OpenRewrite using `git diff`.\n\n**Example:**\n\n*   **Maven:**\n\n```\nmvn rewrite:run\n```\n\n*   **Gradle:**\n\n```\ngradlew rewriteRun\n```\n\nThis recipe includes other recipes that handle specific parts of the migration, such as property migrations and API changes.", 'source_documents': [Document(id='7021542b-bb18-42d9-80f0-

In [53]:
# List the source URL of every chunk the retriever supplied for the last answer.
for doc in response["source_documents"]:
    # Each document's metadata carries the "url" set when the index was built.
    print("From:", doc.metadata["url"])

From: https://docs.openrewrite.org/recipes/java/spring/boot3/springbootproperties_3_2
From: https://docs.openrewrite.org/running-recipes/popular-recipe-guides/migrate-to-spring-3
From: https://docs.openrewrite.org/recipes/java/spring/boot3/upgradespringboot_3_2
From: https://docs.openrewrite.org/recipes/java/spring/boot2/upgradespringboot_2_3
From: https://docs.openrewrite.org/recipes/java/spring/boot3


In [54]:
# Example 2: ask about a specific package migration; overwrites the global `response`.
response = qa_chain.invoke("How to migrate from javax.xml.bind to jakarta.xml.bind?")
print(response['result'])

```yaml
.openrewrite.org/v1beta/recipe:
  name: org.openrewrite.java.migrate.jakarta.JavaxXmlBindMigrationToJakartaXmlBind
  displayName: Migrate deprecated `javax.xml.bind` packages to `jakarta.xml.bind`
  description: |
    Java EE has been rebranded to Jakarta EE, necessitating a package relocation.
  tags:
    - jaxb
    - javax
    - jakarta
  recipeList:
    - org.openrewrite.java.dependencies.ChangeDependency:
        oldGroupId: javax.xml.bind
        oldArtifactId: jaxb-api
        newGroupId: jakarta.xml.bind
        newArtifactId: jakarta.xml-api
```

This recipe will migrate your project from `javax.xml.bind` to `jakarta.xml.bind`. It updates the `javax.xml.bind:jaxb-api` dependency to `jakarta.xml.bind:jakarta.xml-api`.


In [55]:
# Example 3: a general migration-guide question; overwrites the global `response`.
response = qa_chain.invoke("What changed in Java 17?")
print(response['result'])

# Show which indexed pages the answer was drawn from.
for doc in response["source_documents"]:
    print("From:", doc.metadata["url"])

Java SE 17 and JDK 17 include several updates and changes:

**New Language Feature:**

*   **Sealed Classes:** This feature, first previewed in Java SE 15, is now permanent. Sealed classes and interfaces restrict which other classes or interfaces may extend or implement them. (JEP 409)

**Library Changes:**

*   **Foreign Function and Memory API:** This API allows Java programs to interoperate with code and data outside the Java runtime. It was introduced in Java SE 17 and re-incubated with enhancements. (JEP 419)
*   **Vector API:** Introduced in Java SE 16 as an incubating API, it's re-incubated in this release with enhancements and performance improvements. (JEP 417)

**Removed APIs, Tools, and Components:**

*   For details on removals and deprecations, refer to the documentation on deprecated APIs and APIs removed in Java SE 18.
*   Finalization is deprecated for removal.

**Other Changes:**

*   Core reflection with method handles has been reimplemented (JEP 416).
*   Pattern Mat

In [56]:
# Example 4: ask how to author a custom recipe; overwrites the global `response`.
response = qa_chain.invoke("How to create a custom OpenRewrite recipe?")
print(response['result'])

# Show which indexed pages the answer was drawn from.
for doc in response["source_documents"]:
    print("From:", doc.metadata["url"])

To create a custom OpenRewrite recipe, you need to define it in a `rewrite.yml` file. Here's a general example:

```yaml
type: specs.openrewrite.org/v1beta/recipe
name: org.openrewrite.FindSpringUses
displayName: Find all Spring uses
description: This is an example of a custom recipe.
recipeList:
  - org.openrewrite.java.search.FindMethods:
      methodPattern: org.springframework..* *(..)
```

This example demonstrates a recipe that uses `FindMethods` to locate usages of Spring framework methods.

You can define your own recipes with different `recipeList` entries, customizing them to your specific needs. For instance, you could create recipes to:

*   Create an empty Java class:

    ```yaml
    type: specs.openrewrite.org/v1beta/recipe
    name: com.yourorg.CreateEmptyJavaClassExample
    displayName: Create Java class example
    recipeList:
      - org.openrewrite.java.CreateEmptyJavaClass:
          sourceRoot: src/main/java
          packageName: org.openrewrite.example
        

In [57]:
# Example 5: a broad multi-version migration question; overwrites the global `response`.
response = qa_chain.invoke("What are the major changes happened from java 8 to java 21")
print(response['result'])

# Show which indexed pages the answer was drawn from.
for doc in response["source_documents"]:
    print("From:", doc.metadata["url"])

Here's a summary of major changes when migrating from Java 8 to Java 21, based on the provided documentation:

*   **Significant Changes:** The documentation highlights the importance of understanding the updates and changes between your current JDK release and the target JDK release (in this case, 21). If migrating from JDK 8, you should be familiar with the differences between JDK 8 and later releases.

*   **New Features and Enhancements:** Java SE 21 and JDK 21 include updates such as:

    *   Language Features: Record patterns and type patterns are enhanced for data navigation and processing.
*   **Migration Recipe:**

    *   There is an OpenRewrite recipe specifically for migrating to Java 21: `org.openrewrite.java.migrate.UpgradeToJava21`.

    *   This recipe addresses common changes needed when migrating to Java 21.

    *   It replaces deprecated APIs with equivalents where a clear migration strategy exists.

    *   It updates build files to use Java 21 as the target/sourc