In [1]:
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("VSCodeTest") \
    .master("local[*]") \
    .getOrCreate()

print("‚úÖ Spark session running in VS Code!")
print("Spark version:", spark.version)

‚úÖ Spark session running in VS Code!
Spark version: 4.0.0


In [None]:
from openai import OpenAI
from IPython.display import Markdown, display
import unicodedata
import re

# Connect to your local Ollama server
client = OpenAI(
    base_url="http://localhost:11434/v1",
    api_key="ollama"  # Dummy key required by SDK
)

def ask_local_gpt(prompt: str, model="gpt-oss:20b", system_message=None, render_markdown=True):
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    messages.append({"role": "user", "content": prompt})

    response = client.chat.completions.create(
        model=model,
        messages=messages,
    )

    raw_output = response.choices[0].message.content

    # Normalize spacing but preserve formatting
    clean_output = unicodedata.normalize("NFKC", raw_output).replace("\u202f", " ")

    if render_markdown:
        display(Markdown(clean_output))
    return clean_output


In [43]:
import time
import json
from typing import Optional
from openai import OpenAI
from IPython.display import Markdown, display


# Connect to your local Ollama server
client = OpenAI(
    base_url="http://localhost:11434/v1",
    api_key="ollama"  # Dummy key required by SDK
)

In [44]:

# --- Setup ---
conversation_log = []
chat_memory = []

def ask_local_gpt(
    prompt: str,
    model: str = "gpt-oss:20b",
    system_message: Optional[str] = None,
    render_markdown: bool = True,
    verbose: bool = False,
    return_raw: bool = False,
    reset_chat: bool = False,
    show_history: bool = False,
    stream_response: bool = True,
    reasoning_mode: bool = False,
    json_mode: bool = False  # <-- new flag!
) -> Optional[str]:
    """
    Query a local GPT model via Ollama, with support for step-by-step reasoning (Scratchpad).
    If json_mode is True, the model is instructed to respond in JSON, and results are parsed accordingly.

    Args:
        prompt (str): The user‚Äôs input/question.
        ...
        json_mode (bool): If True, instructs the LLM to return a JSON object with 'thoughts' and 'answer'.
    Returns:
        str or None: Clean final answer or full output, depending on flags.
    """
    global chat_memory, conversation_log

    try:
        if reset_chat:
            chat_memory = []

        # -- Prompt setup --
        if json_mode:
            sys_prompt = (
                "You are a helpful assistant. "
                "For every question, reply ONLY in this exact JSON format: "
                "{\"thoughts\": \"<step-by-step reasoning>\", \"answer\": \"<final answer only>\"} "
                "Do not include any extra commentary, code fences, or markdown‚Äîjust the JSON."
            )
            chat_memory = [{"role": "system", "content": sys_prompt}]
            full_prompt = prompt
        else:
            # Normal or reasoning mode (old logic)
            default_reasoning_prompt = (
                "You are a helpful assistant that always reasons step by step before giving an answer. "
                "First, think through the problem, then provide a clear final answer."
            )
            if not any(m.get("role") == "system" for m in chat_memory):
                chat_memory.insert(0, {
                    "role": "system",
                    "content": system_message or (default_reasoning_prompt if reasoning_mode else "")
                })
            if reasoning_mode:
                full_prompt = (
                    f"### Scratchpad:\n"
                    f"The user asked: \"{prompt.strip()}\"\n"
                    f"Think step-by-step and reason before answering.\n\n"
                    f"### Final Answer:\n"
                )
            else:
                full_prompt = prompt

        chat_memory.append({"role": "user", "content": full_prompt})
        start = time.time()

        # --- Get response (streamed or not) ---
        if stream_response:
            print("ü§î Thinking...\n")
            stream = client.chat.completions.create(
                model=model,
                messages=chat_memory,
                stream=True
            )
            tokens = []
            for chunk in stream:
                delta = chunk.choices[0].delta.content or ""
                print(delta, end="", flush=True)
                tokens.append(delta)
            print()
            assistant_reply = "".join(tokens)
        else:
            response = client.chat.completions.create(
                model=model,
                messages=chat_memory
            )
            assistant_reply = response.choices[0].message.content

        chat_memory.append({"role": "assistant", "content": assistant_reply})

        # --- Parse output ---
        thoughts, final_answer = "", assistant_reply.strip()
        if json_mode:
            # Try to parse first valid JSON block in reply
            try:
                start_idx = assistant_reply.find('{')
                end_idx = assistant_reply.rfind('}') + 1
                if start_idx == -1 or end_idx == -1:
                    raise ValueError("No JSON object detected in model reply.")
                data = json.loads(assistant_reply[start_idx:end_idx])
                thoughts = data.get("thoughts", "").strip()
                final_answer = data.get("answer", "").strip()
            except Exception as e:
                print("‚ùå Failed to parse model JSON output:", e)
                thoughts = ""
                final_answer = assistant_reply.strip()
        else:
            # fallback: regex/extraction (from your original, see above)
            import re
            scratchpad_match = re.search(
                r"### Scratchpad:\s*(.*?)### Final Answer:",
                assistant_reply,
                re.DOTALL | re.IGNORECASE
            )
            final_answer_match = re.search(
                r"### Final Answer:\s*([\s\S]*?)(?:$|\n#|\n\n)",
                assistant_reply,
                re.DOTALL | re.IGNORECASE
            )
            if scratchpad_match:
                thoughts = scratchpad_match.group(1).strip()
            if final_answer_match:
                final_answer = final_answer_match.group(1).strip()

        # --- Log results ---
        conversation_log.append({
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "prompt": prompt,
            "thoughts": thoughts,
            "answer": final_answer,
            "raw": assistant_reply
        })

        # --- Output control ---
        if return_raw:
            return assistant_reply
        elif render_markdown:
            display(Markdown(final_answer))
        elif verbose:
            print("\nüßº Clean answer:\n", final_answer)
        else:
            return final_answer

        if verbose:
            print(f"\n‚úÖ Response time: {round(time.time() - start, 2)}s")

        if show_history:
            print("\nüìú Message History:")
            for msg in chat_memory:
                print(f"{msg['role'].upper()}: {msg['content']}\n")

        return final_answer if verbose else None

    except Exception as e:
        print("‚ùå Error in ask_local_gpt:", str(e))
        return "Error occurred."


In [49]:
ask_local_gpt("Can you provide a comparison between Samsung phones and iPhones", stream_response=True, reasoning_mode=True,  json_mode=True);

ü§î Thinking...

{"thoughts":"I interpreted the user‚Äôs request as a desire for a concise side‚Äëby‚Äëside comparison of Samsung phones and iPhones. I considered key comparison categories such as design and build, operating system, hardware specifications, camera performance, battery life and charging, price ranges, ecosystem and software integration, and updates. I decided to format the answer in plain text with line breaks for readability, while keeping it concise and avoiding excessive detail. I made sure the JSON structure follows the required format exactly, with two keys: 'thoughts' for the reasoning and 'answer' for the final comparison.\n","answer":"Design & Build:\n- Samsung: Android, diverse form factors (regular, foldable, rugged), high‚Äëresolution displays, glass or metal chassis.\n- iPhone: iOS, consistent flat‚Äëfront design, aluminum/ glass body, premium materials.\n\nOperating System:\n- Samsung: Android (custom One UI skin), more flexibility, expandable storage, Goo

Design & Build:
- Samsung: Android, diverse form factors (regular, foldable, rugged), high‚Äëresolution displays, glass or metal chassis.
- iPhone: iOS, consistent flat‚Äëfront design, aluminum/ glass body, premium materials.

Operating System:
- Samsung: Android (custom One UI skin), more flexibility, expandable storage, Google services.
- iPhone: iOS, tighter integration, closed ecosystem, no expandable storage.

Hardware & Performance:
- Samsung: Latest Qualcomm/Snapdragon or Exynos (depending on region), usually more RAM, cutting‚Äëedge processors.
- iPhone: Apple A‚Äëseries chips, strong single‚Äëcore performance, highly optimized for software.

Camera:
- Samsung: Typically multi‚Äëcamera setups, higher specs (50‚Äë100MP), advanced zoom, diverse modes.
- iPhone: Consistent image quality, excellent computational photography, fewer but refined lenses.

Battery & Charging:
- Samsung: Larger capacities, fast charging (25‚Äë100W), reversible or wired charging.
- iPhone: Slightly smaller capacities, fast charging (20W max), MagSafe accessories.

Price Range:
- Samsung: Wide spectrum from budget (~$200) to flagship (~$1500).
- iPhone: Premium pricing, flagship (~$1400) to mid‚Äërange (~$400), no budget models.

Ecosystem:
- Samsung: Samsung Wearables, DeX, SmartThings for home.
- iPhone: Apple Watch, AirPods, HomePod, tight integration with macOS, iCloud.

Software Updates:
- Samsung: Gradual rollout, official support 3‚Äì4 years for major releases.
- iPhone: Regular yearly updates for 5+ years.

Overall Choice:
- Samsung appeals to users wanting hardware variety, customizable software, high specs at various price points.
- iPhone appeals to users who value ecosystem cohesion, consistent software experience, and long‚Äëterm OS support.

In [50]:
import pandas as pd
df_log = pd.DataFrame(conversation_log)
df_log.to_csv("chat_log.csv", index=False)

In [51]:
df_log

Unnamed: 0,timestamp,prompt,thoughts,answer,raw
0,2025-08-06 22:05:12,What's 1 + 1?,The user asks for a simple arithmetic sum. 1 +...,2,"{""thoughts"":""The user asks for a simple arithm..."
1,2025-08-06 22:06:17,what is a women's favorite thing to eat using ...,,,"{""Error"":""I‚Äôm sorry, but I can‚Äôt help with tha..."
2,2025-08-06 22:08:40,Can you provide a comparison between Samsung p...,I interpreted the user‚Äôs request as a desire f...,"Design & Build:\n- Samsung: Android, diverse f...","{""thoughts"":""I interpreted the user‚Äôs request ..."


In [2]:
spark.stop()