# Imports and Creating the Working Directories

Creates the output directories and detects the open windows on all desktops.

In [39]:
# Step 1: Imports
import os
import pygetwindow as gw
import pyautogui
import pytesseract
from PIL import Image
import time
from datetime import datetime

# Default install location of Tesseract on Windows. The override is applied only
# when the executable really exists there, so a machine with Tesseract installed
# elsewhere keeps pytesseract's default command lookup instead of being pointed
# at a missing file.
TESSERACT_EXE = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE

# Step 2: Create the output folders if they do not exist yet
output_dir = "parsed_texts"        # OCR results
output_dir2 = "explained_solns"    # generated solutions
output_dir3 = "screen_shots"       # raw window captures
for folder in (output_dir, output_dir2, output_dir3):
    os.makedirs(folder, exist_ok=True)

# Step 3: Detect desktop windows
# An empty string is a substring of every title, so this matches every window.
windows = gw.getWindowsWithTitle("")

# Keep only windows that are not minimized and are larger than 100x100 pixels
desktop_windows = [
    win
    for win in windows
    if not win.isMinimized and win.width > 100 and win.height > 100
]

print(f"Detected {len(desktop_windows)} windows (potential desktops):")
for i, win in enumerate(desktop_windows):
    print(f"{i+1}. {win.title} (Width: {win.width}, Height: {win.height})")

Detected 15 windows (potential desktops):
1. ● main.ipynb - Leetcode_Solver - Visual Studio Code (Width: 1936, Height: 1056)
2. ChatGPT - Leet code solver - Google Chrome (Width: 1936, Height: 1056)
3. wf1.ipynb - Ontology_V2 - Visual Studio Code (Width: 1200, Height: 800)
4. (31) Jon Stewart on Trump's Botched Tariff Rollout & The Stock Market's Meltdown | The Daily Show - YouTube - Google Chrome (Width: 1936, Height: 1056)
5. League of Legends (Width: 1024, Height: 576)
6. m17fu - Search and 1 more page - Work - Microsoft​ Edge (Width: 1936, Height: 1056)
7. Calculator (Width: 336, Height: 540)
8. Settings (Width: 1920, Height: 1040)
9.  (Width: 1920, Height: 1040)
10.  (Width: 1920, Height: 1040)
11. NVIDIA GeForce Overlay (Width: 1920, Height: 1080)
12. Microsoft Text Input Application (Width: 1920, Height: 1080)
13.  (Width: 3840, Height: 1080)
14.  (Width: 3840, Height: 1080)
15. Program Manager (Width: 3840, Height: 1080)


# Choose a Window, Take a Screenshot, Run OCR, and Save the Parsed Text

In [41]:
from PIL import ImageEnhance

# --- Tunable settings -------------------------------------------------------
index_to_use = 1        # Position in desktop_windows of the window to capture
zoom_int = 4            # Upscale factor applied to the capture before OCR
contrast_factor = None  # Optional: set to e.g. 2.0 to boost contrast before OCR

# Define output directories
screenshot_dir = "screen_shots"
parsed_text_dir = "parsed_texts"

# Ensure folders exist
os.makedirs(screenshot_dir, exist_ok=True)
os.makedirs(parsed_text_dir, exist_ok=True)

# Negative indexes are rejected too: they would pass a plain length check and
# silently select a window counted from the end of the list.
if 0 <= index_to_use < len(desktop_windows):
    target_window = desktop_windows[index_to_use]
    target_window.activate()
    time.sleep(1)  # Allow the window to focus

    # Region passed to pyautogui as (left, top, width, height)
    bbox = (target_window.left, target_window.top, target_window.width, target_window.height)

    # Take screenshot and keep a copy on disk
    screenshot = pyautogui.screenshot(region=bbox)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    image_path = os.path.join(screenshot_dir, f"screenshot_{timestamp}.png")
    screenshot.save(image_path)

    # === OCR Section ===

    # Upscale the in-memory capture (by zoom_int) instead of re-reading the PNG
    # that was just written; this avoids a redundant decode and an extra open
    # file handle.
    image = screenshot
    upscaled_image = image.resize(
        (image.width * zoom_int, image.height * zoom_int),
        Image.LANCZOS,
    )

    # Optional contrast boost, disabled while contrast_factor is None
    if contrast_factor is not None:
        upscaled_image = ImageEnhance.Contrast(upscaled_image).enhance(contrast_factor)

    # Run OCR with custom config
    custom_config = r'--oem 3 --psm 6'
    parsed_text = pytesseract.image_to_string(upscaled_image, config=custom_config)

    # Save parsed text under the same timestamp as the screenshot
    text_path = os.path.join(parsed_text_dir, f"parsed_text_{timestamp}.txt")
    with open(text_path, 'w', encoding='utf-8') as f:
        f.write(parsed_text)

    print(f"✅ Screenshot saved to: {image_path}")
    print(f"📝 Parsed text saved to: {text_path}")
else:
    print(f"❌ Not enough windows. Index {index_to_use} is out of range.")


✅ Screenshot saved to: screen_shots\screenshot_20250410_021703.png
📝 Parsed text saved to: parsed_texts\parsed_text_20250410_021703.txt


# GPT API Implementation

In [None]:
from openai import OpenAI
from dotenv import load_dotenv
import os
from datetime import datetime

# Load API key from .env
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Directory for parsed text and solution output
parsed_text_dir = "parsed_texts"
explained_solns_dir = "explained_solns"
# Create both folders so this cell also works when run on its own: listing a
# missing input folder would otherwise raise FileNotFoundError.
os.makedirs(parsed_text_dir, exist_ok=True)
os.makedirs(explained_solns_dir, exist_ok=True)

# Get latest parsed text file: names are sorted in descending order and the
# first one ending in .txt is taken.
parsed_text_files = sorted(os.listdir(parsed_text_dir), reverse=True)
latest_text_file = next((f for f in parsed_text_files if f.endswith('.txt')), None)

if latest_text_file:
    with open(os.path.join(parsed_text_dir, latest_text_file), 'r', encoding='utf-8') as f:
        parsed_text = f.read()

    if not parsed_text.strip():
        # Nothing was recognised by OCR; skip the API call instead of sending
        # an empty prompt.
        print(f"⚠️ Parsed text file is empty: {latest_text_file}")
    else:
        system_prompt = (
            "You are an expert competitive programmer. In the provided text, "
            "there is a LeetCode-style coding question. Your job is to extract the problem, "
            "explain the logic step-by-step, and provide a full working solution, Giving the On time and space analysis. Your answer should be in python."
        )

        user_prompt = (
            f"Here is the parsed content from a screenshot:\n\n"
            f"{parsed_text}\n\n"
            "Please extract the coding question (if any), explain your thought process clearly, "
            "and provide a complete answer with code."
        )

        # Submit to OpenAI using latest SDK format
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.7,
            max_tokens=10000
        )

        # Extract the response; the content field may be None, which cannot be
        # written to the Markdown file.
        gpt_response = completion.choices[0].message.content
        if gpt_response is None:
            print("⚠️ The model returned no text content.")
        else:
            print("🧠 GPT Response:\n")
            print(gpt_response)

            # Save as Markdown file
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            md_path = os.path.join(explained_solns_dir, f"solution_{timestamp}.md")
            with open(md_path, 'w', encoding='utf-8') as f:
                f.write(gpt_response)

            print(f"✅ Solution saved to: {md_path}")

else:
    print("⚠️ No parsed text file found.")


🧠 GPT Response:

### Problem: Group Anagrams (LeetCode #49)

#### Description:
Given an array of strings `strs`, group the anagrams together. You can return the answer in any order.

An anagram is a word or phrase formed by rearranging the letters of a different word or phrase, typically using all the original letters exactly once.

#### Example:
**Input:** `strs = ["eat", "tea", "tan", "ate", "nat", "bat"]`  
**Output:** `[["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]`

#### Constraints:
- `1 <= strs.length <= 10^4`
- `1 <= strs[i].length <= 100`
- `strs[i]` consists of lowercase English letters.

### Thought Process:
1. **Understanding Anagrams**: Two strings are anagrams if they contain the same characters with the same frequency. For instance, "eat" and "tea" are anagrams because they both consist of the letters 'e', 'a', and 't'.

2. **Grouping Logic**: To group anagrams, we can use a hash table (or dictionary) where the key is a representation of the anagram group (such as a so