In [3]:
pip install openai

Collecting openai
  Downloading openai-1.34.0-py3-none-any.whl.metadata (21 kB)
Collecting anyio<5,>=3.5.0 (from openai)
  Downloading anyio-4.4.0-py3-none-any.whl.metadata (4.6 kB)
Collecting distro<2,>=1.7.0 (from openai)
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting pydantic<3,>=1.9.0 (from openai)
  Downloading pydantic-2.7.4-py3-none-any.whl.metadata (109 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.4/109.4 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting sniffio (from openai)
  Downloading sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)
Collecting tqdm>4 (from openai)
  Downloading tqdm-4.66.4-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m987.3 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting typing-extens

In [None]:
import csv
import os
import threading
from concurrent.futures import ThreadPoolExecutor

from openai import OpenAI

import config


def read_file_content(file_path):
    """Read a text file and return its contents as a string.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default encoding.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's text, or None if the file cannot be opened, read, or
        decoded (an error message is printed in that case).
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to be
        # listed explicitly for undecodable files to follow the same
        # "print and return None" path as missing/unreadable ones.
        print(f"Error reading file {file_path}: {e}")
        return None

def chunk_text(text, max_tokens):
    """Split text into consecutive pieces of at most max_tokens characters.

    Despite the parameter name, the limit is measured in characters (the
    text is sliced by index), not in model tokens.

    Args:
        text: The string to split.
        max_tokens: Maximum length, in characters, of each piece. Must be
            positive.

    Returns:
        A list of substrings in their original order; every piece except
        possibly the last has exactly max_tokens characters. An empty text
        yields an empty list.

    Raises:
        ValueError: If max_tokens is zero or negative, since such a size can
            never consume the text.
    """
    if max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")
    return [
        text[start:start + max_tokens]
        for start in range(0, len(text), max_tokens)
    ]

def get_user_prompt():
    """Ask for a prompt on standard output and return the line the user types."""
    print("Please enter the prompt for OpenAI completion:")
    typed_prompt = input()
    return typed_prompt

def process_text(client, chunk, prompt, model="gpt-4-turbo"):
    """Send one chunk of text, combined with the prompt, to the chat API.

    If the prompt contains the explicit placeholder ``{content}``, each
    placeholder is replaced by the chunk. Otherwise the chunk is appended
    after the prompt, separated by a blank line. The plain word "content" is
    deliberately NOT treated as a placeholder: it occurs in ordinary
    instruction sentences, and substituting it would paste the chunk into
    the prompt several times and corrupt the instructions.

    Args:
        client: Object exposing ``chat.completions.create(model=, messages=)``.
        chunk: The piece of text to process.
        prompt: Instruction text, optionally containing ``{content}``.
        model: Model name passed to the API call.

    Returns:
        The message content of the first choice in the response, or None if
        building the prompt or calling the API raised (the error is printed).
    """
    placeholder = "{content}"
    try:
        if placeholder in prompt:
            # str.replace rather than str.format: the chunk or prompt may
            # contain other braces that must be left untouched.
            full_prompt = prompt.replace(placeholder, chunk)
        else:
            full_prompt = f"{prompt}\n\n{chunk}"
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": full_prompt},
            ],
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort: a failed chunk is reported and skipped by the caller.
        print(f"Failed to process text: {e}")
        return None

def process_file(client, file_path, root_folder, prompt, writer, lock):
    """Run one file through the API and append its result row to the CSV.

    The file is read, split into pieces of at most 16300 characters, and each
    piece is sent through process_text. Pieces that yield no response are
    dropped; the remaining responses are joined with newlines. Nothing is
    written when the file is empty or could not be read.

    Args:
        client: API client forwarded to process_text.
        file_path: Path of the file to process.
        root_folder: Folder that the recorded filename is made relative to.
        prompt: Prompt forwarded to process_text for every piece.
        writer: csv.DictWriter-like object receiving the row.
        lock: Lock held while the row is written, since the writer is shared.
    """
    content = read_file_content(file_path)
    if not content:
        return

    parent_dir = os.path.relpath(os.path.dirname(file_path), root_folder)
    formatted_filename = os.path.join(parent_dir, os.path.basename(file_path))

    # Lazily process each piece in order, keeping only non-empty replies.
    replies = (
        process_text(client, piece, prompt)
        for piece in chunk_text(content, 16300)
    )
    combined_response = "\n".join(reply for reply in replies if reply)

    row = {
        'filename': formatted_filename,
        'file_content': content,
        'response': combined_response,
    }
    with lock:
        writer.writerow(row)

def process_files(root_folder, extensions, output_file, prompt, max_workers=8):
    """Process every matching file under a directory tree and write a CSV.

    Walks root_folder recursively, and for each file whose name ends with one
    of the given extensions runs process_file on a worker thread. Results are
    written to output_file with the columns 'filename', 'file_content' and
    'response'.

    Work is dispatched through a bounded thread pool rather than one thread
    per file, so a large tree does not open an unbounded number of
    simultaneous API requests.

    Args:
        root_folder: Directory to walk.
        extensions: Iterable of filename suffixes to include (e.g. ".txt").
        output_file: Path of the CSV file to create (overwritten if present).
        prompt: Prompt forwarded to process_file for every file.
        max_workers: Maximum number of files processed concurrently.
    """
    client = OpenAI(api_key=config.api_key)  # API key is read from the local config module
    lock = threading.Lock()  # Serializes writes to the shared CSV writer
    suffixes = tuple(extensions)  # str.endswith accepts a tuple of suffixes

    # Explicit UTF-8 so non-ASCII file content can always be written,
    # independent of the platform's default encoding.
    with open(output_file, mode='w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['filename', 'file_content', 'response']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            pending = {}
            for folder_path, _, filenames in os.walk(root_folder):
                for filename in filenames:
                    if filename.endswith(suffixes):
                        file_path = os.path.join(folder_path, filename)
                        future = executor.submit(
                            process_file,
                            client, file_path, root_folder, prompt, writer, lock,
                        )
                        pending[future] = file_path

            # Wait for every task while the CSV file is still open, and
            # report worker exceptions instead of letting the pool hide them.
            for future, file_path in pending.items():
                try:
                    future.result()
                except Exception as e:
                    print(f"Failed to process file {file_path}: {e}")

# Script entry point: the batch job runs only when this code is executed directly.
if __name__ == "__main__":
    # NOTE(review): absolute, machine-specific input path — adjust before running elsewhere.
    root_folder = "/Users/parthagarwal/Desktop/Allen_12/Biology_all/"
    extensions = [".txt", ".mmd"]
    output_file = "bio1.csv"
    # Adjacent string literals are concatenated into one single-line prompt.
    user_prompt = (
        "Design a series of structured concept cards tailored for NEET preparation, focusing on a specific Biology chapter. "
        "Each card should contain 30-35 words and include critical and key Biology concepts essential for the NEET exam. "
        "Use content from the provided 'mmd_content' for each card, ensuring it covers all necessary concepts and data from the 'mmd_file'. "
        "The layout of the cards should facilitate quick revision, enhance memorization, and aid in understanding, making them highly effective for NEET candidates. "
        "Ensure the content is concise, complete, and optimized for quick learning."
    )
    process_files(
        root_folder=root_folder,
        extensions=extensions,
        output_file=output_file,
        prompt=user_prompt,
    )
