In [2]:
from typing import List
import ollama
from pydantic import BaseModel
import os 
import json 
import requests
import tiktoken

In [3]:
class SummaryResult(BaseModel):
    summary: str
    code: str
    metadata: dict

In [4]:
enc = tiktoken.get_encoding("cl100k_base")

In [15]:
class ContextAwareFunctionSummaryGenerator: 
    '''
    This module aims to generate a summary for each function in a repository. Only python files are supported. 
    To make the summary generation of each function context aware, we do a depth first search on the file system tree and appending the context for each 
    step of the traversal. The run function runs the whole module and stores the results in self.vectors. 
    '''
    def __init__(self, ROOT, api):
        self.ROOT = ROOT
        self.api = api # api can be github api or local file system
        self.tokenLimit = 1000
        self.vectors: List[SummaryResult] = []
    
    def LLM_call(self, prompt: str):
        try:
            
            url = "http://localhost:11434/api/generate"

            # Correctly format the payload using json.dumps()
            payload = json.dumps({
                "model": "llama3.2",
                "prompt": prompt,
                "stream": False
            })

            headers = {
                "Content-Type": "application/json"
            }

            response = requests.post(url, headers=headers, data=payload)
            response = response.json()
            summary = response["response"]
            
            return summary
        except Exception as e:
            print(f"Error generating summary: {e}")
            return f"Error generating summary: {e}"
    
    def process_file(self, context: str, path: str):
        '''
        called when the traversal encounters a file.
        calls self.parse to parse the file into list of function, summarizes each function, and store them in self.vectors. 
        '''
        if not path.endswith('.py'):
            return
        
        file_vectors: List[SummaryResult] = []

        try:
            functions = self.parse(path)
            
            # Add file-specific information to context
            file_context = f"{context}\nFile: {path}\n"
            
            for func in functions:
                summary_result = self.summarize(file_context, func)
                file_vectors.append(summary_result)
            current_token_count = sum([len(enc.encode(vector.summary)) for vector in file_vectors])            
            while( self.tokenLimit < current_token_count):
                
                shortened_file_vectors: List[SummaryResult] = []
                for vector in file_vectors:
                    # shorten each vector
                    prompt = f"""""
                    We have a collection of summaries of functions in file whose token count is {current_token_count}. We cannot exceed this amount of tokens: {self.tokenLimit}
                    Please shorten the following individual summary by a reasonable amount while retaining meaning: 
                    {vector.summary}                    """
                    summary = self.LLM_call(prompt)
                    new_vector = SummaryResult(
                        summary=summary,
                        code=vector.code,
                        metadata=vector.metadata
                    )
                    shortened_file_vectors.append(new_vector)
                
                file_vectors = shortened_file_vectors
                current_token_count = sum([len(enc.encode(vector.summary)) for vector in file_vectors])  

            collection_of_summaries = ""
            for vector in file_vectors: 
                collection_of_summaries += vector.summary + "\n"
            prompt = f"""""
            We have the following collection of summaries of each function within this file. Based on these summaries, give a brief summary of the file itself based on these summaries
            Collections of Summaries: 
            {collection_of_summaries}
            """
            file_summary = self.LLM_call(prompt)
            
            return file_summary
        
            
                
        except Exception as e:
            print(f"Error processing file {path}: {e}")

    def process_dir(self, context: str, path: str): 
        '''
        called when the traversal encounters a directory
        processes the directory. 
        Calls process file for .py files in the directory.
        Uses current files's context and relavant documentation to enrich the context. 
        Calls process_dir on subdirectories with the enriched context. 
        '''
        import os
        
        try:
            # Enhance context with directory information
            dir_context = f"{context}\nDirectory: {path}\n"
            
            # Look for README or documentation to enhance context
            readme_path = os.path.join(path, "README.md")
            if os.path.exists(readme_path):
                try:
                    with open(readme_path, 'r') as f:
                        readme_content = f.read()
                    dir_context += f"\nREADME: {readme_content}\n"
                except Exception as e:
                    print(f"Error reading README {readme_path}: {e}")
            
            # Process all items in the directory
            for item in os.listdir(path):
                item_path = os.path.join(path, item)
                
                # Skip hidden files and directories
                if item.startswith('.'):
                    continue
                
                if os.path.isfile(item_path):
                    self.process_file(dir_context, item_path)
                elif os.path.isdir(item_path):
                    self.process_dir(dir_context, item_path)
                    
        except Exception as e:
            print(f"Error processing directory {path}: {e}")

    def parse(self, file_path) -> List[str]: 
        '''
        parse a .py file using the AST module into a list of strings, each string
        containing the code of a function 
        '''
        import ast
        
        functions = []
        try:
            with open(file_path, 'r') as f:
                file_content = f.read()
                print("parse function read file")
                
            tree = ast.parse(file_content)
            lines = file_content.splitlines()
            
            for node in ast.walk(tree):
                if isinstance(node, ast.FunctionDef):
                    # Get start line (1-based indexing in AST, convert to 0-indexed)
                    start_line = node.lineno - 1
                    
                    # Find function end based on indentation
                    base_indent = None
                    end_line = len(lines)
                    
                    # Get the indentation of the function definition line
                    first_line = lines[start_line]
                    base_indent = len(first_line) - len(first_line.lstrip())
                    
                    # Find the end of the function
                    for i, line in enumerate(lines[start_line + 1:], start_line + 1):
                        if not line.strip() or line.strip().startswith('#'):
                            continue
                            
                        curr_indent = len(line) - len(line.lstrip())
                        if curr_indent <= base_indent:
                            end_line = i
                            break
                    
                    function_code = '\n'.join(lines[start_line:end_line])
                    functions.append(function_code)
                    
        except Exception as e:
            print(f"Error parsing file {file_path}: {e}")
            
        return functions

    def summarize(self, context: str, target: str) -> SummaryResult:
        '''
        @params: 
        context: contextual information about the codebase
        target: the function string to be summarized
        Use LLM to summarize the target 
        '''
        prompt = f"""
        Given the following context about a codebase:
        Context is in the form of the current file's path followed by available summaries of ancestor directory files.
        {context}
        
        Please provide a concise summary for this function:
        ```python
        {target}
        ```
        
        Provide a clear description of:
        1. What the function does (purpose)
        2. Input parameters and their types
        3. Return value and type
        4. Any side effects
        5. How it relates to the rest of the codebase based on the context
        
        Format your response as a concise paragraph.
        """
        
        summary = self.LLM_call(prompt)

        return SummaryResult(
            summary=summary,
            code=target,
            metadata={"context":context}
        ) 

        


    def run(self) -> None: 
        '''
        runs the whole workflow
        '''
        print(f"Starting code summary generation for repository: {self.ROOT}")
        
        initial_context = f"Repository: {self.ROOT}\n"
        
        # Start processing from the root directory
        self.process_dir(initial_context, self.ROOT)
        
        print(f"Generated {len(self.vectors)} function summaries")


def main():
    # Repository path to analyze
    repo_path = os.path.abspath(".")
    
    # Initialize the summary generator
    # Using None for api since we're working with local filesystem
    generator = ContextAwareFunctionSummaryGenerator(ROOT=repo_path, api=None)
    
    # Run the summary generation process
    generator.run()
    
    print(f"Summary generation complete. Found {len(generator.vectors)} functions.")
    
    # Save the results to a JSON file
    results = [
        {
            "summary": item.summary,
            "code": item.code,
            "file_path": item.metadata.get("context", "").split("File: ")[-1].split("\n")[0] if "File: " in item.metadata.get("context", "") else "unknown",
            "repository": repo_path
        }
        for item in generator.vectors
    ]
    
    # Save to file
    output_file = "function_summaries.json"
    with open(output_file, "w") as f:
        json.dump(results, f, indent=2)
    
    print(f"Saved {len(results)} function summaries to {output_file}")
    
    # Display a few examples
    print("\nSample summaries:")
    for i, result in enumerate(results[:3]):
        print(f"\n--- Function {i+1} ---")
        print(f"File: {result['file_path']}")
        print(f"Summary: {result['summary'][:150]}...")

if __name__ == "__main__":
    main()

Starting code summary generation for repository: /Users/jerometh/Projects/CDS/smart-search/summary
parse function read file
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
parse function read file
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
Error generating summary: 'response'
parse function read file
Error generating summary: 'response'
Error generating summary: 'response'
parse function read file
Error generating summary

In [16]:
generator = ContextAwareFunctionSummaryGenerator(ROOT=os.getcwd(), api=None)

In [17]:
parsed_functions = generator.parse("test.py")
with open("test.py", 'r') as f:
    file_content = f.read()
    print(file_content)

print("Parsed Functions:\n")
for i, func in enumerate(parsed_functions, 1):
    context = "File Path: './'" \
    "Summary: "

    print((generator.summarize(context, func)).summary)

parse function read file
import requests
import json
import time
from collections import deque

class WeatherDataProcessor:
    def __init__(self, api_key, cache_size=5):
 
        self.api_key = api_key
        self.cache = deque(maxlen=cache_size)

    def fetch_weather_data(self, city):

        url = f"https://api.openweathermap.org/data/2.5/weather?q={city}&appid={self.api_key}"
        response = requests.get(url)
        
        if response.status_code == 200:
            data = response.json()
            self.cache.append((city, data, time.time()))
            return data
        else:
            raise ValueError(f"Failed to fetch weather data for {city}")

    def get_cached_weather(self, city):
 
        for cached_city, data, timestamp in self.cache:
            if cached_city == city and time.time() - timestamp < 600:  # 10-minute cache
                return data
        return None

    def analyze_weather(self, weather_data):

        temp = weather_data["main"]["temp

In [102]:
print(generator.process_file("", "./test.py"))

Based on these summaries, this file is likely part of a Python-based project that interacts with an OpenWeatherMap API to manage and display weather data for various cities. The code appears to utilize caching mechanisms to improve efficiency and reduce the number of requests made to the API.

The file contains functions that:

1. Initialize an instance of a class with an API key and optional cache size parameter.
2. Fetch current weather data from the OpenWeatherMap API for a given city, caching responses when possible.
3. Retrieve cached weather data for a given city, with a 10-minute expiration time.
4. Analyze weather data by converting temperature to Celsius, extracting weather descriptions, and calculating wind speed.
5. Combine cached weather data with an analysis of the latest weather data for a specified city.

The overall purpose of this file is to provide a flexible and efficient way to manage and display weather data for various cities, leveraging caching and external API c