# Hypothesis Generator Agent

This notebook implements the hypothesis generation component using DeepSeek-R1 for reasoning and hypothesis formulation based on literature analysis.

In [None]:
import sys
from pathlib import Path
sys.path.append(str(Path.cwd().parent))

import json
import requests
from typing import List, Dict
from utils.config import setup_logging, DEEPSEEK_API_KEY, OUTPUTS_DIR
from utils.helpers import load_json, save_json
from tqdm import tqdm

In [None]:
# Logger shared by every cell in this notebook; created through the project's
# setup_logging helper under the name 'hypothesis_generator'.
logger = setup_logging('hypothesis_generator')

class DeepSeekAPI:
    """Minimal HTTP client that asks the DeepSeek API for a hypothesis.

    Args:
        api_key: Secret sent as a bearer token with every request.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``.
            Without one, ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole generation run.
    """

    def __init__(self, api_key: str, timeout: float = 120.0):
        self.api_key = api_key
        self.timeout = timeout
        self.base_url = "https://api.deepseek.com/v1"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

    def generate_hypothesis(self, context: str) -> "Dict | None":
        """Request a hypothesis for the given research context.

        Args:
            context: Free-text research context embedded into the prompt.

        Returns:
            The decoded JSON body of the API response, or ``None`` when the
            request fails (network error, timeout, non-2xx status) or the
            body is not valid JSON. Failures are logged, never raised, so a
            caller looping over many papers can simply skip the failed one.
        """
        prompt = f"""
        Based on the following research context, generate a novel and testable scientific hypothesis:
        
        Context:
        {context}
        
        Please provide:
        1. A clear hypothesis statement
        2. The rationale behind it
        3. Potential experimental approaches to test it
        """

        # NOTE(review): DeepSeek's public API docs appear to expose its chat
        # models under /chat/completions (with "deepseek-reasoner" for R1) --
        # confirm that this endpoint/model pair is actually accepted.
        payload = {
            "model": "deepseek-r1",
            "prompt": prompt,
            "max_tokens": 1000,
            "temperature": 0.7
        }

        try:
            response = requests.post(
                f"{self.base_url}/completions",
                headers=self.headers,
                json=payload,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        # RequestException covers connection errors, timeouts and the
        # HTTPError raised by raise_for_status(); ValueError covers an
        # undecodable JSON body on requests versions where the JSON error is
        # not a RequestException. Anything else is a programming error and
        # should surface instead of being silently turned into None.
        except (requests.RequestException, ValueError) as e:
            logger.error(f"Error in DeepSeek API call: {str(e)}")
            return None

In [None]:
def generate_hypotheses() -> Dict:
    """Generate one hypothesis per analysed paper and save the results.

    Loads ``literature_analysis.json`` from ``OUTPUTS_DIR``, sends each
    paper's key insights to DeepSeek, and writes every successful result to
    ``generated_hypotheses.json`` in the same directory.

    Returns:
        Mapping of paper title to a dict with the ``'context'`` that was sent
        and the ``'generated_hypothesis'`` response. Papers without usable
        insights, or for which the API call failed, are omitted. An empty
        dict is returned when the analysis file cannot be loaded.
    """
    logger.info('Starting hypothesis generation')

    # Load literature analysis
    try:
        analysis_path = OUTPUTS_DIR / 'literature_analysis.json'
        literature_analysis = load_json(analysis_path)
    except Exception as e:
        logger.error(f'Error loading literature analysis: {str(e)}')
        return {}

    # Initialize DeepSeek API
    deepseek = DeepSeekAPI(DEEPSEEK_API_KEY)

    # Generate hypotheses
    # Assumes the analysis file maps paper title -> {'key_insights': [str, ...]}
    # -- TODO confirm against the notebook that writes it.
    hypotheses = {}
    for paper_title, analysis in tqdm(literature_analysis.items(), desc='Generating hypotheses'):
        # One malformed entry must not abort the loop: an exception here would
        # skip the save below and throw away every hypothesis generated so far.
        insights = analysis.get('key_insights') if isinstance(analysis, dict) else None
        if not insights:
            logger.warning(f'Skipping paper with no key insights: {paper_title}')
            continue

        # Prepare context from key insights
        context = "\n".join(insights)

        # Generate hypothesis
        result = deepseek.generate_hypothesis(context)
        if not result:
            logger.warning(f'No hypothesis generated for paper: {paper_title}')
            continue

        hypotheses[paper_title] = {
            'context': context,
            'generated_hypothesis': result
        }
        logger.info(f'Generated hypothesis for paper: {paper_title}')

    # Save results
    output_path = OUTPUTS_DIR / 'generated_hypotheses.json'
    save_json(hypotheses, output_path)
    logger.info(f'Saved generated hypotheses to {output_path}')

    return hypotheses

In [None]:
if __name__ == "__main__":
    # Run the whole pipeline once; `results` maps paper title -> generated entry.
    results = generate_hypotheses()

    # Report how many papers produced a hypothesis, then echo each one.
    paper_count = len(results)
    print(f"Generated hypotheses for {paper_count} papers")
    for title in results:
        print(f"\nPaper: {title}")
        print("Generated Hypothesis:")
        print(results[title]['generated_hypothesis'])