In [None]:
# Exercise 5: Advanced Geoscience Conversations with Galactica Model
#
# Loads an instruction-tuned Galactica 6.7B checkpoint with 4-bit quantization,
# wraps it in a text-generation pipeline, and exposes it to LangChain as
# `galactica_llm` / `galactica_chat_model`.
model_name = "GeorgiaTechResearchInstitute/galactica-6.7b-evol-instruct-70k"

print(f"Loading Galactica model: {model_name}")

# Create tokenizer for Galactica
galactica_tokenizer = AutoTokenizer.from_pretrained(model_name)

# Updated quantization config for the larger model:
# 4-bit NF4 weights with double quantization and bfloat16 compute.
galactica_quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
)

# Load the Galactica model
# NOTE(review): trust_remote_code=True allows code shipped in the checkpoint's
# repository to run at load time — confirm this checkpoint actually needs it.
galactica_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=galactica_quant_config,
    trust_remote_code=True  # Required for some models
)

# Set pad token for Galactica (fall back to EOS only when none is defined)
if galactica_tokenizer.pad_token is None:
    galactica_tokenizer.pad_token = galactica_tokenizer.eos_token

# Create pipeline for Galactica
galactica_pipe = pipeline(
    "text-generation",
    model=galactica_model,
    tokenizer=galactica_tokenizer,
    max_new_tokens=200,  # Increased for more detailed scientific responses
    temperature=0.3,     # Conservative value; keeps sampled answers focused
    do_sample=True,
    # Use the tokenizer's own pad token (guaranteed to be set by the guard
    # above) rather than unconditionally substituting the EOS id.
    pad_token_id=galactica_tokenizer.pad_token_id,
    return_full_text=False
)

# Create LangChain components for Galactica
# Pass model_id explicitly so the chat wrapper can resolve this checkpoint
# instead of relying on the pipeline wrapper's default model id.
galactica_llm = HuggingFacePipeline(pipeline=galactica_pipe, model_id=model_name)
galactica_chat_model = ChatHuggingFace(llm=galactica_llm)

print("Galactica model loaded successfully!")
print(f"Model parameters: {galactica_model.num_parameters():,}")

## 9. Advanced exercise: Scientific model (Galactica 6.7B)

Intent:
- Try a science-oriented model for more rigorous, equation-heavy answers.

Caveats:
- 6.7B is large for CPU; best on GPU with 4-bit quantization.
- Scientific models may hallucinate citations/equations — verify results.
- License terms and availability may vary; a Hugging Face access token may be required to download the model.

Setup guidance:
- Use BitsAndBytes 4-bit on Colab GPU; reduce max_new_tokens to control latency.
- Start with a conservative temperature (e.g., 0.2–0.5).

Evaluation ideas:
- Ask for derivations (e.g., the Zoeppritz equations or Gassmann fluid substitution).
- Request formula plus variable definitions and typical value ranges.
- Compare the answers against known references.

***

In [None]:
# Exercise 5: Advanced Geoscience Conversations with Galactica Model
#
# Exercise template: replace each """ YOUR CODE HERE """ placeholder below with
# working code. As written, `galactica_llm` is bound to the placeholder string
# and the final print reads a `galactica_model` variable that this cell does
# not define, so the placeholders must be filled in before the cell is useful.
model_name = "GeorgiaTechResearchInstitute/galactica-6.7b-evol-instruct-70k"

print(f"Loading Galactica model: {model_name}")

# Create tokenizer for Galactica
# (load the tokenizer that belongs to `model_name`)

""" YOUR CODE HERE """

# Updated quantization config for the larger model
# (the section notes recommend BitsAndBytes 4-bit quantization on a GPU)

""" YOUR CODE HERE """

# Load the Galactica model
# (bind the loaded model to `galactica_model`; the last line of this cell reads it)

""" YOUR CODE HERE """

# Set pad token for Galactica
# (make sure the tokenizer has a pad token before it is used for generation)
""" YOUR CODE HERE """

# Create pipeline for Galactica
# (a text-generation pipeline built from the model and tokenizer above)
""" YOUR CODE HERE """

# Create LangChain components for Galactica
# Replace the placeholder string with the LangChain LLM wrapper around your pipeline;
# it is passed to ChatHuggingFace on the next line.
galactica_llm = """ YOUR CODE HERE """
galactica_chat_model = ChatHuggingFace(llm=galactica_llm)

print(f"Galactica model loaded successfully!")
# Reports the parameter count of the loaded model.
print(f"Model parameters: {galactica_model.num_parameters():,}")

In [None]:
# Enhanced Geoscience Chat Agent for Advanced Scientific Queries
class AdvancedGeoscienceChatAgent(ModernGeoscienceChatAgent):
    """Exercise template: chat agent subclass with a custom system prompt.

    Defines only ``__init__``; all other behavior is inherited from
    ``ModernGeoscienceChatAgent``.
    """

    def __init__(self, chat_model):
        """Initialize the parent agent with ``chat_model``, then set the system prompt."""
        super().__init__(chat_model)
        
        # Enhanced system prompt for scientific rigor with Galactica
        # Replace the "YOUR PROMPT HERE" text with your own instructions.
        # NOTE(review): the prompt is assigned after super().__init__(); if the
        # parent reads self.system_prompt while initializing, this override
        # comes too late — confirm against the parent class.
        self.system_prompt = """
        YOUR PROMPT HERE
"""

# Create the advanced agent with Galactica
# Bind the new agent to `advanced_agent`; the Gradio interface cell reads that name.
""" YOUR CODE HERE """
print("Advanced GeoscienceChatAgent with Galactica model created!")

In [None]:
# Advanced Scientific Query Categories
# Each entry pairs a scientific domain name with its list of sample questions;
# the pairs are listed in the order the domains should appear.
advanced_scenarios = dict([
    (
        "Quantitative Reservoir Analysis",
        [
            "Calculate the hydrocarbon pore volume for a reservoir with 25% porosity, 65% water saturation, and net pay of 150 ft over 1000 acres.",
            "What is the relationship between permeability and grain size in sandstone reservoirs? Provide the Kozeny-Carman equation.",
            "How do you estimate original oil in place using volumetric methods? Include uncertainty analysis.",
        ],
    ),
    (
        "Advanced Seismic Interpretation",
        [
            "Explain the physics behind AVO (Amplitude Versus Offset) analysis and the Zoeppritz equations.",
            "How do you distinguish between structural and stratigraphic hydrocarbon traps using seismic attributes?",
            "What are the key parameters in seismic resolution analysis? Provide the mathematical relationships.",
        ],
    ),
    (
        "Geomechanics & Drilling",
        [
            "Calculate the minimum horizontal stress using the poroelastic theory. Include Biot's coefficient.",
            "How do you predict wellbore stability using the Mohr-Coulomb failure criterion?",
            "What factors control hydraulic fracture propagation in unconventional reservoirs?",
        ],
    ),
    (
        "Carbon Sequestration",
        [
            "What are the key thermodynamic properties of CO2 at typical reservoir conditions (3000 ft depth, 120°F)?",
            "How do you assess caprock integrity for CO2 storage? Include geochemical considerations.",
            "Calculate the CO2 storage capacity using the methodology from the CO2 Storage Atlas.",
        ],
    ),
    (
        "Machine Learning in Geosciences",
        [
            "How can neural networks be applied to seismic facies classification? What are the key preprocessing steps?",
            "Explain the use of clustering algorithms for well log analysis and formation evaluation.",
            "What machine learning approaches are most effective for reservoir property prediction from seismic data?",
        ],
    ),
])

In [None]:
def create_advanced_gradio_interface():
    """Create an advanced Gradio interface for scientific geoscience queries.

    Builds a two-column ``gr.Blocks`` app: a chat panel that forwards messages
    to the module-level ``advanced_agent``, and a side panel that previews the
    sample questions stored in ``advanced_scenarios``.

    Side effects:
        Starts a new agent session and stores it in the module-level
        ``advanced_session``. All handlers use that single global, so every
        client connected to the app shares one conversation session.

    Returns:
        The assembled ``gr.Blocks`` app; launching it is left to the caller.
    """

    # Global session
    global advanced_session
    advanced_session = advanced_agent.create_new_session()

    def advanced_respond(message: str, history: List[Tuple[str, str]]) -> Tuple[str, List[Tuple[str, str]]]:
        """Send ``message`` to the agent and append the exchange to ``history``.

        Returns an empty string (which clears the input box) and the history.
        Blank or missing messages are ignored.
        """
        # Tolerate a missing history/message instead of raising in the handler.
        if history is None:
            history = []
        if not message or not message.strip():
            return "", history

        # Get response from advanced agent
        bot_response = advanced_agent.chat(message, advanced_session)
        history.append((message, bot_response))
        return "", history

    def clear_advanced_conversation() -> List[Tuple[str, str]]:
        """Clear the current session's memory, start a new session, and empty the chat."""
        global advanced_session
        advanced_agent.clear_memory(advanced_session)
        advanced_session = advanced_agent.create_new_session()
        return []

    def load_scenario_questions(scenario_name: str) -> str:
        """Load all questions from a scenario as a numbered, blank-line-separated list.

        Returns an empty string when ``scenario_name`` is not a known scenario.
        """
        questions = advanced_scenarios.get(scenario_name, [])
        return "\n\n".join(f"Q{i}: {q}" for i, q in enumerate(questions, start=1))

    def export_conversation(history: List[Tuple[str, str]]) -> str:
        """Render the chat history as a plain-text transcript for copying."""
        if not history:
            return "No conversation to export yet."
        return "\n\n".join(
            f"User: {user_msg}\nDr. GeoBot-Advanced: {bot_msg}"
            for user_msg, bot_msg in history
        )

    # Create the interface
    with gr.Blocks(
        title="Dr. GeoBot-Advanced - Scientific Geoscience AI",
        theme=gr.themes.Base(),
        css="""
        .gradio-container { background: linear-gradient(45deg, #1e3a8a, #1e40af) }
        .chat-message { background: rgba(255,255,255,0.95); border-radius: 10px; }
        """
    ) as advanced_demo:
        gr.Markdown("""
        # 🧬 Dr. GeoBot-Advanced - Scientific Geoscience AI
        ### *Powered by Galactica-6.7B for Advanced Scientific Reasoning*
        
        | **🔬 Advanced Capabilities** | **📊 Quantitative Analysis** | **🎯 Research Applications** |
        |---|---|---|
        | Complex equation derivation | Statistical reservoir analysis | ML/AI in geosciences |
        | Multi-physics modeling | Uncertainty quantification | Carbon sequestration |
        | Advanced interpretation | Geomechanical calculations | Unconventional resources |
        
        💡 *Ask detailed scientific questions with quantitative requirements*
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # No avatar_images: that option takes image file paths or URLs,
                # so the emoji strings previously passed here were not valid values.
                chatbot = gr.Chatbot(
                    value=[],
                    height=600,
                    show_label=False,
                    bubble_full_width=False
                )

                with gr.Row():
                    msg = gr.Textbox(
                        placeholder="Ask advanced geoscience questions (include specific parameters, equations, calculations)...",
                        show_label=False,
                        scale=5,
                        container=False
                    )
                    send_btn = gr.Button("🚀 Analyze", scale=1, variant="primary")

                with gr.Row():
                    clear_btn = gr.Button("🗑️ Clear Session", variant="secondary")
                    export_btn = gr.Button("📋 Export Chat", variant="secondary")

                # Target for the export button, which previously had no handler.
                export_box = gr.Textbox(
                    label="Exported Chat",
                    lines=6,
                    interactive=False
                )

            with gr.Column(scale=1):
                gr.Markdown("### 🔬 Advanced Query Categories")

                scenario_dropdown = gr.Dropdown(
                    choices=list(advanced_scenarios.keys()),
                    label="Select Scientific Domain",
                    value=None
                )

                scenario_text = gr.Textbox(
                    label="Scenario Questions",
                    lines=8,
                    placeholder="Select a domain to see advanced questions..."
                )

                load_scenario_btn = gr.Button("📖 Load Questions", variant="secondary")

                gr.Markdown("""
                ### 💡 Quick Examples
                - *"Calculate fracture pressure using Eaton's method"*
                - *"Derive the Gardner equation for density prediction"*
                - *"Explain Gassmann fluid substitution theory"*
                - *"Model CO2 solubility in brine at reservoir conditions"*
                """)

        # Event handlers
        msg.submit(advanced_respond, [msg, chatbot], [msg, chatbot])
        send_btn.click(advanced_respond, [msg, chatbot], [msg, chatbot])
        clear_btn.click(clear_advanced_conversation, outputs=chatbot)
        export_btn.click(export_conversation, inputs=chatbot, outputs=export_box)

        scenario_dropdown.change(
            load_scenario_questions,
            inputs=scenario_dropdown,
            outputs=scenario_text
        )

        # Copy the previewed questions into the message box.
        load_scenario_btn.click(
            lambda text: text,
            inputs=scenario_text,
            outputs=msg
        )

    return advanced_demo

# Create and launch the advanced interface
# NOTE(review): share=True is expected to publish a public link; combined with
# the single shared session, anyone with the link would join the same
# conversation — confirm this is intended.
print("Creating Gradio Interface with Galactica...")
advanced_demo = create_advanced_gradio_interface()
advanced_demo.launch(share=True, show_error=True)

In [None]:
# Enhanced Geoscience Chat Agent for Advanced Scientific Queries
class AdvancedGeoscienceChatAgent(ModernGeoscienceChatAgent):
    """Chat agent subclass that swaps in a research-oriented system prompt.

    Defines only ``__init__``; all other behavior is inherited from
    ``ModernGeoscienceChatAgent``.
    """

    def __init__(self, chat_model) -> None:
        """Initialize the parent agent with ``chat_model``, then set the system prompt."""
        super().__init__(chat_model)
        
        # Enhanced system prompt for scientific rigor with Galactica
        # NOTE(review): the prompt is assigned after super().__init__(); if the
        # parent reads self.system_prompt while initializing, this override
        # comes too late — confirm against the parent class.
        self.system_prompt = """
You are Dr. GeoBot-Advanced, a world-class geoscientist with expertise in:

🔬 **Core Specializations:**
- Advanced geophysics and seismic interpretation
- Reservoir geomechanics and flow simulation
- Computational geology and machine learning applications
- Carbon sequestration and geothermal systems
- Unconventional resource characterization
- Quantitative seismic analysis and AVO modeling

📊 **Research Focus:**
- Provide scientifically accurate, evidence-based responses
- Include relevant equations, formulas, and quantitative relationships
- Reference established geological principles and physical laws
- Explain complex concepts with scientific precision
- Suggest experimental or analytical approaches when appropriate

🎯 **Response Guidelines:**
- Use proper scientific terminology and units
- Provide quantitative insights when relevant
- Reference established research and methodologies
- Acknowledge uncertainties and limitations
- Suggest follow-up investigations or analyses
- Keep responses detailed but focused (3-5 sentences)

Format mathematical expressions clearly and provide practical context for all scientific concepts.
"""

# Create the advanced agent with Galactica
# Wraps the module-level `galactica_chat_model`; the Gradio interface cell
# reads the resulting `advanced_agent`.
advanced_agent = AdvancedGeoscienceChatAgent(galactica_chat_model)
print("Advanced GeoscienceChatAgent with Galactica model created!")

In [None]:
# Advanced Scientific Query Categories
# Each entry pairs a scientific domain name with its list of sample questions;
# the pairs are listed in the order the domains should appear.
advanced_scenarios = dict([
    (
        "Quantitative Reservoir Analysis",
        [
            "Calculate the hydrocarbon pore volume for a reservoir with 25% porosity, 65% water saturation, and net pay of 150 ft over 1000 acres.",
            "What is the relationship between permeability and grain size in sandstone reservoirs? Provide the Kozeny-Carman equation.",
            "How do you estimate original oil in place using volumetric methods? Include uncertainty analysis.",
        ],
    ),
    (
        "Advanced Seismic Interpretation",
        [
            "Explain the physics behind AVO (Amplitude Versus Offset) analysis and the Zoeppritz equations.",
            "How do you distinguish between structural and stratigraphic hydrocarbon traps using seismic attributes?",
            "What are the key parameters in seismic resolution analysis? Provide the mathematical relationships.",
        ],
    ),
    (
        "Geomechanics & Drilling",
        [
            "Calculate the minimum horizontal stress using the poroelastic theory. Include Biot's coefficient.",
            "How do you predict wellbore stability using the Mohr-Coulomb failure criterion?",
            "What factors control hydraulic fracture propagation in unconventional reservoirs?",
        ],
    ),
    (
        "Carbon Sequestration",
        [
            "What are the key thermodynamic properties of CO2 at typical reservoir conditions (3000 ft depth, 120°F)?",
            "How do you assess caprock integrity for CO2 storage? Include geochemical considerations.",
            "Calculate the CO2 storage capacity using the methodology from the CO2 Storage Atlas.",
        ],
    ),
    (
        "Machine Learning in Geosciences",
        [
            "How can neural networks be applied to seismic facies classification? What are the key preprocessing steps?",
            "Explain the use of clustering algorithms for well log analysis and formation evaluation.",
            "What machine learning approaches are most effective for reservoir property prediction from seismic data?",
        ],
    ),
])

In [None]:
def create_advanced_gradio_interface():
    """Create an advanced Gradio interface for scientific geoscience queries.

    Builds a two-column ``gr.Blocks`` app: a chat panel that forwards messages
    to the module-level ``advanced_agent``, and a side panel that previews the
    sample questions stored in ``advanced_scenarios``.

    Side effects:
        Starts a new agent session and stores it in the module-level
        ``advanced_session``. All handlers use that single global, so every
        client connected to the app shares one conversation session.

    Returns:
        The assembled ``gr.Blocks`` app; launching it is left to the caller.
    """

    # Global session for the advanced interface
    global advanced_session
    advanced_session = advanced_agent.create_new_session()

    def advanced_respond(message: str, history: List[Tuple[str, str]]) -> Tuple[str, List[Tuple[str, str]]]:
        """Send ``message`` to the agent and append the exchange to ``history``.

        Returns an empty string (which clears the input box) and the history.
        Blank or missing messages are ignored.
        """
        # Tolerate a missing history/message instead of raising in the handler.
        if history is None:
            history = []
        if not message or not message.strip():
            return "", history

        # Get response from advanced agent
        bot_response = advanced_agent.chat(message, advanced_session)
        history.append((message, bot_response))
        return "", history

    def clear_advanced_conversation() -> List[Tuple[str, str]]:
        """Clear the current session's memory, start a new session, and empty the chat."""
        global advanced_session
        advanced_agent.clear_memory(advanced_session)
        advanced_session = advanced_agent.create_new_session()
        return []

    def load_scenario_questions(scenario_name: str) -> str:
        """Load all questions from a scenario as a numbered, blank-line-separated list.

        Returns an empty string when ``scenario_name`` is not a known scenario.
        """
        questions = advanced_scenarios.get(scenario_name, [])
        return "\n\n".join(f"Q{i}: {q}" for i, q in enumerate(questions, start=1))

    def export_conversation(history: List[Tuple[str, str]]) -> str:
        """Render the chat history as a plain-text transcript for copying."""
        if not history:
            return "No conversation to export yet."
        return "\n\n".join(
            f"User: {user_msg}\nDr. GeoBot-Advanced: {bot_msg}"
            for user_msg, bot_msg in history
        )

    # Create the advanced interface
    with gr.Blocks(
        title="Dr. GeoBot-Advanced - Scientific Geoscience AI",
        theme=gr.themes.Base(),
        css="""
        .gradio-container { background: linear-gradient(45deg, #1e3a8a, #1e40af) }
        .chat-message { background: rgba(255,255,255,0.95); border-radius: 10px; }
        """
    ) as advanced_demo:

        gr.Markdown("""
        # 🧬 Dr. GeoBot-Advanced - Scientific Geoscience AI
        ### *Powered by Galactica-6.7B for Advanced Scientific Reasoning*
        
        | **🔬 Advanced Capabilities** | **📊 Quantitative Analysis** | **🎯 Research Applications** |
        |---|---|---|
        | Complex equation derivation | Statistical reservoir analysis | ML/AI in geosciences |
        | Multi-physics modeling | Uncertainty quantification | Carbon sequestration |
        | Advanced interpretation | Geomechanical calculations | Unconventional resources |
        
        💡 *Ask detailed scientific questions with quantitative requirements*
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # No avatar_images: that option takes image file paths or URLs,
                # so the emoji strings previously passed here were not valid values.
                chatbot = gr.Chatbot(
                    value=[],
                    height=600,
                    show_label=False,
                    bubble_full_width=False
                )

                with gr.Row():
                    msg = gr.Textbox(
                        placeholder="Ask advanced geoscience questions (include specific parameters, equations, calculations)...",
                        show_label=False,
                        scale=5,
                        container=False
                    )
                    send_btn = gr.Button("🚀 Analyze", scale=1, variant="primary")

                with gr.Row():
                    clear_btn = gr.Button("🗑️ Clear Session", variant="secondary")
                    export_btn = gr.Button("📋 Export Chat", variant="secondary")

                # Target for the export button, which previously had no handler.
                export_box = gr.Textbox(
                    label="Exported Chat",
                    lines=6,
                    interactive=False
                )

            with gr.Column(scale=1):
                gr.Markdown("### 🔬 Advanced Query Categories")

                scenario_dropdown = gr.Dropdown(
                    choices=list(advanced_scenarios.keys()),
                    label="Select Scientific Domain",
                    value=None
                )

                scenario_text = gr.Textbox(
                    label="Scenario Questions",
                    lines=8,
                    placeholder="Select a domain to see advanced questions..."
                )

                load_scenario_btn = gr.Button("📖 Load Questions", variant="secondary")

                gr.Markdown("""
                ### 💡 Quick Examples
                - *"Calculate fracture pressure using Eaton's method"*
                - *"Derive the Gardner equation for density prediction"*
                - *"Explain Gassmann fluid substitution theory"*
                - *"Model CO2 solubility in brine at reservoir conditions"*
                """)

        # Event handlers
        msg.submit(advanced_respond, [msg, chatbot], [msg, chatbot])
        send_btn.click(advanced_respond, [msg, chatbot], [msg, chatbot])
        clear_btn.click(clear_advanced_conversation, outputs=chatbot)
        export_btn.click(export_conversation, inputs=chatbot, outputs=export_box)

        scenario_dropdown.change(
            load_scenario_questions,
            inputs=scenario_dropdown,
            outputs=scenario_text
        )

        # Copy the previewed questions into the message box.
        load_scenario_btn.click(
            lambda text: text,
            inputs=scenario_text,
            outputs=msg
        )

    return advanced_demo

# Create and launch interface
# NOTE(review): share=True is expected to publish a public link; combined with
# the single shared session, anyone with the link would join the same
# conversation — confirm this is intended.
print("Creating Advanced Geoscience Gradio Interface with Galactica...")
advanced_demo = create_advanced_gradio_interface()
advanced_demo.launch(share=True, show_error=True)