In [1]:
import gradio as gr
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
import pandas as pd

# Load the dataset from the provided CSV and derive the retrieval corpus.
dataset_path = "Data.csv"  # Ensure this file is in the same directory


def _or_na(value):
    """Return ``value`` for display, or ``'N/A'`` when it is missing or falsy.

    pandas stores missing cells as NaN, and NaN is truthy in Python, so a plain
    ``value or 'N/A'`` never substitutes the placeholder for a missing cell and
    the text ends up containing "nan". The null check therefore has to be
    explicit. Falsy values (empty string, 0) still map to ``'N/A'``, as the
    ``or`` fallback did.
    """
    if pd.isnull(value) or not value:
        return "N/A"
    return value


try:
    df = pd.read_csv(dataset_path)
    # Clean data: rows without Su (strength) cannot be used, so drop them here.
    # Every remaining row has a non-null Su, so no further filtering is needed.
    df = df.dropna(subset=['Su'])
    # Map each row to the record format used by the rest of the notebook.
    data = [
        {
            # Default to 7860 for steel if Ro is null
            "density": float(row["Ro"]) if pd.notnull(row["Ro"]) else 7860.0,
            "thickness": 1.0,  # Placeholder; adjust if thickness data is added
            "strength": float(row["Su"]),  # Ultimate tensile strength (Su) in MPa
            "text": (
                f"Material: {row['Material']}, "
                f"Heat treatment: {_or_na(row['Heat treatment'])}, "
                f"Strength: {row['Su']} MPa, "
                f"Yield: {_or_na(row['Sy'])} MPa, "
                f"Elongation: {_or_na(row['A5'])}%, "
                f"Hardness (Bhn): {_or_na(row['Bhn'])}"
            ),
        }
        for _, row in df.iterrows()
    ]
    # Parallel views of the records: texts[i] and strengths[i] describe data[i].
    texts = [item["text"] for item in data]
    strengths = np.array([item["strength"] for item in data], dtype=np.float32)
except Exception as e:
    # Keep the original error type/message, but chain the cause explicitly so
    # the underlying failure (missing file, missing column, bad value) is
    # reported as the direct cause in the traceback.
    raise Exception(f"Error loading dataset: {e}") from e

# Instantiate the retrieval encoder and the answer generator.
try:
    # Sentence encoder shared by the corpus embeddings and the query embedding.
    encoder = SentenceTransformer('all-MiniLM-L6-v2')
    # Embed every corpus text once up front; queries are compared against this.
    embedding_matrix = encoder.encode(texts)
    # Text-to-text pipeline that phrases the final answer from the context.
    generator = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
    )
except Exception as load_error:
    raise Exception(f"Error loading models: {load_error}")

# RAG function
def rag_qa(query):
    """Answer a material-strength question by retrieval followed by generation.

    The query is embedded with the module-level ``encoder`` and compared to
    every row of ``embedding_matrix`` by cosine similarity. The best-matching
    entry of ``texts`` becomes the context handed to ``generator``, and the
    matching entry of ``strengths`` is reported as the predicted strength.

    Args:
        query: The user's question. Must be a string containing at least one
            non-whitespace character.

    Returns:
        A 4-tuple ``(query, context, answer, strength)``:
        - for an invalid query:
          ``("Invalid query", "", "Please enter a valid question.", "N/A")``;
        - on success: the query as given, the retrieved context text, the
          stripped generated answer, and the strength formatted with two
          decimals;
        - if retrieval or generation raises:
          ``(query, "", "Error: <message>", "N/A")``.

    The function does not raise; failures are reported in the returned tuple
    so the UI always has something to display.
    """
    # Check the type first so truthiness is never evaluated on arbitrary
    # objects, and reject whitespace-only input: it carries no question and
    # would otherwise be embedded and sent to the generator.
    if not isinstance(query, str) or not query.strip():
        return "Invalid query", "", "Please enter a valid question.", "N/A"

    try:
        query_embedding = encoder.encode([query])
        # cosine_similarity returns one row per query; there is a single query.
        scores = cosine_similarity(query_embedding, embedding_matrix)[0]
        best_idx = np.argmax(scores)
        context = texts[best_idx]
        prompt = f"Context: {context}\nAnswer the question: {query} with the strength value in MPa."
        response = generator(prompt, max_new_tokens=50, do_sample=False)[0]['generated_text']
        # strengths is index-aligned with texts, so the same index applies.
        predicted_strength = strengths[best_idx]
        return query, context, response.strip(), f"{predicted_strength:.2f}"
    except Exception as e:
        return query, "", f"Error: {e}", "N/A"

# Gradio UI: one free-text question in, four read-only text fields out.
question_box = gr.Textbox(
    label="Ask about material strength",
    placeholder="e.g., What is the strength of Steel SAE 1040 normalized?",
)

# Labels of the output fields, in the order rag_qa returns its values.
result_labels = (
    "Query",
    "Retrieved Context",
    "Model Answer",
    "Predicted Strength (MPa)",
)
result_boxes = [gr.Textbox(label=label, interactive=False) for label in result_labels]

# Sample questions offered in the interface (one input value per example).
sample_questions = [
    ["What is the strength of Steel SAE 1040 normalized?"],
    ["What is the yield strength of Steel SAE 1050 annealed?"],
]

demo = gr.Interface(
    fn=rag_qa,
    inputs=question_box,
    outputs=result_boxes,
    title="RAG-powered Material Insight Tool",
    description="Ask about material strength given material type and heat treatment. Uses semantic retrieval and generation. Dataset from Data.csv.",
    examples=sample_questions,
)

# Render the app inside the notebook output cell.
demo.launch(inline=True)

Device set to use cpu


* Running on local URL:  http://127.0.0.1:7868
* To create a public link, set `share=True` in `launch()`.


