# recherche du sous-graphe de connaissance

In [1]:
import os
from mistralai import Mistral

api_key = "ZzROxj7Fuf63vP41wYOtCyNM95Gijq86" #inscrire la clé API
model = "mistral-large-latest" #sélectionner le modèle

client = Mistral(api_key=api_key)

In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON

#Résolution du label vers l'identifiant Wikidata
def get_wikidata_id(label: str, lang="en") -> str:
    endpoint = SPARQLWrapper("https://query.wikidata.org/sparql")
    query = f"""
    SELECT ?item WHERE {{
      ?item rdfs:label "{label}"@{lang} .
    }} LIMIT 1
    """
    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    results = endpoint.query().convert()
    bindings = results["results"]["bindings"]
    return bindings[0]["item"]["value"].split("/")[-1] if bindings else None

In [3]:
#Recherche de chemins RDF entre deux entités
def get_semantic_paths(term1: str, term2: str, lang="en", max_results=None):
    id1 = get_wikidata_id(term1, lang)
    id2 = get_wikidata_id(term2, lang)

    if not id1 or not id2:
        raise ValueError("Un ou les deux termes n'ont pas pu être trouvés sur Wikidata.")

    print(f"🔎 Résolution des entités : '{term1}' → {id1}, '{term2}' → {id2}")

    endpoint = SPARQLWrapper("https://query.wikidata.org/sparql")
    limit_clause = f"LIMIT {max_results}" if max_results else ""

    query = f"""
    SELECT ?step1Label ?rel1Label ?step2Label ?rel2Label ?step3Label ?rel3Label ?step4Label ?rel4Label ?step5Label WHERE {{
      wd:{id1} ?p1 ?step1 .
      ?rel1 wikibase:directClaim ?p1 .
      ?step1 ?p2 ?step2 .
      ?rel2 wikibase:directClaim ?p2 .
      ?step2 ?p3 ?step3 .
      ?rel3 wikibase:directClaim ?p3 .
      ?step3 ?p4 ?step4 .
      ?rel4 wikibase:directClaim ?p4 .
      ?step4 ?p5 wd:{id2} .

      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}" . }}
    }}
    {limit_clause}
    """
    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    results = endpoint.query().convert()

    chains = []
    for res in results["results"]["bindings"]:
        try:
            path = [
                term1,
                "→", res["rel1Label"]["value"],
                "→", res["step1Label"]["value"],
                "→", res["rel2Label"]["value"],
                "→", res["step2Label"]["value"],
                "→", res["rel3Label"]["value"],
                "→", res["step3Label"]["value"],
                "→", res["rel4Label"]["value"],
                "→", res["step4Label"]["value"],
                "→", term2
            ]
            chains.append(" ".join(path))
        except KeyError:
            continue

    return chains

In [4]:
#Interface utilisateur CLI
if __name__ == "__main__":
    print("🌍 Recherche de chemin sémantique entre deux concepts via Wikidata")
    concept1 = input("📝 Entrez le premier concept : ")
    concept2 = input("📝 Entrez le second concept : ")

    use_limit = input("Souhaitez-vous fixer un nombre maximal de résultats ? (y/n) : ").strip().lower()
    max_results = int(input("🔢 Nombre maximal de résultats à retourner : ")) if use_limit == "y" else None

    try:
        chemins = get_semantic_paths(concept1, concept2, lang="en", max_results=max_results)
        if not chemins:
            print("❌ Aucun chemin trouvé.")
        else:
            print(f"\n🔗 {len(chemins)} chemin(s) trouvé(s) :")
            for chemin in chemins:
                print(chemin)
    except Exception as e:
        print("❗ Erreur :", e)

🌍 Recherche de chemin sémantique entre deux concepts via Wikidata
🔎 Résolution des entités : '' → Q85221364, '' → Q85221364
❌ Aucun chemin trouvé.


# le LLM

In [5]:
definition = client.chat.complete(
     model=model,
     messages=[
         {
             "role": "user",
             "content": f"""You are a sophisticated ontologist trained in scientific research, engineering, and innovation. 
    
Given the following key concepts extracted from a comprehensive knowledge graph, your task is to define each one of the terms and discuss the relationships identified in the graph.

Consider this list of nodes and relationships from a knowledge graph between "{concept1}" and "{concept2}". 

The format of the knowledge graph is "node_1 -- relationship between node_1 and node_2 -- node_2 -- relationship between node_2 and node_3 -- node_3...."

Here is the graph:

{chemins}

Make sure to incorporate EACH of the concepts in the knowledge graph in your response. 

Do not add any introductory phrases. First, define each term in the knowledge graph and then, secondly, discuss each of the relationships, with context. """
         },
     ],
)

In [6]:
chat_response = client.chat.complete(
    model=model,
    messages=[
        {
            "role": "user",
            "content":f"""You are a sophisticated scientist trained in scientific research and innovation. 
    
Given the following key concepts extracted from a comprehensive knowledge graph, your task is to synthesize a novel research hypothesis. Your response should not only demonstrate deep understanding and rational thinking but also explore imaginative and unconventional applications of these concepts. 
    
Consider this list of nodes and relationships from a knowledge graph between "{concept1}" and "{concept2}". \
The format of the graph is "node_1 -- relationship between node_1 and node_2 -- node_2 -- relationship between node_2 and node_3 -- node_3...."

Here is the graph:

{chemins}

{definition.choices[0].message.content}Analyze the graph deeply and carefully, then craft a detailed research hypothesis that investigates a likely groundbreaking aspect that incorporates EACH of these concepts. Consider the implications of your hypothesis and predict the outcome or behavior that might result from this line of investigation. Your creativity in linking these concepts to address unsolved problems or propose new, unexplored areas of study, emergent or unexpected behaviors, will be highly valued.

Be as quantitative as possible and include details such as numbers, sequences, or chemical formulas. Please structure your response in JSON format, with SEVEN keys: 

"hypothesis" clearly delineates the hypothesis at the basis for the proposed research question.

"outcome" describes the expected findings or impact of the research. Be quantitative and include numbers, material properties, sequences, or chemical formula.

"mechanisms" provides details about anticipated chemical, biological or physical behaviors. Be as specific as possible, across all scales from molecular to macroscale.

"design_principles" should list out detailed design principles, focused on novel concepts and include a high level of detail. Be creative and give this a lot of thought, and be exhaustive in your response. 

"unexpected_properties" should predict unexpected properties of the new material or system. Include specific predictions, and explain the rationale behind these clearly using logic and reasoning. Think carefully.

"comparison" should provide a detailed comparison with other materials, technologies or scientific concepts. Be detailed and quantitative. 

"novelty" should discuss novel aspects of the proposed idea, specifically highlighting how this advances over existing knowledge and technology. 

Ensure your scientific hypothesis is both innovative and grounded in logical reasoning, capable of advancing our understanding or application of the concepts provided.

Here is an example structure for your response, in JSON format:

{{
  "hypothesis": "...",
  "outcome": "...",
  "mechanisms": "...",
  "design_principles": "...",
  "unexpected_properties": "...",
  "comparison": "...",
  "novelty": "...",
}}

Remember, the value of your response is as scientific discovery, new avenues of scientific inquiry and potential technological breakthroughs, with details and solid reasoning.

Make sure to incorporate EACH of the concepts in the knowledge graph in your response. 
""",
        },
    ]
)

SDKError: API error occurred: Status 429
{"object":"error","message":"Service tier capacity exceeded for this model.","type":"invalid_request_error","param":null,"code":null}

In [7]:
sortie = chat_response.choices[0].message.content

print(sortie)

```json
{
  "hypothesis": "The integration of advanced telecommunications technology with energy-harvesting materials in consumer electronics can lead to the development of self-sustaining mobile devices that significantly reduce reliance on external power sources. This hypothesis aims to explore the potential of creating a phone that can convert ambient energy (such as mechanical, thermal, and electromagnetic) into electrical energy, leveraging principles from physics and historical innovations in telephony.",

  "outcome": "The expected outcome is the creation of a prototype phone that can operate continuously for at least 72 hours without conventional charging, utilizing energy harvested from the environment. This device will incorporate piezoelectric materials for mechanical energy conversion, thermoelectric generators for thermal energy, and antennas designed to capture ambient electromagnetic radiation. The device will achieve an energy conversion efficiency of at least 30%, with

In [8]:
defi = definition.choices[0].message.content
print(defi)

### Definitions

1. **Phone**: A telecommunications device that allows two or more users to conduct a conversation when they are too far apart to be heard directly.
2. **Telephone**: A historical term for a phone, specifically referring to landline devices.
3. **Home Appliance**: Devices or machines that are used in a household to perform a specific task.
4. **Appliance**: A device or instrument designed to perform a particular task or function.
5. **Machine**: A mechanical or electrical device that performs or assists in the performance of human tasks.
6. **Energy**: The capacity to do work or produce change.
7. **Electrical Appliance**: An appliance that uses electricity as its primary energy source.
8. **Electricity**: A form of energy resulting from the existence of charged particles (such as electrons or protons), either statically as an electric charge or dynamically as an electric current.
9. **Consumer Electronics**: Electronic equipment intended for everyday use, typically in 

# Evaluation de la distance sémantique

In [5]:
from sentence_transformers import SentenceTransformer, util
import torch

# 1. Charger un modèle spécialisé pour les articles scientifiques
model1 = SentenceTransformer('allenai-specter')

In [7]:
# 2. Texte de référence
texte_reference = """
{
  "hypothesis": "Silk fibroin can be bioengineered to mimic insect-inspired nanostructures responsible for structural coloration, enabling the development of a self-cleaning, structurally colored, energy-passive, biocompatible material with broad applicability in biomaterial design. This multifunctional biopolymer will be fabricated using a recombinant expression system coupled with nanoscale patterning via directed self-assembly and may eliminate the energy-intensive requirements typical of structural coloration in insects.",
  
  "outcome": "The engineered silk-based material will demonstrate (1) vivid, tunable structural coloration across the visible spectrum (400–700 nm), (2) a self-cleaning hydrophobic surface with a contact angle >150°, (3) tensile strength of ~500 MPa and elasticity >15%, (4) biodegradation rates controllable from days to months, and (5) biocompatibility with >95% cell viability in mammalian cell culture assays. Light reflectance measurements will show >85% reflectivity at peak wavelength due to the photonic crystal-like architecture.",
  
  "mechanisms": "At the molecular scale, silk fibroin will be genetically modified to incorporate histidine-rich repeats and elastin-like sequences to enhance nanoscale ordering during self-assembly. Directed self-assembly techniques (e.g., capillary-force lithography and evaporation-induced nanostructuring) will be used to pattern silk into quasi-periodic nanostructures (~150–250 nm spacing) that interfere with visible light. Self-cleaning will emerge from hierarchical roughness combined with surface chemical modification (e.g., fluorination or silanization). Biocompatibility is mediated by the natural amino acid composition of fibroin, while mechanical strength is governed by β-sheet crystallites formed during post-processing (e.g., methanol annealing).",
  
  "design_principles": [
    "Utilize recombinant DNA techniques to produce silk fibroin with sequence motifs favoring nanostructure alignment and crystallinity.",
    "Apply evaporation-driven self-assembly and nanoimprint lithography to create insect-mimetic surface topographies with precise nanogratings (period ~180 nm, depth ~100 nm).",
    "Integrate hierarchical roughness by combining nanostructures with microscale pillars (~10–30 µm pitch) for dual-scale superhydrophobicity.",
    "Optimize β-sheet content (via FTIR or XRD control) to balance mechanical integrity and biodegradation rate.",
    "Design for spectral tunability by modulating nanostructure periodicity to achieve blue to red coloration without pigments.",
    "Ensure surface chemistry remains inert or cell-adhesive by controlling functional groups (e.g., RGD peptides for cell adhesion or PEGylation for anti-fouling)."
  ],
  
  "unexpected_properties": [
    "Angle-independent coloration due to quasi-random nanostructure arrays may emerge, offering consistent color regardless of viewing angle.",
    "The material may exhibit photonic thermal insulation, reflecting infrared while allowing visible transmission, reducing thermal load in medical implants.",
    "Due to localized electric field enhancements at nanostructure peaks, the silk surface could exhibit nonlinear optical effects under high-intensity light.",
    "Surface nanostructures may trap airborne pathogens or particulates, leading to passive antimicrobial or filtration behavior when paired with enzymatic coatings."
  ],
  
  "comparison": {
    "Compared to traditional silk fibroin, this engineered material introduces structural coloration and self-cleaning—absent in native silk.",
    "Versus pigment-based coloration, this approach is non-toxic, fade-resistant, and biocompatible.",
    "Compared to structurally colored insect cuticles, the silk analog requires 90–95% less metabolic energy (no biological development), enabling scalable, energy-passive production.",
    "Compared to existing synthetic photonic materials (e.g., polymer opals), this material is biodegradable, mechanically robust (twice the toughness), and biologically safe."
  },
  
  "novelty": "This hypothesis proposes the first integration of bioengineered silk fibroin with insect-inspired structural coloration and multifunctional nanostructures, yielding a biodegradable, self-cleaning, structurally colored material without relying on pigments or energy-intensive processing. It bridges bioinspired nanophotonics and sustainable biomaterials, offering new frontiers in implantable devices, responsive textiles, and passive optical materials. Unlike any known biomaterial, this system merges optical, mechanical, and biochemical functionality in a single self-assembling protein polymer."
}

"""

In [13]:
# 4. Encoder les textes
embedding_ref = model1.encode(texte_reference, convert_to_tensor=True)

# 5. Comparaison et distances
print("Comparaison avec le texte de référence :\n")
embedding_hyp = model1.encode(sortie, convert_to_tensor=True)
similarity = util.cos_sim(embedding_ref, embedding_hyp).item()
print(f"Hypothèse :")
print(f"  → Similarité cosinus : {similarity:.4f}")
if similarity > 0.6 and similarity <= 0.8:
    print("L'hypothèse est proche mais pas identique")
elif similarity > 0.95:
    print("L'hypothèse est presque identiques (paraphrase ou copie)")
elif similarity > 0.8 and similarity <= 0.95:
    print("L'hypothèse est très similaires et représente les mêmes idées")
elif similarity <= 0.6 and similarity > 0.4:
    print("L'hypothèse est différente mais peut partager des concepts communs")
elif similarity <= 0.4:
    print("L'hypothèse est très différente et ne partage pas de concepts communs")
else:
    print("Il y a une erreur dans la comparaison de l'hypothèse")

Comparaison avec le texte de référence :

Hypothèse :
  → Similarité cosinus : 0.6422
L'hypothèse est proche mais pas identique
