In [None]:
from sentence_transformers import SentenceTransformer, util
import torch

# 1. Load a model specialised for scientific articles.
# The pretrained checkpoint is looked up by the name 'allenai-specter'.
# NOTE(review): the model presumably truncates inputs longer than its maximum
# sequence length, so very long texts may be only partially embedded — confirm
# against the model card before interpreting the similarity scores.
model = SentenceTransformer('allenai-specter')

# 2. Reference text (excerpt from a real scientific article).
# Kept as one raw multi-line string; the leading/trailing newlines are part of
# the value handed to the encoder.
# NOTE(review): the "comparison" entry wraps bare strings in braces (no keys),
# so this text is JSON-like but not valid JSON — do not feed it to a JSON parser
# without fixing that first.
texte_reference = """
{
  "hypothesis": "Silk fibroin can be bioengineered to mimic insect-inspired nanostructures responsible for structural coloration, enabling the development of a self-cleaning, structurally colored, energy-passive, biocompatible material with broad applicability in biomaterial design. This multifunctional biopolymer will be fabricated using a recombinant expression system coupled with nanoscale patterning via directed self-assembly and may eliminate the energy-intensive requirements typical of structural coloration in insects.",
  
  "outcome": "The engineered silk-based material will demonstrate (1) vivid, tunable structural coloration across the visible spectrum (400–700 nm), (2) a self-cleaning hydrophobic surface with a contact angle >150°, (3) tensile strength of ~500 MPa and elasticity >15%, (4) biodegradation rates controllable from days to months, and (5) biocompatibility with >95% cell viability in mammalian cell culture assays. Light reflectance measurements will show >85% reflectivity at peak wavelength due to the photonic crystal-like architecture.",
  
  "mechanisms": "At the molecular scale, silk fibroin will be genetically modified to incorporate histidine-rich repeats and elastin-like sequences to enhance nanoscale ordering during self-assembly. Directed self-assembly techniques (e.g., capillary-force lithography and evaporation-induced nanostructuring) will be used to pattern silk into quasi-periodic nanostructures (~150–250 nm spacing) that interfere with visible light. Self-cleaning will emerge from hierarchical roughness combined with surface chemical modification (e.g., fluorination or silanization). Biocompatibility is mediated by the natural amino acid composition of fibroin, while mechanical strength is governed by β-sheet crystallites formed during post-processing (e.g., methanol annealing).",
  
  "design_principles": [
    "Utilize recombinant DNA techniques to produce silk fibroin with sequence motifs favoring nanostructure alignment and crystallinity.",
    "Apply evaporation-driven self-assembly and nanoimprint lithography to create insect-mimetic surface topographies with precise nanogratings (period ~180 nm, depth ~100 nm).",
    "Integrate hierarchical roughness by combining nanostructures with microscale pillars (~10–30 µm pitch) for dual-scale superhydrophobicity.",
    "Optimize β-sheet content (via FTIR or XRD control) to balance mechanical integrity and biodegradation rate.",
    "Design for spectral tunability by modulating nanostructure periodicity to achieve blue to red coloration without pigments.",
    "Ensure surface chemistry remains inert or cell-adhesive by controlling functional groups (e.g., RGD peptides for cell adhesion or PEGylation for anti-fouling)."
  ],
  
  "unexpected_properties": [
    "Angle-independent coloration due to quasi-random nanostructure arrays may emerge, offering consistent color regardless of viewing angle.",
    "The material may exhibit photonic thermal insulation, reflecting infrared while allowing visible transmission, reducing thermal load in medical implants.",
    "Due to localized electric field enhancements at nanostructure peaks, the silk surface could exhibit nonlinear optical effects under high-intensity light.",
    "Surface nanostructures may trap airborne pathogens or particulates, leading to passive antimicrobial or filtration behavior when paired with enzymatic coatings."
  ],
  
  "comparison": {
    "Compared to traditional silk fibroin, this engineered material introduces structural coloration and self-cleaning—absent in native silk.",
    "Versus pigment-based coloration, this approach is non-toxic, fade-resistant, and biocompatible.",
    "Compared to structurally colored insect cuticles, the silk analog requires 90–95% less metabolic energy (no biological development), enabling scalable, energy-passive production.",
    "Compared to existing synthetic photonic materials (e.g., polymer opals), this material is biodegradable, mechanically robust (twice the toughness), and biologically safe."
  },
  
  "novelty": "This hypothesis proposes the first integration of bioengineered silk fibroin with insect-inspired structural coloration and multifunctional nanostructures, yielding a biodegradable, self-cleaning, structurally colored material without relying on pigments or energy-intensive processing. It bridges bioinspired nanophotonics and sustainable biomaterials, offering new frontiers in implantable devices, responsive textiles, and passive optical materials. Unlike any known biomaterial, this system merges optical, mechanical, and biochemical functionality in a single self-assembling protein polymer."
}

"""

# 3. Hypotheses generated from different subgraphs.
# Each list element is one JSON-like hypothesis kept as a raw multi-line string
# (surrounding whitespace included); the list currently holds a single entry.
hypotheses_generees = [
    """
{
  "hypothesis": "Silk-based biomaterials can be engineered to exhibit structural coloration and self-cleaning properties, significantly reducing energy consumption in optical and biological applications by mimicking the energy-efficient structural adaptations found in insects.",
  "outcome": "The engineered silk biomaterials will demonstrate structural coloration with a 50% reduction in light absorption compared to conventional pigment-based colorants, and a self-cleaning efficiency of 90% within 24 hours of exposure to contaminants. This will lead to a potential energy saving of 30% in applications requiring frequent cleaning and color stability.",
  "mechanisms": "The silk fibroin will be modified to include nanostructures similar to those found in insect wings, which cause structural coloration through light interference. The self-cleaning property will be achieved by incorporating hydrophobic surface treatments and photocatalytic nanoparticles such as titanium dioxide (TiO2) within the silk matrix. The biocompatibility of silk will ensure minimal immune response in biological applications, while its biodegradability will allow for environmentally friendly disposal.",
  "design_principles": [
    "Utilize the inherent biocompatibility and multifunctionality of silk fibroin to create a versatile biomaterial.",
    "Incorporate nanoscale patterns on the silk surface to mimic the structural coloration of insects, such as the Morpho butterfly, which uses multilayer nanostructures to produce vivid blue colors.",
    "Integrate hydrophobic coatings and photocatalytic TiO2 nanoparticles to achieve self-cleaning properties, reducing the need for external cleaning agents.",
    "Ensure the biomaterial retains its mechanical strength and flexibility through optimized processing conditions, such as controlled drying and cross-linking.",
    "Conduct thorough biocompatibility and degradation tests to confirm the material's safety and environmental impact."
  ],
  "unexpected_properties": "The engineered silk biomaterial may exhibit unexpected properties such as enhanced antimicrobial activity due to the synergistic effect of structural coloration and self-cleaning nanoparticles. This could lead to a reduction in bacterial adhesion by up to 80% compared to unmodified silk. Additionally, the material may show improved optical sensing capabilities, with a sensitivity increase of 20% in detecting environmental changes due to the light interference patterns.",
  "comparison": "Compared to traditional optical materials like glass or synthetic polymers, the silk-based biomaterial will offer superior biocompatibility and biodegradability, making it ideal for medical implants and environmental sensors. In terms of energy efficiency, the self-cleaning property will outperform conventional materials that require frequent manual cleaning, reducing maintenance costs by 40%. The structural coloration will provide long-term color stability without the need for energy-intensive pigment production processes.",
  "novelty": "This research advances existing knowledge by integrating structural coloration and self-cleaning properties into a single biomaterial, leveraging the unique advantages of silk fibroin. The energy-efficient and environmentally friendly aspects of the material represent a significant step forward in sustainable biomaterial design. The potential applications in medical devices, optical sensors, and energy-efficient coatings demonstrate the broad applicability and innovative nature of this approach."
}
    """
]

# 4. Encode the texts.
# The reference is embedded once. All generated hypotheses are embedded in a
# single batched call rather than one model.encode() call per loop iteration;
# the guard keeps an empty hypothesis list from ever reaching the encoder.
embedding_ref = model.encode(texte_reference, convert_to_tensor=True)
embeddings_hyp = (
    model.encode(hypotheses_generees, convert_to_tensor=True)
    if hypotheses_generees
    else []
)

# 5. Comparison and distances.
# For each hypothesis, report the cosine similarity to the reference and the
# corresponding distance (1 - similarity).
print("Comparaison avec le texte de référence :\n")
for i, (hyp, embedding_hyp) in enumerate(zip(hypotheses_generees, embeddings_hyp)):
    # cos_sim returns a 1x1 matrix for two single vectors; .item() unwraps it
    # into a plain Python float.
    similarity = util.cos_sim(embedding_ref, embedding_hyp).item()
    distance = 1 - similarity
    print(f"Hypothèse #{i + 1} :")
    print(f"  → Similarité cosinus : {similarity:.4f}")
    print(f"  → Distance sémantique : {distance:.4f}")
    print()


Comparaison avec le texte de référence :

Hypothèse #1 :
  → Similarité cosinus : 0.9469
  → Distance sémantique : 0.0531

