In [1]:
from sentence_transformers import SentenceTransformer, util
import torch

# 1. Load a sentence-embedding model specialised for scientific articles.
#    The checkpoint is resolved by name, so the first run may need network
#    access to download it.
model = SentenceTransformer(model_name_or_path="allenai-specter")

# 2. Reference text: a JSON-style hypothesis record (described by the author as taken from a real scientific article)
texte_reference = """
{
  "hypothesis": "Silk fibroin can be bioengineered to mimic insect-inspired nanostructures responsible for structural coloration, enabling the development of a self-cleaning, structurally colored, energy-passive, biocompatible material with broad applicability in biomaterial design. This multifunctional biopolymer will be fabricated using a recombinant expression system coupled with nanoscale patterning via directed self-assembly and may eliminate the energy-intensive requirements typical of structural coloration in insects.",
  
  "outcome": "The engineered silk-based material will demonstrate (1) vivid, tunable structural coloration across the visible spectrum (400–700 nm), (2) a self-cleaning hydrophobic surface with a contact angle >150°, (3) tensile strength of ~500 MPa and elasticity >15%, (4) biodegradation rates controllable from days to months, and (5) biocompatibility with >95% cell viability in mammalian cell culture assays. Light reflectance measurements will show >85% reflectivity at peak wavelength due to the photonic crystal-like architecture.",
  
  "mechanisms": "At the molecular scale, silk fibroin will be genetically modified to incorporate histidine-rich repeats and elastin-like sequences to enhance nanoscale ordering during self-assembly. Directed self-assembly techniques (e.g., capillary-force lithography and evaporation-induced nanostructuring) will be used to pattern silk into quasi-periodic nanostructures (~150–250 nm spacing) that interfere with visible light. Self-cleaning will emerge from hierarchical roughness combined with surface chemical modification (e.g., fluorination or silanization). Biocompatibility is mediated by the natural amino acid composition of fibroin, while mechanical strength is governed by β-sheet crystallites formed during post-processing (e.g., methanol annealing).",
  
  "design_principles": [
    "Utilize recombinant DNA techniques to produce silk fibroin with sequence motifs favoring nanostructure alignment and crystallinity.",
    "Apply evaporation-driven self-assembly and nanoimprint lithography to create insect-mimetic surface topographies with precise nanogratings (period ~180 nm, depth ~100 nm).",
    "Integrate hierarchical roughness by combining nanostructures with microscale pillars (~10–30 µm pitch) for dual-scale superhydrophobicity.",
    "Optimize β-sheet content (via FTIR or XRD control) to balance mechanical integrity and biodegradation rate.",
    "Design for spectral tunability by modulating nanostructure periodicity to achieve blue to red coloration without pigments.",
    "Ensure surface chemistry remains inert or cell-adhesive by controlling functional groups (e.g., RGD peptides for cell adhesion or PEGylation for anti-fouling)."
  ],
  
  "unexpected_properties": [
    "Angle-independent coloration due to quasi-random nanostructure arrays may emerge, offering consistent color regardless of viewing angle.",
    "The material may exhibit photonic thermal insulation, reflecting infrared while allowing visible transmission, reducing thermal load in medical implants.",
    "Due to localized electric field enhancements at nanostructure peaks, the silk surface could exhibit nonlinear optical effects under high-intensity light.",
    "Surface nanostructures may trap airborne pathogens or particulates, leading to passive antimicrobial or filtration behavior when paired with enzymatic coatings."
  ],
  
  "comparison": {
    "Compared to traditional silk fibroin, this engineered material introduces structural coloration and self-cleaning—absent in native silk.",
    "Versus pigment-based coloration, this approach is non-toxic, fade-resistant, and biocompatible.",
    "Compared to structurally colored insect cuticles, the silk analog requires 90–95% less metabolic energy (no biological development), enabling scalable, energy-passive production.",
    "Compared to existing synthetic photonic materials (e.g., polymer opals), this material is biodegradable, mechanically robust (twice the toughness), and biologically safe."
  },
  
  "novelty": "This hypothesis proposes the first integration of bioengineered silk fibroin with insect-inspired structural coloration and multifunctional nanostructures, yielding a biodegradable, self-cleaning, structurally colored material without relying on pigments or energy-intensive processing. It bridges bioinspired nanophotonics and sustainable biomaterials, offering new frontiers in implantable devices, responsive textiles, and passive optical materials. Unlike any known biomaterial, this system merges optical, mechanical, and biochemical functionality in a single self-assembling protein polymer."
}

"""

# 3. Hypotheses generated from different subgraphs (one JSON-style string per hypothesis)
hypotheses_generees = [
    """
{
  "hypothesis": "By engineering a recombinant silk fibroin-based biomaterial that mimics the structural coloration of insects, we can create a multifunctional, self-cleaning, and biocompatible coating with tunable optical properties for energy-efficient biomedical and optical applications, while reducing the energy-intensive synthesis typically associated with such nanostructures.",
  "outcome": "The resulting material is expected to exhibit: (1) tunable structural coloration across 400-700 nm wavelength range with >90% reflectivity, (2) self-cleaning properties with water contact angles >150°, (3) biocompatibility meeting ISO 10993-5 standards, (4) 50% reduction in energy input compared to insect-based structural coloration production, and (5) programmable degradation rates from 1 week to 1 year via controlled crosslinking of fibroin β-sheet domains.",
  "mechanisms": {
    "molecular": "Genetically modified fibroin (e.g., [GAGAGS]n repeats with inserted anionic domains) will self-assemble into β-sheet nanocrystals (5-10 nm) interspersed with amorphous regions, creating refractive index contrast (n=1.55 vs 1.25).",
    "nanoscale": "Precision electrospinning at 15-25 kV with 80% humidity will produce hierarchical fibers (200-500 nm diameter) with cholesteric liquid crystal organization (pitch=λ/2n) for structural color.",
    "macroscale": "The material will demonstrate mechanochromic response (Δλ>50 nm under 10% strain) due to elastic deformation of the photonic nanostructure, while maintaining >80% transparency in thin films (<10 μm)."
  },
  "design_principles": [
    "Bioinspired design: Replicate Morpho butterfly wing architecture using computational models of chitin-protein assemblies, but substitute with fibroin to enhance biocompatibility.",
    "Energy minimization: Utilize ambient-temperature aqueous processing (pH 4.2, 25°C) rather than insect metabolic pathways that require ATP-intensive amino acid polymerization.",
    "Multifunctional integration: Covalently graft fluorinated silanes (e.g., (heptadecafluoro-1,1,2,2-tetrahydrodecyl)triethoxysilane) to fibroin's tyrosine residues for simultaneous self-cleaning and optical functionality.",
    "Programmable degradation: Control β-sheet content (30-70%) via methanol vapor exposure time (0-24h) to tailor enzymatic degradation rates by protease XIV."
  ],
  "unexpected_properties": {
    "Dynamic color switching": "The material may exhibit unexpected voltage-sensitive color changes (Δλ~30 nm at 5V) due to alignment of fibroin's dipole moments (calculated μ=3.2D) in electric fields.",
    "Anomalous adhesion": "Despite superhydrophobicity, the nanostructured surface could show gecko-like dry adhesion (Fad>1 mN/mm²) from van der Waals interactions between β-sheet nanocrystals and substrates.",
    "Bioactive luminescence": "Trace incorporation of lanthanides (e.g., Eu³+) during fibroin assembly may yield unexpected long-persistence phosphorescence (τ>100 ms) useful for biosensing."
  },
  "comparison": {
    "vs synthetic photonic crystals": "50% lower production energy than silicon inverse opals (2.5 kWh/g vs 5 kWh/g), while maintaining comparable Q factors (>1000) for optical applications.",
    "vs traditional biomaterials": "Superior multifunctionality index (MFI=8.3) compared to collagen (MFI=4.1) or chitosan (MFI=5.7) by combining optical, self-cleaning, and mechanical properties.",
    "vs insect cuticle": "10× faster structural color response to environmental changes (τ=10 ms vs 100 ms) due to fibroin's lower viscoelastic modulus (E'=2 GPa vs chitin's 7 GPa)."
  },
  "novelty": {
    "First integration": "This represents the first integration of structural coloration with silk's inherent biocompatibility, enabling new applications like color-changing surgical sutures that indicate infection pH.",
    "Energy paradigm shift": "Demonstrates that energy-intensive biological processes can be replicated through ambient biofabrication, challenging assumptions about biomimetic manufacturing costs.",
    "Emergent functionality": "The combination of properties creates unexpected capabilities - e.g., the material could serve simultaneously as a wound dressing, bacterial sensor (via color change), and self-cleaning surface.",
    "Computational design": "Employs novel molecular dynamics simulations (ReaxFF force field) to predict optimal fibroin modifications for photonic properties before synthesis."
  }
}
    """,
    """
{
  "hypothesis": "Engineered Bombyx mori silkworms, through targeted CRISPR-Cas9 gene editing of the chitin synthase genes (CHS1-8) and the introduction of genes encoding for structurally colored biopolymers found in energy-intensive insects (e.g., iridophores producing photonic crystals with a periodicity of 100-300 nm), will produce silk fibers with integrated, self-cleaning structural coloration and enhanced biocompatibility due to the modified chitin content in the sericin layer, thereby expanding its broad applicability in biomaterial design for visually dynamic and infection-resistant medical implants.",
  "outcome": "The genetically modified silkworms are expected to produce silk fibers exhibiting vibrant, angle-dependent structural colors across the visible spectrum (wavelengths of 400-700 nm) with a color purity (full width at half maximum of the reflectance peak) of less than 50 nm. The modified sericin layer, with a hypothesized reduction of 30-50% in chitin content (quantified by HPLC-MS analysis of hydrolyzed sericin), will demonstrate a statistically significant (p < 0.05) increase in fibroblast cell adhesion (at least 20% higher cell density after 24 hours, measured by confocal microscopy and MTT assay) and a reduction in bacterial biofilm formation (at least 40% reduction in biomass, quantified by crystal violet staining and SEM imaging) compared to conventional silk. The self-cleaning properties, assessed by contact angle measurements (achieving a water contact angle > 110° after surface modification with fluorinated silanes inspired by insect epicuticles), combined with the inherent biocompatibility and structural coloration, will demonstrate the potential for these modified silk scaffolds in applications such as visually trackable and infection-resistant surgical meshes and smart wound dressings.",
  "mechanisms": "The structural coloration will arise from the self-assembly of the introduced photonic crystal-forming biopolymers (e.g., guanine nanocrystals arranged in a periodic lattice within the silk fibroin matrix) during the silk spinning process within the silkworm's silk glands. The CRISPR-mediated reduction in chitin synthesis will alter the composition and surface morphology of the sericin layer, influencing protein adsorption and cellular interactions, potentially leading to enhanced biocompatibility by reducing pro-inflammatory responses. The self-cleaning property will be achieved by post-processing the silk fibers with a thin layer of hydrophobic or superhydrophobic material, mimicking the epicuticular waxes found on some self-cleaning insects, resulting in a high water contact angle and low surface energy, preventing adhesion of contaminants. The energy-intensive nature of the insects inspiring the structural coloration highlights the metabolic cost associated with producing complex biomaterials, which will be a consideration in the scalability of this engineered silk production.",
  "design_principles": [
    "**Genetic Engineering for Multifunctionality:** Employ CRISPR-Cas9 to downregulate chitin synthase genes in Bombyx mori to modify the sericin layer for improved biocompatibility.",
    "**Biomimetic Integration of Structural Color:** Introduce genes encoding for photonic crystal-forming biopolymers, inspired by energy-intensive insects exhibiting vibrant structural colors, to be co-expressed and integrated into the silk fibroin during fiber formation.",
    "**Hierarchical Self-Assembly:** Leverage the natural self-assembly processes of silk proteins and the introduced biopolymers to create ordered microstructures responsible for structural coloration within the fiber.",
    "**Surface Functionalization for Self-Cleaning:** Apply a thin, biocompatible hydrophobic coating (e.g., based on fluorinated silanes or bio-derived waxes) to the silk surface to impart self-cleaning properties, drawing inspiration from insect cuticle nanostructures.",
    "**Modular Biomaterial Design:** Aim for a modular design where the degree of structural coloration, self-cleaning, and biocompatibility can be tuned by varying the expression levels of the introduced genes and the surface modification techniques.",
    "**In-vivo Biopolymer Production:** Utilize the silkworm's natural silk production machinery as a bioreactor for the synthesis and assembly of the multifunctional biopolymer.",
    "**Non-toxic Processing:** Employ biocompatible and environmentally friendly methods for any post-processing surface modifications to maintain the overall biocompatibility of the engineered silk.",
    "**Visual Feedback Mechanism:** The inherent structural coloration will serve as a visual indicator of the material's integrity and potential degradation over time in biomedical applications."
  ],
  "unexpected_properties": [
    "**Enhanced Mechanical Strength with Color:** The incorporation of the photonic crystal structures within the fibroin matrix might unexpectedly reinforce the silk fibers, leading to a higher tensile strength (potentially a 10-15% increase in Young's modulus compared to unmodified silk).",
    "**Tunable Drug Release Kinetics via Structural Changes:** The modified sericin layer and the presence of the structural color components could alter the porosity and surface area of the silk scaffold, leading to unexpected changes in drug loading capacity and release kinetics for potential drug delivery applications.",
    "**Bio-luminescent Silk:** If genes for bioluminescent proteins, also found in some energy-intensive insects, are co-expressed, the resulting silk fibers might exhibit inherent luminescence, providing a novel tracking or diagnostic feature.",
    "**Self-Healing Properties:** The altered chitin-protein interactions in the sericin layer might, unexpectedly, enhance the self-healing capabilities of the silk material upon minor damage.",
    "**Antimicrobial Structural Color:** The specific nanostructures responsible for structural coloration might exhibit inherent antimicrobial properties due to their surface topography, offering an additional layer of infection resistance beyond the modified sericin.",
    "**Temperature-Responsive Color Change:** Careful selection and engineering of the photonic crystal-forming biopolymers could lead to silk fibers that exhibit reversible color changes in response to temperature variations, potentially enabling real-time monitoring of physiological conditions."
  ],
  "comparison": "Conventional silk biomaterials primarily rely on their inherent biocompatibility and mechanical strength. Compared to these, the engineered silk would offer integrated multifunctionality, combining biocompatibility with self-cleaning properties and visually dynamic structural coloration, eliminating the need for additional dyes or coatings that can leach or degrade over time. Synthetic biomaterials like polyethylene glycol (PEG) hydrogels or polycaprolactone (PCL) scaffolds often require surface modifications to achieve biocompatibility or self-cleaning, which can compromise their mechanical integrity or introduce cytotoxic components. The proposed engineered silk, produced biologically, aims for a more integrated and potentially more biocompatible solution. The energy cost of producing structural colors in insects (e.g., the metabolic investment in iridophore development) serves as a natural benchmark for the efficiency of the engineered silkworm's production. The structural coloration achieved would be distinct from pigment-based coloration in terms of its angle-dependent iridescence and resistance to photobleaching, offering superior long-term visual stability for implantable devices.",
  "novelty": "This research proposes a novel bio-fabrication approach to create a multifunctional biomaterial by genetically engineering a well-established biopolymer producer (Bombyx mori) to imbue silk fibers with self-cleaning and structural coloration functionalities at the genetic level. This in-vivo integration of complex optical properties and enhanced biocompatibility, inspired by the sophisticated biomaterials found in energy-intensive insects, represents a significant departure from traditional post-processing modifications of biomaterials. The potential for visually dynamic and inherently infection-resistant biomaterials opens up entirely new avenues for applications in medical implants, smart textiles, and bio-integrated photonics, offering functionalities that are currently not achievable with existing materials or require complex and potentially unstable fabrication processes. The use of the silkworm as a bioreactor for producing these advanced biomaterials offers a potentially scalable and sustainable alternative to energy-intensive synthetic manufacturing methods."
}
    """,
    """
{
  "hypothesis": "Silk-based biomaterials, engineered with structural coloration and self-cleaning properties, can significantly reduce the energy intensity of biomedical and engineering applications by leveraging the multifunctionality and biocompatibility of silk fibroin. This hypothesis aims to create a new class of energy-efficient, multifunctional materials inspired by the energy-intensive processes of insects.",

  "outcome": "The expected findings include a 30-40% reduction in energy consumption for applications such as tissue scaffolds, drug delivery systems, and biomedical implants. The silk-based materials will exhibit enhanced biocompatibility, with a cell viability rate of over 95%, and structural coloration properties that eliminate the need for energy-intensive dyeing processes. The self-cleaning properties will reduce maintenance costs by 25-35%.",

  "mechanisms": "The primary mechanism involves the molecular structure of silk fibroin, which consists of repetitive sequences of glycine, alanine, and serine. These sequences form beta-sheets that contribute to silk's strength and biocompatibility. Structural coloration will be achieved through nanoscale patterning of the silk surface, mimicking the photonic crystals found in insect wings. Self-cleaning properties will be engineered through surface modifications that create hydrophobic and oleophobic surfaces, inspired by the lotus leaf effect.",

  "design_principles": [
    "Utilize recombinant DNA technology to produce silk fibroin with enhanced biocompatibility and multifunctionality.",
    "Incorporate nanoscale patterning techniques, such as nanoimprint lithography, to create structural coloration on the silk surface.",
    "Engineer self-cleaning properties through surface modifications, such as plasma treatment or chemical vapor deposition, to create hydrophobic and oleophobic surfaces.",
    "Develop biomimetic designs inspired by insect wings to optimize energy efficiency and structural integrity.",
    "Integrate silk-based materials into biomedical applications, such as tissue scaffolds and drug delivery systems, to leverage their biocompatibility and multifunctionality.",
    "Conduct in vitro and in vivo studies to assess the biocompatibility and functionality of the engineered silk materials.",
    "Evaluate the energy efficiency of the silk-based materials through comparative studies with traditional biomaterials."
  ],

  "unexpected_properties": "The silk-based materials may exhibit unexpected properties, such as enhanced mechanical strength due to the nanoscale patterning, and improved thermal conductivity, which could further reduce energy consumption. Additionally, the structural coloration may provide unexpected optical properties, such as fluorescence or iridescence, which could be leveraged for imaging and sensing applications.",

  "comparison": "Compared to traditional biomaterials, such as polylactic acid (PLA) and polycaprolactone (PCL), silk-based materials offer superior biocompatibility and multifunctionality. PLA and PCL have cell viability rates of around 85-90%, whereas silk-based materials are expected to achieve over 95%. Additionally, the energy-intensive processes required for the production and maintenance of traditional biomaterials will be significantly reduced through the use of silk-based materials with self-cleaning and structural coloration properties.",

  "novelty": "The novelty of this research lies in the integration of structural coloration and self-cleaning properties into silk-based biomaterials, inspired by the energy-intensive processes of insects. This approach advances existing knowledge by leveraging the unique properties of silk fibroin to create a new class of energy-efficient, multifunctional materials. The proposed research has the potential to revolutionize biomedical and engineering applications by reducing energy consumption, enhancing biocompatibility, and providing novel functionalities."
}
    """
]

# 4. Encode the texts.
# The reference is encoded once. The hypotheses are encoded together in a
# single batched call rather than one model.encode() call per iteration.
# NOTE(review): these texts are long; sentence-transformers truncates input
# beyond model.max_seq_length, so presumably only the leading part of each
# text contributes to its embedding - confirm the limit for this model.
embedding_ref = model.encode(texte_reference, convert_to_tensor=True)

if hypotheses_generees:
    embeddings_hyp = model.encode(hypotheses_generees, convert_to_tensor=True)
    # cos_sim yields a (1, n_hypotheses) matrix here; keep its single row
    # as plain Python floats.
    similarities = util.cos_sim(embedding_ref, embeddings_hyp)[0].tolist()
else:
    # Nothing to compare: only the header below is printed.
    similarities = []

# 5. Comparison and distances.
print("Comparaison avec le texte de référence :\n")
for rank, similarity in enumerate(similarities, start=1):
    # Cosine distance is the complement of cosine similarity.
    distance = 1 - similarity
    print(f"Hypothèse #{rank} :")
    print(f"  → Similarité cosinus : {similarity:.4f}")
    print(f"  → Distance sémantique : {distance:.4f}")
    print()


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/2.57k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/622 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/331 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/222k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/462k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Comparaison avec le texte de référence :

Hypothèse #1 :
  → Similarité cosinus : 0.9784
  → Distance sémantique : 0.0216

Hypothèse #2 :
  → Similarité cosinus : 0.9300
  → Distance sémantique : 0.0700

Hypothèse #3 :
  → Similarité cosinus : 0.9561
  → Distance sémantique : 0.0439

