In [1]:
import os

# Expose only GPUs 0-3 to this process. The variable is set before transformers
# is imported so that it is already in place when CUDA gets initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"

from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # the device tokenized inputs are moved to before generation

# Single source of truth for the checkpoint location, so the model and the
# tokenizer can never be loaded from two different directories by accident.
# Set QWEN_MODEL_PATH to point at another checkpoint without editing this cell.
# Alternative checkpoint names noted by the author:
#   Qwen1.5-72B-Chat-GPTQ-Int4
#   Qwen1.5-14B-Chat
MODEL_PATH = os.environ.get(
    "QWEN_MODEL_PATH",
    "/storage1/fs1/yinjie.tang/Active/Shawn_Xiao/Qwen1.5-72B-Chat",
)

# device_map="auto" / torch_dtype="auto" leave weight placement and precision
# to transformers instead of pinning them here.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto", torch_dtype="auto"
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

Loading checkpoint shards:   0%|          | 0/38 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
from pyvis.network import Network
import pandas as pd
import re
import networkx as nx

# Load the Excel file holding one row per source document
filepath = 'modified_updated(Qwen1.5 72b)_Anabaena nitrogen fixation_causal.xlsx'
df = pd.read_excel(filepath, engine='openpyxl')

# Undirected graph: one node per extracted entity, one edge per extracted pair
G = nx.Graph()

# Substrings that disqualify a pair when found in either entity name
# words_to_exclude = ['Synechocystis', 'Cyanobacteria', 'cyanobacteria']
words_to_exclude = []

# Regular expression to match the pattern (entity A, entity B)
pattern = r'\(([^,]+), ([^\)]+)\)'

# Walk the rows, pulling entity pairs out of the answer column and tagging each
# edge with the title of the row it came from
for _, row in df.iterrows():
    # value = row['Response to New Question']
    value = row['Answer to Question 2']
    source = row['Title']  # provenance stored on the edge

    # Empty spreadsheet cells come back from pandas as NaN (a float), which
    # re.findall rejects with a TypeError; skip anything that is not text.
    if not isinstance(value, str):
        continue

    for entity_a, entity_b in re.findall(pattern, value):
        # Drop the pair when any excluded word occurs in either entity name
        if any(word in entity_a or word in entity_b for word in words_to_exclude):
            continue
        G.add_node(entity_a, label=entity_a)
        G.add_node(entity_b, label=entity_b)
        # NOTE(review): a pair that appears in several rows keeps only the
        # title of the last row processed, because the edge attribute is reassigned.
        G.add_edge(entity_a, entity_b, title=source)

def search_network(graph, keyword, depth=1):
    """Return the subgraph around every node whose label mentions ``keyword``.

    Args:
        graph: Graph object exposing ``nodes(data=True)``, ``neighbors(node)``
            and ``subgraph(nodes)`` (the notebook passes a ``networkx.Graph``).
        keyword: Text to look for; matching is a case-insensitive substring test.
        depth: Number of neighbour hops to add around the matching nodes.
            ``0`` (or a negative value) keeps only the matching nodes.

    Returns:
        ``graph.subgraph(...)`` over the matching nodes and everything within
        ``depth`` hops of them.
    """
    needle = keyword.lower()

    # A node without a 'label' attribute is matched on its own id instead of
    # raising KeyError.
    seeds = {
        node
        for node, attrs in graph.nodes(data=True)
        if needle in str(attrs.get('label', node)).lower()
    }

    nodes_of_interest = set(seeds)
    frontier = seeds
    for _ in range(depth):
        # Expand only the nodes discovered in the previous hop; nodes collected
        # earlier have already contributed their neighbours.
        reached = set()
        for node in frontier:
            reached.update(graph.neighbors(node))
        frontier = reached - nodes_of_interest
        if not frontier:
            break  # nothing new to reach, further hops cannot add nodes
        nodes_of_interest |= frontier

    return graph.subgraph(nodes_of_interest)

# Keep only the neighbourhood of nodes whose label mentions the keyword
keyword = "nitrogen"  # Replace with your keyword
filtered_graph = search_network(G, keyword)

# Flatten the surviving node names into one comma-separated string; a later
# cell feeds this text to the language model
node_names = [name for name in filtered_graph.nodes()]
node_names_text = ", ".join(node_names)
print(node_names_text)

# Display options handed to pyvis as a JSON string: physics (barnesHut solver,
# stabilization) and node font settings
network_options = """
{
  "physics": {
    "barnesHut": {
      "gravitationalConstant": -80000,
      "centralGravity": 0.5,
      "springLength": 75,
      "springConstant": 0.05,
      "damping": 0.09,
      "avoidOverlap": 0.5
    },
    "maxVelocity": 100,
    "minVelocity": 0.1,
    "solver": "barnesHut",
    "timestep": 0.3,
    "stabilization": {
        "enabled": true,
        "iterations": 500,
        "updateInterval": 10,
        "onlyDynamicEdges": false,
        "fit": true
    }
  },
  "nodes": {
    "font": {
      "size": 30,
      "color": "white"
    }
  }
}
"""

# Build the interactive view from the filtered graph and apply the options
net = Network(height="2160px", width="100%", bgcolor="#222222", font_color="white")
net.from_nx(filtered_graph)
net.set_options(network_options)

# Write the visualisation to a standalone HTML file
net.write_html('filterd_entity_network.html')

photoheterotrophic cultures, Proportion of heterocysts, nitrogen-fixing cell differentiation, hydrogen gas, nitrogen fixation inhibition, Entity 'nitrogenase activity', nitrogen deficiency signal, Mo, Entity 'stimulatory effect of glyoxylate on the nitrogenase', total nitrogen, FurC, underlying soil chlorophyll, Reduced ferredoxin, Entity 'A. (Anabaena)', hydrogen production, independent of nitrogen metabolism, Entity Anabaena variabilis, heterocyst differentiation, Entity 'PN2', Entity 'Nitrogen-starved Conditions', Heterocyst formation, nrrA disruption, PatS, organisms of the shards, Nitrogen isotope fractionation, +) when using thylakoids from vegetative cells.

Formatted as combinations:

((recombinant plant-type ferredoxins from Anabaena sp. PCC 7120, Cd, metal deposition, 0.2 M sucrose, nifH sequences, Ferredoxin III, nitrogen availability, Environmental conditions, (soil pH 9.23, L. boryana, Atmospheric nitrogen, growth medium, Anabaena sp. UTEX 2576, available fixed nitrogen, N

In [4]:
from IPython.display import Markdown

def trim_text(text, max_length):
    """Shorten ``text`` to roughly ``max_length`` characters, ending on a word.

    Text that already fits is returned unchanged. Otherwise the first
    ``max_length`` characters are taken, everything from the last space onward
    is dropped (so no partial word remains), and "..." is appended. When the
    cut contains no space at all, the raw cut is kept. The appended ellipsis
    is not counted against ``max_length``.
    """
    if len(text) <= max_length:
        return text

    head = text[:max_length]
    last_space = head.rfind(' ')
    if last_space != -1:
        # Discard the trailing fragment after the final space
        head = head[:last_space]
    return head + "..."
    
# Cap the node list so the prompt stays bounded. The limit is counted in
# characters (trim_text measures len(text)), not in tokens.
cut_off_chunk_size = 30000
trimmed_node_names_text = trim_text(node_names_text, cut_off_chunk_size)

# Alternative task kept for reference (categorise and summarise the terms):
# prompt = "These are the terms related to " + filepath + keyword + ", categorize them and write a summary report." + trimmed_node_names_text

# Build the prompt from the *trimmed* node list so the cap above actually takes
# effect, and separate the file name, the keyword and the node list so they do
# not run together into a single word.
prompt = (
    "These are the terms related to " + filepath + " " + keyword
    + ", I try to use the nitrogen fixation ability of Anabaena sp to produce bio-fertilizer, given these information, write a experiment plan: "
    + trimmed_node_names_text
)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]

# Render the conversation with the tokenizer's chat template as plain text,
# ending with the marker that asks the model for an assistant turn
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=10000
)

# Each generated sequence starts with a copy of the prompt tokens; slice them
# off so only the newly generated tokens are decoded
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response1 = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
display(Markdown(response1))

Experiment Plan: Anabaena Nitrogen Fixation for Bio-Fertilizer Production

Objective:
To utilize the nitrogen-fixing ability of Anabaena sp. to produce an effective bio-fertilizer, optimizing conditions for nitrogen fixation and maximizing its benefits for plant growth.

1. Strain Selection and Preparation:
- Choose a high-yielding, stable Anabaena strain, such as Anabaena variabilis or Anabaena sp. PCC 7120, known for efficient nitrogen fixation.
- Culture the selected strain under photoheterotrophic conditions to promote growth and nitrogen fixation.

2. Optimization of Environmental Factors:
- Investigate the optimal pH for nitrogen fixation (e.g., 9.23 for some strains).
- Study the effects of varying light intensities, day/night cycles, and temperature on nitrogenase activity.
- Assess the influence of carbon/nitrogen ratios (C:N) on nitrogen fixation efficiency.

3. Heterocyst Formation and Maintenance:
- Determine the ideal proportion of heterocysts in the filament to ensure efficient nitrogenase activity.
- Investigate factors that stimulate heterocyst differentiation, such as nitrogen deficiency signals, glyoxylate, and FurC.
- Study the impact of Mo and other trace elements on nitrogenase function.

4. Nitrogenase Protection and Inhibition:
- Evaluate the role of protective mechanisms against oxygen, including hydrogen gas production and the involvement of flavodoxin and ferredoxin.
- Investigate the effects of inhibitors like carbofuran and metals on nitrogenase activity.
- Test potential protectants like reduced ferredoxin and hydrogenase genes.

5. Nitrogen Assimilation and Regulation:
- Analyze the expression of nitrogen-regulated genes, including nif genes, ntcA, and nrrA.
- Monitor the impact of nitrogen starvation on nif gene expression and nitrogenase activity.
- Study the influence of global nitrogen regulators like NtcA and CNF2 on nitrogen assimilation.

6. Bio-fertilizer Formulation:
- Immobilize Anabaena cells in a suitable matrix, such as calcium alginate, for controlled release of fixed nitrogen.
- Optimize the formulation for stability, shelf life, and ease of application.
- Evaluate the compatibility of Anabaena with other beneficial microorganisms for synergistic effects.

7. Field Trials:
- Conduct greenhouse trials to assess the bio-fertilizer's effectiveness in promoting plant growth under varying nitrogen regimes.
- Monitor plant health, biomass, and yield compared to conventional fertilizers.
- Measure the impact on soil nitrogen levels and nitrogen isotopic fractionation.

8. Economic Analysis:
- Estimate the cost-effectiveness of the bio-fertilizer production process.
- Compare the environmental benefits, such as reduced dependence on chemical fertilizers and mitigation of greenhouse gases.

9. Monitoring and Adaptation:
- Continuously monitor the performance of the bio-fertilizer under different agricultural conditions.
- Adjust the formulation and cultivation methods based on field results and feedback.

By conducting these experiments, we aim to develop a sustainable and environmentally friendly bio-fertilizer that harnesses the nitrogen-fixing potential of Anabaena sp. to improve crop yields while maintaining soil health.

# 