# Initial prompting of Llama3.2 using Combo_3 prompt to generate features for datasets

# In [None]:
import csv
import os
import re
import pandas as pd
import requests

# === File Paths ===
# Input dataset, prompt template, and destination of the generated features.
smiles_csv_path = r'C:\PhD\Project\Vero_E6.csv'
prompt_part1_path = r'C:\PhD\Prompt_Engineering\Final_Prompts\Combo_3.txt'
output_csv_path = r'C:\PhD\Prompt_Engineering\Outputs2\DS7_Vero_E6.csv'

# === Load Prompt Component (part 1 only) ===
# The prompt is kept without surrounding whitespace; the SMILES line is
# appended to it later for every molecule.
with open(prompt_part1_path, mode='r', encoding='utf-8') as prompt_file:
    prompt_text = prompt_file.read()
prompt_part1 = prompt_text.strip()

# === Read SMILES from CSV ===
# Missing entries are dropped and each distinct SMILES string is kept once.
df = pd.read_csv(smiles_csv_path)
smiles_column = df['PUBCHEM_EXT_DATASOURCE_SMILES']
smiles_list = smiles_column.dropna().unique()

# === Output CSV Setup ===
# A header row is needed only when the output file is missing or still empty;
# otherwise rows are appended below whatever is already there.
write_header = not (
    os.path.exists(output_csv_path) and os.path.getsize(output_csv_path) > 0
)
with open(output_csv_path, mode='a', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)

    if write_header:
        feature_columns = [f"Feature_{n}" for n in range(1, 16)]
        writer.writerow(["SMILES", *feature_columns])

    # === Helper Function to Parse LLaMA Output ===
    def parse_llama_output(output):
        """Extract exactly 15 numeric feature values from a model response.

        Args:
            output: Raw text returned by the model. ``None`` is treated as
                an empty response.

        Returns:
            A list of 15 numeric strings, in the order they appear in the
            response, or ``None`` when the response is empty, looks like a
            refusal, or does not contain exactly 15 numbers.
        """
        output = (output or "").strip()

        # Early check: nothing came back, or the model refused / deferred to
        # RDKit. Both the ASCII and the typographic apostrophe are covered.
        refusal_markers = ("I can't", "I can’t", "use RDKit")
        if not output or any(marker in output for marker in refusal_markers):
            return None

        # The optional exponent keeps a value such as "1.5e-3" as ONE number
        # instead of splitting it into "1.5" and "-3", which would throw off
        # the count below and reject an otherwise valid answer.
        numbers = re.findall(r'-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?', output)

        # Anything other than exactly 15 values cannot be one full feature set.
        return numbers if len(numbers) == 15 else None



    # === Ollama Prompting Loop ===
    # Request settings that do not change between molecules are built once.
    ollama_url = 'http://localhost:11434/api/generate'
    # Ollama reads sampling parameters from the nested "options" object;
    # top-level "temperature"/"top_p" keys are ignored by the API, so they
    # must be sent here for the intended deterministic decoding to apply.
    sampling_options = {'temperature': 0.0, 'top_p': 1.0}
    # (connect, read) timeouts in seconds, so a stalled server cannot hang
    # the whole run on a single molecule.
    request_timeout = (10, 600)

    for smiles in smiles_list:
        full_prompt = f"{prompt_part1}\nSMILES: {smiles}"

        try:
            response = requests.post(
                ollama_url,
                json={
                    'model': 'llama3.2',
                    'prompt': full_prompt,
                    'stream': False,
                    'options': sampling_options,
                },
                timeout=request_timeout,
            )
            # Surface HTTP failures (e.g. the model is not available) as
            # errors instead of recording an empty reply as INVALID.
            response.raise_for_status()
            raw_output = (response.json().get('response') or '').strip()
        except Exception as e:
            # Deliberate best-effort boundary: one failing request is logged
            # and recorded, and the batch continues with the next molecule.
            print(f"❌ Error processing SMILES: {smiles}\nError: {e}\n")
            writer.writerow([smiles] + ["ERROR"] * 15)
        else:
            parsed = parse_llama_output(raw_output)

            if parsed:
                writer.writerow([smiles] + parsed)
            else:
                print(f"❌ Invalid output for SMILES: {smiles}\nResponse:\n{raw_output}\n")
                writer.writerow([smiles] + ["INVALID"] * 15)

        # Push the row to disk right away so an interrupted long run keeps
        # everything processed so far.
        csvfile.flush()

# Printed once the `with` block has exited, i.e. after the output CSV has been
# closed. It is shown even if some SMILES were recorded as INVALID or ERROR.
print("✅ Ollama prompt generation complete.")