<a href="https://colab.research.google.com/github/EvgeniaKantor/MeNow/blob/main/Toxicity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

In [None]:
# Load the working spreadsheet into a DataFrame; later cells read its
# 'Abstract' column.
df = pd.read_excel('df_with_toxicity_info.xlsx')

In [None]:
# Add an empty 'Toxic_Gemini' column (every cell starts as None).
# NOTE(review): the generation loop further down reads and writes the
# 'Toxic or Nontoxic' column, not this one - confirm which column is intended.
df['Toxic_Gemini'] = None

In [None]:
import pandas as pd
import google.generativeai as genai
import os
import time
import re
from google.api_core.exceptions import ServiceUnavailable
from google.colab import userdata

# Read the API key stored under the name 'gemini_key' through google.colab's
# userdata helper, and also export it as the GOOGLE_API_KEY environment variable.
GOOGLE_API_KEY = userdata.get('gemini_key')
os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY

# Configure the generative AI client with the same key
genai.configure(api_key=GOOGLE_API_KEY)

# Module-level handle to the 'gemini-pro' model; get_toxicity() calls
# model.generate_content() on it.
# NOTE(review): confirm 'gemini-pro' is still a model name the API serves.
model = genai.GenerativeModel('gemini-pro')

# Function to clean text in the abstract
def clean_text(text):
    """Normalize an abstract to lowercase alphanumerics separated by single spaces.

    Args:
        text: The raw abstract string.

    Returns:
        The lowercased text with every character other than ASCII letters,
        digits and whitespace removed, each run of whitespace collapsed to a
        single space, and leading/trailing whitespace stripped.
    """
    text = text.lower()  # Convert to lowercase
    # Remove special characters BEFORE collapsing whitespace: deleting a symbol
    # that sat between two spaces (e.g. "a - b") would otherwise leave a
    # double space behind in the result.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    text = re.sub(r'\s+', ' ', text)  # Collapse whitespace runs to one space
    return text.strip()  # Strip leading and trailing spaces

# Function to generate toxicity information from abstract
def get_toxicity(abstract):
    """Ask the Gemini model whether an abstract indicates Withania Somnifera is toxic.

    The request is attempted up to three times when the service reports
    ServiceUnavailable, waiting 5 s and then 10 s between attempts.

    Args:
        abstract: Abstract text to embed in the prompt.

    Returns:
        The stripped text of the first part of the first response candidate,
        or None when the abstract is blank, the response carries no usable
        content, an error occurs, or every attempt hit ServiceUnavailable.
    """
    # A blank abstract gives the model nothing to judge; skip the API call
    # rather than record a label that is not based on any text.
    if abstract is None or not str(abstract).strip():
        print("No abstract text provided. Skipping toxicity request.")
        return None

    query = (
        f"Determine if Withania Somnifera is toxic to humans based on the following abstract ("
        f"Respond with 'Toxic' or 'Nontoxic').\n\nAbstract:\n{abstract}"
    )
    max_attempts = 3  # Total number of attempts
    delay = 5  # Initial delay in seconds

    for attempt in range(1, max_attempts + 1):
        try:
            response = model.generate_content(query)
            if response and response.candidates and response.candidates[0].content.parts:
                return response.candidates[0].content.parts[0].text.strip()
            print(f"No valid content found for abstract: {abstract}")
            return None
        except IndexError as e:
            print(f"IndexError: {str(e)}")
            return None
        except ServiceUnavailable:
            # Only wait when another attempt will follow; sleeping after the
            # last failure just delays the caller for no benefit.
            if attempt == max_attempts:
                break
            print(f"Service unavailable. Retrying in {delay} seconds...")
            time.sleep(delay)
            delay *= 2  # Exponential backoff
        except Exception as e:
            # Any other failure is reported and treated as "no answer".
            print(f"Exception: {str(e)}")
            return None

    print(f"Maximum retries reached for abstract: {abstract}. Unable to generate toxicity.")
    return None

# Add a new column for cleaned abstracts (missing abstracts become '')
df['Cleaned_Abstract'] = df['Abstract'].apply(lambda x: clean_text(x) if pd.notna(x) else '')

# Rate limiting parameters
delay_between_requests = 1  # Delay in seconds between requests

# Column that receives the model's answer. Create it when the spreadsheet does
# not provide it; otherwise every row lookup below raises KeyError, which the
# per-row handler would swallow and nothing would ever be processed.
# NOTE(review): an earlier cell creates an empty 'Toxic_Gemini' column that this
# loop never fills - confirm which column is the intended target.
result_column = 'Toxic or Nontoxic'
if result_column not in df.columns:
    df[result_column] = None
# An all-empty column loads as float NaN; use object dtype so string answers
# can be stored without relying on implicit upcasting.
df[result_column] = df[result_column].astype(object)

# Generate toxicity information for each cleaned abstract
for index, row in df.iterrows():
    try:
        existing = row[result_column]
        if not (pd.isna(existing) or existing == ''):
            continue  # Already labelled; leave the existing value alone

        abstract = row.get('Cleaned_Abstract', '')
        if not abstract:
            # Nothing to classify: avoid spending a request (and a delay) on
            # an empty prompt whose answer would be meaningless.
            print(f"Skipping index {index}: no abstract text.")
            continue

        print(f"Processing index {index}...")
        toxicity_info = get_toxicity(abstract)
        if toxicity_info:
            df.at[index, result_column] = toxicity_info
            print(f"Generated toxicity information for index {index}: {toxicity_info}")
        time.sleep(delay_between_requests)  # Delay between requests
    except Exception as e:
        # Best effort: report the failing row and carry on with the rest.
        print(f"Failed to generate toxicity information for index {index}: {e}")

In [None]:
# Save the DataFrame to 'df.xlsx', omitting the index column.
df.to_excel('df.xlsx', index=False)