# Assessing the attitudes towards the proposed new ACMA powers to combat misinformation and disinformation


## Generate prompt

This function takes the submission text and the submission author (derived from
the document name) as parameters and returns the formatted prompt to be sent to the LLM.


In [None]:
def prompt_formatted(submission_string: str, submission_author: str) -> str:
    """Assemble the full LLM prompt for a single submission.

    The template in ``prompt.txt`` has its ``|issues|``, ``|guidance_note|``
    and ``|fact_sheet|`` placeholders filled from the matching ``prompt_*.md``
    files (all paths are relative to the current working directory). The
    author line and the submission text are then appended between the
    SUBMISSION START / SUBMISSION END banner lines.

    Args:
        submission_string: Text of the submission.
        submission_author: Author name written on the "Submission from:" line.

    Returns:
        The completed prompt string.
    """
    def _slurp(path):
        # Return the entire content of a text file.
        with open(path, 'r') as handle:
            return handle.read()

    template = _slurp('prompt.txt')

    # All inserts are loaded before any substitution happens; substitutions
    # are then applied one after another in this order.
    inserts = [
        ('|issues|', _slurp('prompt_issues.md')),
        ('|guidance_note|', _slurp('prompt_guidance_note.md')),
        ('|fact_sheet|', _slurp('prompt_fact_sheet.md')),
    ]
    for placeholder, text in inserts:
        template = template.replace(placeholder, text)

    opening_banner = "\n\n***************************************** SUBMISSION START *****************************************\n\n"
    closing_banner = "\n\n***************************************** SUBMISSION END *****************************************\n\n"
    author_line = f"Submission from: {submission_author}\n\n"

    return template + opening_banner + author_line + submission_string + closing_banner

## Get AI response

The functions in this cell send each submission's prompt to the AI model, store the response, and move the processed file to the completed folder.


In [None]:
from az_client import call_ai, get_vector
from tqdm.notebook import tqdm
from db.docs import DocumentManager
from db.db_instance import DBClient
import json
import os
import shutil
from collections import Counter

# Module-level handles created once when this cell runs.
# NOTE(review): `client` is not referenced by any function visible in this
# cell — confirm whether constructing it is still required.
client = DBClient()
# `db` is used by the functions below to read stored documents
# (`get_all_docs`) and to save new ones (`new_doc`).
db = DocumentManager()

def get_targetted_function(category):
    """Load the follow-up function definition for a responder category.

    Args:
        category: Responder category label. The caller passes it lower-cased;
            matching here is exact.

    Returns:
        The parsed JSON content of the matching ``function_*.json`` file, or
        ``None`` when the category has no dedicated follow-up questions.
    """
    # Each entry maps a group of category labels to the JSON file describing
    # the extra questions for that group.
    routing = (
        (('civil society', 'academic'), 'function_civil_society.json'),
        (('digital platform',), 'function_digital.json'),
        (('news',), 'function_news.json'),
    )
    for labels, json_path in routing:
        if category in labels:
            with open(json_path, 'r') as f:
                return json.load(f)
    return None

def add_previous_categories(limit=100):
    """Return the motivations most often recorded on stored documents.

    Every occurrence of a motivation across all documents is counted, so the
    result is ordered from most to least frequent. (Previously the values
    were de-duplicated before counting, which gave every motivation a count
    of 1 and made the frequency ranking meaningless.)

    Args:
        limit: Maximum number of motivations to return. Defaults to 100,
            the cut-off the code has always applied.

    Returns:
        A list of distinct motivation values, most frequent first. Ties keep
        the order in which the motivations were first encountered.
    """
    motivation_counts = Counter()
    for doc in db.get_all_docs():
        # Documents stored without a 'motivations' entry contribute nothing.
        if 'motivations' not in doc:
            continue
        for motivation in doc['motivations']:
            motivation_counts[motivation] += 1

    return [motivation for motivation, _ in motivation_counts.most_common(limit)]

def get_function():
    """Load ``function.json`` and fill in the known motivations.

    The ``|motivations|`` placeholder inside the description of the
    ``motivations`` property is replaced with the string form of the list
    returned by ``add_previous_categories()``.

    Returns:
        The parsed function definition with the placeholder substituted.
    """
    with open('function.json', 'r') as f:
        spec = json.load(f)
        known_motivations = add_previous_categories()
        # Work on the nested property dict directly instead of repeating the
        # full key path on both sides of the assignment.
        motivations_prop = spec['function']['parameters']['properties']['motivations']
        motivations_prop['description'] = motivations_prop['description'].replace(
            '|motivations|', str(known_motivations)
        )
    return spec

def extract_name_from_filename(filename):
    """Derive a lower-cased author name from a submission file name.

    Everything before the first hyphen is dropped, the remaining
    hyphen-separated pieces are joined with spaces, and the text is cut at
    the first dot (which removes the extension).

    Any name containing "anonymous" — in any letter case — is collapsed to
    exactly ``'anonymous'``. (Previously the check ran before lower-casing,
    so a file named e.g. ``12-Anonymous-3.md`` was not recognised.)

    Args:
        filename: File name of the form ``<id>-<name parts>.<ext>``.

    Returns:
        The lower-cased author name, ``'anonymous'`` for anonymous
        submissions, or an empty string when the file name has no hyphen.
    """
    parts = filename.split('-')
    # Lower-case before testing so the anonymous check is case-insensitive.
    name = ' '.join(parts[1:]).split('.')[0].lower()
    if 'anonymous' in name:
        return 'anonymous'
    return name

def add_to_json(data, filename):
    """Append one processed record to ``./data/processed_data.json``.

    The file holds a JSON list. It is created with an empty list when
    missing, then read in full, extended with ``{filename: data}`` and
    written back.

    Args:
        data: JSON-serialisable record to store.
        filename: Key under which ``data`` is stored in the new list entry.
    """
    store_path = './data/processed_data.json'

    # Seed the store with an empty list on first use.
    if not os.path.isfile(store_path):
        with open(store_path, 'w') as out:
            json.dump([], out)

    with open(store_path, 'r') as src:
        records = json.load(src)
    records.append({filename: data})

    with open(store_path, 'w') as out:
        json.dump(records, out)

def process_files_in_directory(directory, completed_directory):
    """Run every markdown submission in ``directory`` through the AI pipeline.

    For each ``.md`` file the submission is read, a prompt is built, the AI
    model is called, and the response (plus a vector from ``get_vector``) is
    stored via ``db.new_doc`` and appended to the JSON store. Responders
    whose category has dedicated follow-up questions get a second AI call.
    A file is moved to ``completed_directory`` only after all of these steps
    succeed; a failure is printed and the file is left in place.

    Args:
        directory: Folder containing the ``.md`` submissions to process.
        completed_directory: Folder that receives successfully processed
            files. Created if it does not exist.
    """
    addition_questions_to_ask = ['civil society', 'digital platform', 'news', 'academic']
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(completed_directory, exist_ok=True)
    # Get a list of markdown files to process
    markdown_files = [
        f for f in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, f)) and f.endswith('.md')
    ]
    # tqdm renders the progress bar while iterating
    for filename in tqdm(markdown_files, desc='Processing files'):
        filepath = os.path.join(directory, filename)
        try:
            with open(filepath, 'r') as file:
                submission = file.read()
            sub_author = extract_name_from_filename(filename)
            prompt = prompt_formatted(submission, sub_author)
            # Rebuilt for every file: get_function() folds in the motivations
            # of documents already stored, which changes as this loop runs.
            function = get_function()
            response = call_ai(prompt, function)

            response["author"] = sub_author

            # Strip only the trailing extension. str.replace('.md', '') would
            # also remove a '.md' sequence occurring inside the name.
            response["file_name"] = os.path.splitext(filename)[0]

            category = response['responder_category'].lower()
            if category in addition_questions_to_ask:
                add_questions = get_targetted_function(category)
                response['questions'] = json.dumps(call_ai(prompt, add_questions))

            vector = get_vector(submission)

            # NOTE(review): the meaning of the third positional argument
            # (True) is defined by DocumentManager.new_doc — confirm there.
            db.new_doc(response, vector, True)
            # The text before the first hyphen of the file name is the key.
            add_to_json(response, filename.split('-')[0])
            # Move the processed file to the 'completed' directory
            completed_filepath = os.path.join(completed_directory, filename)
            shutil.move(filepath, completed_filepath)
        except Exception as e:
            # Batch boundary: report the failure and carry on with the next
            # file so one bad submission does not stop the whole run.
            print(f"Error processing file: {filename}")
            print(e)
            continue

# Example usage: executing this cell processes every pending submission.
# Folder scanned for `.md` submissions.
directory = './data/files/converted'
# Folder that receives each file once it has been processed successfully.
completed_directory = './data/files/completed'
process_files_in_directory(directory, completed_directory)