In [1]:
import os
import glob
import openai
import json
import ast

def load_api_key(config_file):
    """Return the ``api_key`` entry of a JSON configuration file.

    Args:
        config_file: Path to a JSON file whose top-level object has an
            ``"api_key"`` entry.

    Returns:
        The value stored under ``"api_key"``.

    Raises:
        KeyError: If the JSON object has no ``"api_key"`` entry.
    """
    with open(config_file) as handle:
        settings = json.load(handle)
    key = settings['api_key']
    return key

def configure_openai_api(api_key: str) -> None:
    """Store ``api_key`` on the ``openai`` module as ``openai.api_key``."""
    openai.api_key = api_key

def get_text_files(directory):
    """List the paths of all ``*.txt`` files directly inside ``directory``.

    The order of the returned list is whatever ``glob.glob`` yields; an
    empty list is returned when nothing matches.
    """
    pattern = os.path.join(directory, '*.txt')
    matches = glob.glob(pattern)
    return matches

def read_file(file_path):
    """Read a text file and return its entire contents as one string.

    The file is opened in text mode with the platform default encoding.
    """
    with open(file_path, 'r') as handle:
        return handle.read()

## The OpenAI key is read from config.json via load_api_key(); never paste a literal key into this notebook (a key previously committed here must be revoked).

def output_directories(path, name):
    """Return the path of the next numbered ``.txt`` output file in ``path``.

    The directory is created when it does not exist yet. The sequence number
    is the count of ``*.txt`` files already in the directory plus one,
    zero-padded to two digits (``01`` ... ``09``, ``10``, ...).

    Args:
        path: Output directory, with or without a trailing separator.
        name: File-name prefix placed in front of the sequence number.

    Returns:
        ``<path>/<name><NN>.txt``. The file itself is not created.

    Note:
        The number is derived from a file count, so deleting earlier outputs
        can make a later call return a name that already exists.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
    existing_outputs = glob.glob(os.path.join(path, '*.txt'))
    sequence = str(len(existing_outputs) + 1).zfill(2)
    # os.path.join keeps the result inside `path` even without a trailing slash.
    return os.path.join(path, name + sequence + ".txt")

from __future__ import print_function

def find_values(id, json_repr):
    """Collect every value stored under key ``id`` anywhere in a JSON document.

    Args:
        id: Key to look for in each JSON object.
        json_repr: JSON text to scan.

    Returns:
        A list of the matching values, in the order the decoder finishes the
        objects that contain them (nested objects before their parents).
    """
    found = []

    def _collect(decoded_object):
        # Called by the decoder once per JSON object; record a hit and hand
        # the object back unchanged.
        if id in decoded_object:
            found.append(decoded_object[id])
        return decoded_object

    # Only the hook's side effect matters; the decoded document is discarded.
    json.loads(json_repr, object_hook=_collect)
    return found

# Folder whose *.txt files are the input texts for the extrapolation step.
directory = "./textFiles/"
# JSON file that load_api_key() reads the "api_key" entry from.
config_file = "config.json"

# Read the key from disk and register it on the openai module.
api_key = load_api_key(config_file)
configure_openai_api(api_key)


In [2]:
def get_completion(prompt, model="gpt-4", temperature=0):
    """Send ``prompt`` as a single user message and return the reply text.

    A fixed system message ('You are a social worker') is placed in front of
    the user prompt. The request itself is delegated to
    ``get_completion_from_messages`` so both helpers share one code path.

    Args:
        prompt: Text of the user message.
        model: Name of the chat model to query.
        temperature: Degree of randomness of the model's output.

    Returns:
        The ``content`` of the first choice's message.
    """
    messages = [
        {"role": "system", "content": 'You are a social worker'},
        {"role": "user", "content": prompt},
    ]
    return get_completion_from_messages(messages, model=model, temperature=temperature)

def get_completion_from_messages(messages, model="gpt-4", temperature=0.3):
    """Run a chat completion over ``messages`` and return the reply text.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts passed to
            the API unchanged.
        model: Name of the chat model to query.
        temperature: Degree of randomness of the model's output.

    Returns:
        The ``content`` of the first choice's message.
    """
    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 interface of
    # the openai package — confirm the installed version.
    request = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    completion = openai.ChatCompletion.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message["content"]

In [44]:
###################################
###################################
###################################
####### EXTRAPOLATION  ############
###################################
###################################
# INPUT: Text, 50 to 100 word max texts containing feedback.
# OUTPUT: Extrapolation of the text as 10 word max.

# Setup new file of extrapolations

# Next numbered output file for the extrapolations and for the progress log.
output_Extrapolation_dir = "./outputExtrapolation/"
outputExtrapolationFile = output_directories(output_Extrapolation_dir, "extrapolation_")
output_ExtrapolationProgress_dir = "./progressExtrapolation/"
outputProgressExtrapolationFile = output_directories(output_ExtrapolationProgress_dir, "progressExtrapolation_")

# The instruction part of the prompt is identical for every input file, so it
# is built once, before the loop. This also keeps `prompt` defined for the
# progress log when the input folder is empty.
prompt = """
    Your task is to extract relevant information from  
    the opinion of parents about the school of their children 
    to give feedback to the Ministry of Education. \

    From the opinion below, delimited by triple backticks 
    extract the information relevant to the state of the 
    schools and the education system. Limit to one sentence with at most 10 words. \

    Opinion: 
    """

summaries = {}  # not used in this cell; kept in case another cell reads it
# glob returns files in arbitrary order; sorting makes the sentence numbers
# reproducible between runs.
text_files = sorted(get_text_files(directory))

# One numbered line per input file: "<n>: <model response>".
content_master = ""
for counter, file_path in enumerate(text_files, start=1):
    content = read_file(file_path)
    content_prompt = f"""
    ```{content}```
    """
    response = get_completion(prompt + content_prompt)
    content_master += f"{counter}: {response} \n "

print(content_master)

#######################################
#######################################
# Saving results

# `with` closes each file even if a write fails.
with open(outputExtrapolationFile, "w") as out_file:
    out_file.write(content_master)

with open(outputProgressExtrapolationFile, "w") as out_file:
    out_file.write("prompt: \n")
    out_file.write(prompt)
    out_file.write("\n")
    out_file.write("response: \n")
    out_file.write(content_master)
    

1: Teaching methods in Danish schools are outdated. 
 2: Danish schools receive positive feedback from parents. 
 3: Underfunding leads to worse schools, needing more teachers and resources. 
 4: School quality varies by neighborhood and local population. 
 5: Children enjoy freedom, creativity, and foundational support in education system. 
 6: Schools are well-equipped but lack engaging teaching methods. 
 7: More stability in school staff would benefit children's education. 
 8: Teachers feel demotivated, need support for better work environment. 
 9: Teachers are poorly paid and educated; needs ministry intervention. 
 10: Strict curriculum limits flexibility and individualized learning. 
 11: Teachers unenthusiastic, school requires improved equipment. 
 


In [16]:
###################################
###################################
###################################
####### CATEGORIES ################
###################################
###################################
# INPUT: Extrapolation, 10 word max summary of texts.
# OUTPUT: Categories, categorization of the extrapolated texts

# Setup new file of categories
output_Dir = "./outputOnlyCategories/"
nameOutput_File = "identifyOnlyCategories_"
output_File = output_directories(output_Dir, nameOutput_File)
outputProgress_Dir = "./progressOnlyCategories/"
outputProgress_File = output_directories(outputProgress_Dir, "progressIdentifyOnlyCategories_")

# Load the most recent extrapolation file (largest os.path.getctime value).
pathFile = "./outputExtrapolation/"
nameFile = "extrapol*.txt"
list_of_files = glob.glob(pathFile + nameFile)
if not list_of_files:
    # Fail with an actionable message instead of max()'s "empty sequence" error.
    raise FileNotFoundError(
        f"No files match {pathFile + nameFile!r}; run the extrapolation cell first."
    )
latest_file = max(list_of_files, key=os.path.getctime)
content = read_file(latest_file)

####################################
####################################
# Prompt for Categories
####################################
####################################

prompt = f"""
Your task is to extract the topics discussed in a sequence of sentences delimited by <>. 
Each sentence is represented by a number. \

To perform the task follow these steps: \
- work out the topics discussed in the sentences. \
- reflect if you can find better topics to represent the sentences. \
- explain each topic in a concise way. \
- output the response as a python dictionary that contains the following 
key: topic name, and values: explanation. \

Sentences: <{content}>
"""


temperature = 0
model = 'gpt-4'
response = get_completion(prompt, model, temperature)

print(response)

#######################################
#######################################
# Saving results

# `with` closes each file even if a write fails.
with open(output_File, "w") as out_file:
    out_file.write(response)

# The progress log records the settings and prompt that produced the response.
with open(outputProgress_File, "w") as out_file:
    out_file.write("temperature: ")
    out_file.write(str(temperature))
    out_file.write(", model: ")
    out_file.write(model)
    out_file.write("\n")
    out_file.write("prompt: \n")
    out_file.write(prompt)
    out_file.write("\n")
    out_file.write("response: \n")
    out_file.write(response)


{
  "Education System": "Covers the overall quality, funding, and curriculum of schools.",
  "Teaching Methods": "Refers to the approaches and techniques used in classrooms, including engagement and flexibility.",
  "Parental Feedback": "Represents the opinions and satisfaction of parents with the education system.",
  "School Staff": "Addresses the stability, motivation, and qualifications of teachers and other school personnel.",
  "Neighborhood Factors": "Discusses the impact of local population and neighborhood on school quality."
}


In [17]:
###################################
###################################
###################################
####### PAIRING  ##################
###### EXTRAPOLATION ##############
####### CATEGORIES ################
###################################
###################################
# INPUT: Extrapolation and Categories.
# OUTPUT: Index of which extrapolation best fits each category.

# Setup new files for the pairing output and its progress log
output_Dir = "./outputPairing/"
nameOutput_File = "pairingExtrapolCategory_"
output_File = output_directories(output_Dir, nameOutput_File)
outputProgress_Dir = "./progressPairingExtrapolCategory/"
outputProgress_File = output_directories(outputProgress_Dir, "progressPairingExtrapolCategory_")

# Load the most recent extrapolation file (largest os.path.getctime value).
pathFile = "./outputExtrapolation/"
nameFile = "extrapol*.txt"
list_of_files = glob.glob(pathFile + nameFile)
if not list_of_files:
    # Fail with an actionable message instead of max()'s "empty sequence" error.
    raise FileNotFoundError(
        f"No files match {pathFile + nameFile!r}; run the extrapolation cell first."
    )
latest_file = max(list_of_files, key=os.path.getctime)
contentExtrapolations = read_file(latest_file)

# Load the most recent categories file the same way.
pathFile = "./outputOnlyCategories/"
nameFile = "*Only*.txt"
list_of_files = glob.glob(pathFile + nameFile)
if not list_of_files:
    raise FileNotFoundError(
        f"No files match {pathFile + nameFile!r}; run the categories cell first."
    )
latest_file = max(list_of_files, key=os.path.getctime)
contentCategories = read_file(latest_file)

####################################
####################################
# Prompt for pairing 
#  extrapolations 
#       with
#    categories
####################################
####################################

prompt = f"""
Your task is to associate the sentences delimited by <> to a category from the python dictionary delimited by "". \
Associate the sentences to a category by using its explanation value of the python dictionary. \
Each sentence is represented by a number. \

To perform the task follow these steps: \
- use the provided explanations to cluster the sentences. \
- reflect on how the explanations explain the sentences. \
- use only the provided categories. \
- associate all the sentences. \
- one sentence can be associated with more than one category. \
- output the response as a python dictionary that contains the following 
key: category name, and value: sentence number. \

Sentences: <{contentExtrapolations}> \

Categories: "{contentCategories}"
"""


temperature = 0
model = 'gpt-4'
response = get_completion(prompt, model, temperature)

print(response)

#######################################
#######################################
# Saving results

# `with` closes each file even if a write fails.
with open(output_File, "w") as out_file:
    out_file.write(response)

# The progress log records the settings and prompt that produced the response.
with open(outputProgress_File, "w") as out_file:
    out_file.write("temperature: ")
    out_file.write(str(temperature))
    out_file.write(", model: ")
    out_file.write(model)
    out_file.write("\n")
    out_file.write("prompt: \n")
    out_file.write(prompt)
    out_file.write("\n")
    out_file.write("response: \n")
    out_file.write(response)



{
  "Education System": [1, 3, 4, 5, 6, 10],
  "Teaching Methods": [1, 6, 10],
  "Parental Feedback": [2, 5],
  "School Staff": [7, 8, 9, 11],
  "Neighborhood Factors": [4]
}
