### Merging all the annotated text files into one text file for better readability on our part

In [None]:
import os
import openai
# Folder that is scanned for the annotated .txt files to merge.
# NOTE(review): machine-specific absolute Windows path — must be edited to run on another machine.
directory = r"C:\Users\Lenovo\Downloads\End-to-end-LLM-main\End-to-end-LLM-main\annotations\web_text"

def read_and_process_text_files(directory):
    """
    Read every ``.txt`` file in ``directory`` and return the non-empty contents.

    Files are visited in sorted filename order so the returned list (and any
    file built from it) is the same on every run; ``os.listdir`` on its own
    returns entries in arbitrary order.

    Args:
        directory: Path of the folder to scan. Sub-folders are not descended.

    Returns:
        A list of strings, one per readable ``.txt`` file, each with leading
        and trailing whitespace removed. Files that are empty after stripping
        are skipped. Files that cannot be opened or decoded as UTF-8 are
        reported with ``print`` and skipped.
    """
    all_texts = []
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".txt"):  # Only process .txt files
            continue
        filepath = os.path.join(directory, filename)
        try:
            with open(filepath, "r", encoding="utf-8") as file:
                text = file.read()
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: report the unreadable file and carry on with the rest.
            print(f"Error reading file {filename}: {e}")
            continue
        # Basic text processing (more sophisticated cleaning can be added here)
        text = text.strip()
        if text:  # Only keep non-empty texts
            all_texts.append(text)
    return all_texts



# Script entry point: merge every annotation file into one text file.
if __name__ == "__main__":
    output_file = "combined_texts.txt"
    texts = read_and_process_text_files(directory)
    with open(output_file, "w", encoding="utf-8") as outfile:
        # Each document is followed by a blank line so entries stay separated.
        outfile.writelines(f"{text}\n\n" for text in texts)
    print(f"Texts saved to {output_file}")

#### Reading said text file and storing it as a list (Did this separately because we wanted to create one massive file containing all the questions and answers, all of which would then be stored in a list with each answer following its question. Could've just used the `texts` data structure, but that wouldn't allow for easy repetition, considering we would have to re-run the slightly long code above)

In [22]:
def read_text_file_into_list(filename):
    """
    Load a UTF-8 text file as a list of its lines.

    Every line has its surrounding whitespace (including the trailing
    newline) stripped, so blank lines come back as empty strings.

    Returns:
        The list of stripped lines, or None (after printing a message) when
        the file does not exist.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as handle:
            return [raw_line.strip() for raw_line in handle]
    except FileNotFoundError:
        print(f"The file {filename} does not exist.")
        return None

# Load the merged text file; every line of it becomes one list entry.
filename = "combined_texts.txt"
content_list = read_text_file_into_list(filename)
# Nothing is printed when the result is None (missing file) or an empty list.
if content_list:
    print(content_list)


['Q: What is VisitPITTSBURGH dedicated to?', 'A: VisitPITTSBURGH is dedicated to generating business events, sports events, and leisure travel business for the Pittsburgh region.', '', 'Q: When was VisitPITTSBURGH established?', 'A: VisitPITTSBURGH was established in 1935.', '', 'Q: Is VisitPITTSBURGH a for-profit or not-for-profit organization?', 'A: VisitPITTSBURGH is a not-for-profit organization serving its business membership.', '', 'Q: What type of events does VisitPITTSBURGH focus on?', 'A: VisitPITTSBURGH focuses on conventions, trade shows, and leisure travel.', '', 'Q: Where can I find the annual reports for VisitPITTSBURGH?', 'A: The annual reports for VisitPITTSBURGH can be found on their website, including reports from previous years like 2022, 2021, and earlier.', '', 'Q: What are some of the partnerships VisitPITTSBURGH is affiliated with?', 'A: VisitPITTSBURGH is affiliated with organizations such as the American Bus Association, Meeting Professionals International, and

#### Reading the CSV test file into a list, so that we can easily iterate through each question

In [30]:
import csv

def read_csv_into_list(filename, encoding="utf-8-sig"):
    """
    Read a CSV file and return all of its cells as one flat list of strings.

    Rows are concatenated in file order, so a single-column file yields one
    entry per row. Completely blank lines contribute nothing.

    Args:
        filename: Path of the CSV file to read.
        encoding: Text encoding used to decode the file. The default,
            "utf-8-sig", reads plain UTF-8 and additionally drops a leading
            byte-order mark when one is present, so the first cell is not
            prefixed with a stray BOM character.

    Returns:
        The flattened list of cell values, or None (after printing a message)
        when the file does not exist.
    """
    try:
        # newline="" hands raw line endings to the csv module, as its
        # documentation requires; otherwise "\r\n" inside a quoted field is
        # rewritten by universal-newline translation.
        with open(filename, 'r', newline='', encoding=encoding) as file:
            # Flatten the rows into a single list while reading.
            return [cell for row in csv.reader(file) for cell in row]
    except FileNotFoundError:
        print(f"The file {filename} does not exist.")
        return None

# Load the test questions; the flattened CSV cells become one list.
filename = "test_set.csv"
data_list = read_csv_into_list(filename)
# Nothing is printed when the result is None (missing file) or an empty list.
if data_list:
    print(data_list)


['Which festival is designed for vegetarian and vegan food lovers?', 'Which counties host major maple festivals near Pittsburgh?', "Which new sports team is mentioned as completing the pipeline from the Riverhounds Academy to professional women's soccer?", "Where is The Driver Era's tour scheduled to stop in Pittsburgh?", 'What can one explore with the free walking tours in Pittsburgh?', 'Which Pittsburgh festival involves a competitive pickle juice drinking contest?', 'What type of facility is the Inglis Innovation Center planning to include in its Bellevue location?', 'When is the Great American Banana Split Celebration in 2025?', 'When is Spring Carnival Weekend at CMU?', 'Which Pittsburgh food event features a competitive pickle juice drinking contest?', 'Where is the Big Nosh Jewish Food Festival taking place?', 'What type of artworks can one explore at The Andy Warhol Museum in Pittsburgh?', 'Which event celebrating nature will take place on March 22, 2025?', 'Where can one find 

In [None]:
# Print one entry per line to eyeball whether the list is flat (List[str])
# or nested (List[List]).
for question in data_list:
    print(question)

Which festival is designed for vegetarian and vegan food lovers?
Which counties host major maple festivals near Pittsburgh?
Which new sports team is mentioned as completing the pipeline from the Riverhounds Academy to professional women's soccer?
Where is The Driver Era's tour scheduled to stop in Pittsburgh?
What can one explore with the free walking tours in Pittsburgh?
Which Pittsburgh festival involves a competitive pickle juice drinking contest?
What type of facility is the Inglis Innovation Center planning to include in its Bellevue location?
When is the Great American Banana Split Celebration in 2025?
When is Spring Carnival Weekend at CMU?
Which Pittsburgh food event features a competitive pickle juice drinking contest?
Where is the Big Nosh Jewish Food Festival taking place?
What type of artworks can one explore at The Andy Warhol Museum in Pittsburgh?
Which event celebrating nature will take place on March 22, 2025?
Where can one find free entertainment in downtown Pittsburgh

In [None]:
# Create the chat client and answer every test question against the merged text.

client = openai.OpenAI(
    base_url="https://cmu.litellm.ai",
    api_key=os.environ.get("LITELLM_API_KEY"),
)

# content_list and data_list are assumed to be lists of strings.
content_str = "\n".join(content_list)

# The system prompt and the reference text are the same for every question,
# so they are built once and reused for each request.
context_messages = [
    {"role": "system", "content": "You are a helpful assistant. You use data that is provided to you by the assistant role to answer questions posed by the user"},
    {"role": "assistant", "content": content_str},
]

output = []
for question in data_list:
    print(question)
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=context_messages + [{"role": "user", "content": question}],
    )
    answer = response.choices[0].message.content
    print(answer)
    # Answers are collected in the same order as the questions.
    output.append(answer)

Which festival is designed for vegetarian and vegan food lovers?
The Pittsburgh Veg Fair is designed for vegetarian and vegan food lovers.
Which counties host major maple festivals near Pittsburgh?
The counties that host major maple festivals near Pittsburgh are Somerset County, which features the Pennsylvania Maple Festival in Meyersdale, and Indiana County, where the Indiana County Maple Weekend takes place. These festivals celebrate the maple syrup production heritage in the region.
Which new sports team is mentioned as completing the pipeline from the Riverhounds Academy to professional women's soccer?
The new sports team mentioned as completing the pipeline from the Riverhounds Academy to professional women's soccer is the Pittsburgh Hotspurs' new Women's Premier Soccer League team (WPSL).
Where is The Driver Era's tour scheduled to stop in Pittsburgh?
The Driver Era's tour is scheduled to stop at Stage AE in Pittsburgh.
What can one explore with the free walking tours in Pittsbur

In [46]:
import os

# Save the collected answers to output.txt, one list entry per write, in
# list order. The encoding is set explicitly to UTF-8 so the write does not
# depend on the platform's default encoding (which can fail on characters it
# cannot represent) and matches how the other text files here are opened.
# NOTE(review): an answer that itself contains a newline will span several
# lines in output.txt — confirm whether consumers expect exactly one line
# per answer.
with open('output.txt', 'w', encoding='utf-8') as file:
    file.writelines(f"{item}\n" for item in output)

# Confirm that the file was created
print(os.path.exists('output.txt'))

True


In [45]:
# Bare expression: as the last line of a notebook cell this displays the list of collected answers.
output

['The Pittsburgh Veg Fair is designed for vegetarian and vegan food lovers.',
 'The counties that host major maple festivals near Pittsburgh are Somerset County, which features the Pennsylvania Maple Festival in Meyersdale, and Indiana County, where the Indiana County Maple Weekend takes place. These festivals celebrate the maple syrup production heritage in the region.',
 "The new sports team mentioned as completing the pipeline from the Riverhounds Academy to professional women's soccer is the Pittsburgh Hotspurs' new Women's Premier Soccer League team (WPSL).",
 "The Driver Era's tour is scheduled to stop at Stage AE in Pittsburgh.",
 "Free walking tours in Pittsburgh provide an opportunity to explore various significant and vibrant parts of the city. These tours typically guide you through Downtown Pittsburgh, allowing you to experience the city's rich history, unique architecture, and cultural landmarks. While specific routes and highlights may vary, you can expect to see iconic s