In [2]:
import os
import pandas as pd
import numpy as np

def txt_to_csv(path):
    """Parse a trivia text file into a DataFrame of questions and options.

    Each record is expected to look like this:

        #Q <question text>
        ^ <text of the correct answer>
        A <option text>
        B <option text>
        C <option text>      (optional)
        D <option text>      (optional)

    At most four option lines are read per question. Reading stops early at a
    blank line, at the next ``#Q`` line, or at end of file, so a record with
    fewer than four options (for example a True/False question) does not
    swallow the question that follows it.

    Args:
        path: Path of the text file to parse. Bytes that cannot be decoded
            are dropped (``errors='ignore'``).

    Returns:
        pandas.DataFrame with the columns "Questions", "Correct", "A", "B",
        "C" and "D". "Correct" is the letter of the first option whose text
        equals the answer text, or NaN when nothing matches. Options that are
        absent from the file are NaN.
    """
    option_keys = ['A', 'B', 'C', 'D']
    questions, key, dist1, dist2, dist3, dist4 = [], [], [], [], [], []

    with open(path, 'r', errors='ignore') as file:
        lines = file.readlines()

    i = 0
    while i < len(lines):
        line = lines[i].strip()
        if not line.startswith("#Q"):
            i += 1  # Not a question header (blank line, stray text, ...)
            continue

        question = line[3:].strip()  # Drop the "#Q " prefix
        i += 1

        # The answer line is optional; without it no option can be matched.
        correct_answer = ''
        if i < len(lines) and lines[i].strip().startswith('^'):
            correct_answer = lines[i].strip()[1:].strip()
            i += 1

        # Collect the option lines positionally (first line -> "A", ...).
        options = [np.nan] * len(option_keys)
        consumed = 0
        while consumed < len(option_keys) and i + consumed < len(lines):
            option_line = lines[i + consumed].strip()
            if not option_line or option_line.startswith("#Q"):
                break  # End of this record; leave the line for the outer loop
            # Drop the leading letter; slicing (rather than indexing) keeps a
            # one-character line such as "A" from raising IndexError.
            options[consumed] = option_line[1:].strip()
            consumed += 1
        i += consumed  # Advance only past the lines that really were options

        # Map the answer text back to its letter. An empty answer never
        # matches, so an empty option cannot be picked up by accident.
        correct_option = np.nan
        if correct_answer:
            for letter, option_text in zip(option_keys, options):
                if correct_answer == option_text:
                    correct_option = letter
                    break

        questions.append(question)
        key.append(correct_option)
        dist1.append(options[0])
        dist2.append(options[1])
        dist3.append(options[2])
        dist4.append(options[3])

    return pd.DataFrame({
        "Questions": questions,
        "Correct": key,
        "A": dist1,
        "B": dist2,
        "C": dist3,
        "D": dist4
    })

def parse_files(source_path, destination_path):
    """Convert every ``.txt`` file directly inside a directory to a CSV file.

    Each matching file is parsed with ``txt_to_csv`` and written to
    ``destination_path`` under the same base name with a ``.csv`` extension.
    Progress is printed before and after each file.

    Args:
        source_path: Directory that is listed for ``.txt`` files
            (sub-directories are not searched).
        destination_path: Directory that receives the CSV files; it is
            created if it does not exist yet.
    """
    txt_suffix = '.txt'
    # exist_ok=True already covers the "directory is there" case, so no
    # separate existence check is needed.
    os.makedirs(destination_path, exist_ok=True)

    for filename in os.listdir(source_path):
        if not filename.endswith(txt_suffix):
            continue
        print(f"Processing file: {filename}")
        data = txt_to_csv(os.path.join(source_path, filename))
        # Swap only the trailing extension. str.replace would also rewrite a
        # '.txt' that appears earlier in the name (e.g. 'a.txt.old.txt').
        csv_filename = filename[:-len(txt_suffix)] + '.csv'
        data.to_csv(os.path.join(destination_path, csv_filename), index=False)
        print(f"Processed {filename} into {csv_filename}")

# Directory that is scanned for .txt files and directory that receives the
# generated CSVs. Both are absolute paths on the author's machine; edit them
# before running this cell anywhere else.
source_path = '/Users/matthieudebeaucorps/Desktop/Projects/OpenTriviaQA/categories'
destination_path = '/Users/matthieudebeaucorps/Desktop/Projects/OpenTriviaQA/categories csv'

# Convert every .txt file in source_path into a CSV inside destination_path
# (prints one "Processing"/"Processed" pair per file).
parse_files(source_path, destination_path)


Processing file: music.txt
Processed music.txt into music.csv
Processing file: sports.txt
Processed sports.txt into sports.csv
Processing file: world.txt
Processed world.txt into world.csv
Processing file: movies.txt
Processed movies.txt into movies.csv
Processing file: animals.txt
Processed animals.txt into animals.csv
Processing file: video-games.txt
Processed video-games.txt into video-games.csv
Processing file: history.txt
Processed history.txt into history.csv
Processing file: science-technology.txt
Processed science-technology.txt into science-technology.csv
Processing file: literature.txt
Processed literature.txt into literature.csv
Processing file: geography.txt
Processed geography.txt into geography.csv


In [3]:
import random

def load_questions(category, categories_dir):
    """Read the CSV file of one category and return it as a DataFrame.

    Args:
        category: File name of the category without the ``.csv`` extension.
        categories_dir: Directory that contains the category CSV files.

    Returns:
        pandas.DataFrame with the contents of
        ``<categories_dir>/<category>.csv``.
    """
    csv_path = f"{categories_dir}/{category}.csv"
    return pd.read_csv(csv_path)

def ask_question(df):
    """Print one randomly chosen question and return its answer key and text.

    Only rows whose "Correct" value is one of "A"-"D" can be chosen. The
    answer text is looked up in the column named by that letter, so a missing
    or unrecognised key would otherwise fail with a KeyError.

    Args:
        df: DataFrame with the columns "Questions", "Correct", "A", "B", "C"
            and "D".

    Returns:
        Tuple ``(correct_letter, correct_text)`` for the question that was
        printed.

    Raises:
        ValueError: If ``df`` contains no row with a usable answer key.
    """
    option_letters = ["A", "B", "C", "D"]

    playable = df[df["Correct"].isin(option_letters)]
    if playable.empty:
        raise ValueError("No questions with a valid answer key (A-D) to ask.")

    # Same draw as before when every row is playable.
    row = playable.iloc[random.randint(0, len(playable) - 1)]

    print(f"\n{row['Questions']}")
    for letter in option_letters:
        # Questions with fewer than four options store NaN for the rest;
        # skip those instead of printing "C) nan".
        if pd.notna(row[letter]):
            print(f"{letter}) {row[letter]}")

    correct_letter = row["Correct"]
    return correct_letter, row[correct_letter]

def game_loop(categories_dir, categories):
    """Run the interactive trivia game on the console until the player quits.

    Each round asks for a category, loads that category's CSV, prints one
    random question, reads the player's answer and reports whether it was
    correct. The loop ends when the player answers the "Play again?" prompt
    with anything other than "yes" or "y".

    Args:
        categories_dir: Directory that contains the category CSV files.
        categories: Collection of valid category names (CSV file names
            without the ``.csv`` extension).
    """
    while True:
        # Strip first so stray leading/trailing spaces are not turned into
        # '-' by the replace and do not invalidate an otherwise good name.
        category = input(f"\nChoose a category {categories}: ").strip().lower().replace(" ", "-")
        if category not in categories:
            print("Invalid category. Please choose a valid category.")
            continue

        df = load_questions(category, categories_dir)
        correct_answer, answer_text = ask_question(df)

        # Surrounding whitespace must not make a right answer count as wrong.
        user_answer = input("Your answer (A-D): ").strip().upper()
        if user_answer == correct_answer:
            print("Correct!")
        else:
            print(f"Incorrect! The correct answer was {correct_answer}: {answer_text}.")

        if input("Play again? (yes/no): ").strip().lower() not in ("yes", "y"):
            break

# Directory containing the per-category CSV files read by load_questions.
# This is an absolute path on the author's machine; edit it before running elsewhere.
categories_dir = '/Users/matthieudebeaucorps/Desktop/Projects/OpenTriviaQA/categories csv'

# Category names offered to the player. Each one must match a CSV file name in
# categories_dir exactly, minus the '.csv' extension.
categories = ["animals", "geography", "history", "literature", "movies", "music", "science-technology", "sports", "video-games", "world"]

# Start the interactive game loop (blocks on input() until the player declines to play again)
game_loop(categories_dir, categories)



Butterflies do not have lungs. They breathe trough this organ.
A) The proboscis
B) The abdomen
C) The eyes
D) The legs
Incorrect! The correct answer was B: The abdomen.
