In [None]:
"""
Author(s): 1. Hanzala b. Rehan
Description: Program to understand the dataset, and develop the game logic upon it.
Credits: Dataset taken from github.com/ZhannDark
Date Created: November 27th, 2024
Date Last Modified: December 4th, 2024
"""

In [1]:
import pandas as pd

# Load the data
df = pd.read_csv("data.csv")

In [2]:
df.shape
df.head()

Unnamed: 0,Character,"Is the character from ""Frozen""?",Is the character a mouse?,Is the character a duck?,Does your character have superpowers?,Is your character female?,Is your character is a positive character?,Is your character an animal?,Is your character a princess?,Does your character live under the sea?,...,Does your character have a vendetta against a princess named Aurora?,Does your character have a live-action film adaptation starring Angelina Jolie?,Does your character have black and white hair?,Does your character have a live-action film adaptation starring Emma Stone?,"Does your character have a number ""86"" on his doors?",Is your character considered a rival to Lightning McQueen?,"Is your character known for his catchphrase, ""Ka-chicka""?","Does your character have a strong vendetta against Mowgli, the human boy?","Does your character have a memorable line about the ""man's red flower"" (fire)?","Does your character appear in both the animated and live-action adaptations of ""The Jungle Book""?"
0,Mickey Mouse,0,1,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Elza,1,0,0,1,1,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,Woody,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Donald Duck,0,0,1,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Tinker Bell,0,0,0,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
df['ID'] = range(1, len(df) + 1)
df = df.set_index('ID')

In [13]:
Questions = []

# Loop through each column (question)
for i, question in enumerate(df.columns):
    # Ensure the column contains only boolean values
    true_chars = set(df.index[df[question] == True].tolist())  # Characters with True
    false_chars = set(df.index[df[question] == False].tolist())  # Characters with False

    # Append the transformed row
    Questions.append({
        'ID': i + 1,  # Question ID (starting from 1)
        'Question': question,  # Question text
        'True_Characters': true_chars,  # List of IDs with True
        'False_Characters': false_chars  # List of IDs with False
    })

# Convert the result into a new DataFrame
Questions = pd.DataFrame(Questions)

# Ensure no extra whitespace in sets when saving
def clean_set(s):
    return str(s).replace(" ", "")  # Remove all spaces within the set representation

Questions['True_Characters'] = Questions['True_Characters'].apply(clean_set)
Questions['False_Characters'] = Questions['False_Characters'].apply(clean_set)

# Print the resulting DataFrame
print(Questions)

      ID                                           Question  \
0      1                                          Character   
1      2                    Is the character from "Frozen"?   
2      3                          Is the character a mouse?   
3      4                           Is the character a duck?   
4      5              Does your character have superpowers?   
..   ...                                                ...   
111  112  Is your character considered a rival to Lightn...   
112  113  Is your character known for his catchphrase, "...   
113  114  Does your character have a strong vendetta aga...   
114  115  Does your character have a memorable line abou...   
115  116  Does your character appear in both the animate...   

                   True_Characters  \
0                            set()   
1                        {2,20,28}   
2                              {1}   
3                              {4}   
4    {2,5,6,7,9,15,16,20,21,23,29}   
..             

In [10]:
Characters = []

for index, row in df.iterrows():
    Characters.append({
        'ID': index,
        'name': row['Character'],
        'image_path': f"characters_img/{row['Character']}.webp"
    })

Characters = pd.DataFrame(Characters)
print(Characters)

    ID             name                           image_path
0    1     Mickey Mouse     characters_img/Mickey Mouse.webp
1    2             Elza             characters_img/Elza.webp
2    3            Woody            characters_img/Woody.webp
3    4      Donald Duck      characters_img/Donald Duck.webp
4    5      Tinker Bell      characters_img/Tinker Bell.webp
5    6            Jafar            characters_img/Jafar.webp
6    7           Ursula           characters_img/Ursula.webp
7    8            Simba            characters_img/Simba.webp
8    9        Pinocchio        characters_img/Pinocchio.webp
9   10  Winnie The Pooh  characters_img/Winnie The Pooh.webp
10  11       Snow White       characters_img/Snow White.webp
11  12            Belle            characters_img/Belle.webp
12  13          Aladdin          characters_img/Aladdin.webp
13  14       Cinderella       characters_img/Cinderella.webp
14  15         Rapunzel         characters_img/Rapunzel.webp
15  16            Ariel 

In [14]:
# Save the DataFrame to a CSV file
Questions.to_csv('qs.csv', index=False, quotechar='"', quoting=2)
Characters.to_csv('characters.csv', index=False)

In [1]:
import csv

# Read the CSV file
input_file = "qs.csv"
output_file = "questions.csv"

with open(input_file, "r", newline="", encoding="utf-8") as infile, \
    open(output_file, "w", newline="", encoding="utf-8") as outfile:
    reader = csv.DictReader(infile)
    fieldnames = reader.fieldnames
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()

    for row in reader:
        # Remove commas in the Question column
        row["Question"] = row["Question"].replace(",", "")

        # Replace commas in True_Characters and False_Characters with dots
        row["True_Characters"] = row["True_Characters"].replace(",", ".")
        row["False_Characters"] = row["False_Characters"].replace(",", ".")

        writer.writerow(row)

print(f"Processed data has been written to {output_file}.")

Processed data has been written to qs.csv.
