<a href="https://colab.research.google.com/github/SaeSaeeda/ComputerScience-2022-2025/blob/main/chatbot_final_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Required imports
import pandas as pd
import spacy
import random

In [None]:
# Dataset read by the game; its 'Series', 'Synopsis' and 'Cast' columns are
# the ones the guessing logic looks at.
df = pd.read_csv('/content/kdramas_info_with_cast_and_synopsis.csv')

# spaCy pipeline used for named entity recognition on the player's keywords.
nlp = spacy.load("en_core_web_sm")

# Titles the player has rejected so far; later guesses skip them.
temporary_exclusions = set()

# Running score per guessing mode; each starts at zero.
title_score = actor_score = keyword_score = 0
# Main game loop
#
# Each round the player picks a guessing mode ('keyword', 'title' or 'actor').
# The chatbot scores the candidate rows of `df`, proposes the best-scoring
# title, and the player confirms ('yay') or rejects ('nay') it. Rejected
# titles are stored in `temporary_exclusions` and skipped by all three modes.


def _score_row(row, words, cast_names=None):
    """Return the relevance score of one dataset row for the given words.

    Every word earns 10 points when it occurs in the title ('Series'),
    5 when it occurs in the 'Synopsis' and 2 for a cast hit; each miss still
    earns 1 point. Matching is a case-insensitive substring test.

    Args:
        row: A dataset row exposing 'Series', 'Synopsis' and 'Cast'.
        words: The whitespace-separated words typed by the player.
        cast_names: Optional list of person names. When given, the cast hit
            is "any of these names occurs in 'Cast'" rather than "the word
            itself occurs in 'Cast'".

    Returns:
        The summed score, or 0 when the row has no title.
    """
    if pd.isna(row['Series']):
        return 0

    series = str(row['Series']).lower()
    synopsis = str(row['Synopsis']).lower()
    cast = str(row['Cast']).lower()

    # The name-based cast test does not depend on the word, so compute it once.
    names_hit = False
    if cast_names is not None:
        names_hit = any(name.lower() in cast for name in cast_names)

    total = 0
    for word in words:
        needle = word.lower()
        cast_hit = names_hit if cast_names is not None else needle in cast
        total += 10 if needle in series else 1
        total += 5 if needle in synopsis else 1
        total += 2 if cast_hit else 1
    return total


def _candidates():
    """Return the rows of `df` whose title has not been rejected yet."""
    return df[~df['Series'].isin(temporary_exclusions)]


def _filter_contains(frame, column, text):
    """Return the rows of `frame` whose `column` contains `text`.

    The comparison is case-insensitive and literal: `regex=False` keeps
    characters such as '(' or '*' in the player's input from being parsed as
    a regular expression. Missing values never match.
    """
    haystack = frame[column].str.lower().fillna('')
    return frame[haystack.str.contains(text.lower(), regex=False)]


def _best_match(frame, words, cast_names=None):
    """Return the row of the non-empty `frame` with the highest score."""
    scores = frame.apply(_score_row, axis=1, args=(words, cast_names))
    return frame.loc[scores.idxmax()]


def _print_scores():
    """Print the three running score totals."""
    print(f"\n🌟 Your Total Title Score: {title_score}")
    print(f"🌟 Your Total Actor Score: {actor_score}")
    print(f"🌟 Your Total Keyword Score: {keyword_score}")


def _end_game():
    """Say goodbye and reset the exclusions so a new game starts clean."""
    print("\n🤖 Chatbot: Thanks for playing! See you next time! 🌟")
    temporary_exclusions.clear()


# Built once instead of on every keyword round. The loop below does not read
# this column; it is kept in case later cells rely on it.
df['combined_info'] = df['Series'] + ' ' + df['Synopsis'] + ' ' + df['Cast']

while True:
    # User input
    user_input = input("\n🔍 Provide a keyword ('keyword'), remember the actor ('actor'), title ('title'), or type 'quit' to exit: ").lower().strip()

    # Exit the game
    if user_input == 'quit':
        _end_game()
        break

    # Keyword-based guess
    elif user_input == 'keyword':
        user_keyword = input("\n❓ What specific keywords come to mind? ")
        print("\n🤖 Chatbot: Let me think about that...")

        # spaCy named entity recognition: person names found in the input
        # drive the cast part of the score.
        doc = nlp(user_keyword)
        cast_names = [ent.text for ent in doc.ents if ent.label_ == "PERSON"]

        matching_rows = _candidates()
        if matching_rows.empty:
            # Every title has been rejected already; say so instead of
            # silently returning to the menu.
            print("Chatbot: I couldn't find relevant information based on your keyword.")
            continue

        # Hierarchy of points in order of importance: title > synopsis > cast
        best_drama_keyword = _best_match(matching_rows, user_keyword.split(), cast_names)['Series']

        print(f"\n🌟 Best Guess: {best_drama_keyword}")

        response = input(f"\nChatbot: Is it '{best_drama_keyword}'? (Type 'yay' for yes, 'nay' for no.): ").lower().strip()

        if response == 'yay':
            print("\n😄 Chatbot: Yay! You're correct!")
            keyword_score += 10
            _print_scores()
        elif response == 'nay':
            print(f"\nChatbot: Okay, let's try another word. Excluding '{best_drama_keyword}' for now.")
            temporary_exclusions.add(best_drama_keyword)
            print("\nChatbot: Lets try other keywords:")
        else:
            print("\nChatbot: Please type 'yay' for yes, 'nay' for no.")

    # Title-based guess
    elif user_input == 'title':
        while True:
            user_input_title = input("\n📖 Enter the K-drama title (or press Enter to return to the menu): ").strip()

            # An empty answer is the way out of this inner loop; without it
            # the only exit would be a confirmed guess.
            if not user_input_title:
                break

            matching_rows_title = _filter_contains(_candidates(), 'Series', user_input_title)
            if matching_rows_title.empty:
                print("\nChatbot: Couldn't find a matching K-drama. Please try again.")
                continue

            best_row = _best_match(matching_rows_title, user_input_title.split())
            best_drama_title = best_row['Series']

            # Snippet of the synopsis as a short summary; a missing synopsis
            # (NaN) cannot be sliced, so fall back to a placeholder.
            synopsis = best_row['Synopsis']
            best_drama_summary = str(synopsis)[:250] if pd.notna(synopsis) else "(no synopsis available)"

            print(f"\n🌟 Best Guess: {best_drama_title}")
            print(f"📄 Plot: {best_drama_summary}")

            response = input(f"\n🤖 Chatbot: Is it '{best_drama_title}'? (Type 'yay' for yes, 'nay' for no): ").lower().strip()

            if response == 'yay':
                print("\n🎉 Chatbot: Yay! You're a K-Drama genius! Thanks for playing! 🎊")
                title_score += 10
                _print_scores()
                break
            elif response == 'nay':
                print(f"\nChatbot: Okay, let's try another word. Excluding '{best_drama_title}' for now.")
                temporary_exclusions.add(best_drama_title)
                print("\nChatbot: Lets try again")
            else:
                print("\nChatbot: Please type 'yay' for yes, 'nay' for no")

    # Actor-based guess
    elif user_input == 'actor':
        user_input_actor = input("\n🕵️ Enter the actor's name: ").strip()

        # Exclusions hold titles, so they are applied to 'Series' (inside
        # _candidates) before narrowing down by cast.
        matching_rows_actor = _filter_contains(_candidates(), 'Cast', user_input_actor)

        if matching_rows_actor.empty:
            print(f"\n🤔 Chatbot: Couldn't find a K-drama for the actor '{user_input_actor}'. Please try again.")
            print("\n😅 Chatbot: Don't worry! Let's keep the fun rolling!")
            continue

        best_drama_actor = _best_match(matching_rows_actor, user_input_actor.split())['Series']

        print(f"\n🌟 Best Guess: {best_drama_actor}")

        response_actor = input(f"\n🤖 Chatbot: Was my guess '{best_drama_actor}' correct? (Type 'yay' for yes, 'nay' for no, or 'quit' to exit): ").lower().strip()

        if response_actor == 'yay':
            print("\n😄 Chatbot: Yay! You're a K-Drama genius! Thanks for playing! 🎊")
            actor_score += 10
            _print_scores()
        elif response_actor == 'nay':
            print("\n🤔 Chatbot: Oops! Let's keep the fun going!")
            # Remember the rejected title so the same actor yields a
            # different guess next time, as in the other two modes.
            temporary_exclusions.add(best_drama_actor)
        elif response_actor == 'quit':
            # The prompt offers 'quit', so honour it.
            _end_game()
            break
        else:
            print("\n❗ Chatbot: Please type 'yay' for yes, 'nay' for no, or 'quit' to exit.")

    else:
        print("\n❗ Chatbot: Please type 'keyword', 'title', 'actor', or 'quit'.")
