In [None]:
import pandas as pd
import re
import speech_recognition as sr
from datetime import datetime

# Load the player dataset; every later lookup reads from this DataFrame.
df = pd.read_excel("Final Cricket Dataset.xlsx")

# Convert 'Last Match Date' to datetime. errors='coerce' turns cells that
# cannot be parsed into NaT instead of raising.
df["Last Match Date"] = pd.to_datetime(df["Last Match Date"], errors='coerce')

# Lower-cased player names used for substring matching against questions.
# Blank / non-text name cells come out of .str.lower() as NaN; drop them so
# the list holds only strings (a NaN entry would make `name in query` raise
# TypeError during matching).
player_names = df["Name"].str.lower().dropna().tolist()

# Greet user
def greet():
    """Print the two-line CrickChat welcome banner followed by a blank line."""
    banner = (
        "Hello! I'm CrickChat - your friendly PCB cricket assistant.",
        "Ask me anything about Pakistani players - stats, profiles, or recent matches!\n",
    )
    for line in banner:
        print(line)

# Helper: get closest matching player name
def get_player_name(query, names=None):
    """Find the known player name mentioned in a free-text query.

    Matching is a case-insensitive substring test of each candidate name
    against the query.

    Args:
        query: The user's question.
        names: Optional iterable of candidate names, expected to be already
            lower-cased. Defaults to the module-level ``player_names`` list.

    Returns:
        The longest candidate name contained in the query (the earliest one
        wins a tie), or None when no candidate matches or the query is empty.
    """
    if not query:
        return None

    candidates = player_names if names is None else names
    haystack = query.lower()  # lower the query once, not once per candidate

    # Skip non-string entries (e.g. NaN from blank cells) and empty strings:
    # the former would raise TypeError, the latter would match every query.
    hits = [
        name
        for name in candidates
        if isinstance(name, str) and name and name in haystack
    ]

    # Prefer the most specific hit so a short name such as "shan" cannot
    # shadow "shan masood" merely because it appears earlier in the list.
    return max(hits, key=len, default=None)

# Check if player is a top scorer (top 5 by international runs)
def is_top_scorer(player_name, top_n=5, data=None):
    """Report whether a player is among the leading international run-scorers.

    Args:
        player_name: Player name; compared case-insensitively with the
            "Name" column.
        top_n: How many of the highest "International Runs" rows count as
            top scorers. Defaults to 5.
        data: DataFrame with "Name" and "International Runs" columns.
            Defaults to the module-level ``df``.

    Returns:
        True when the player appears in the ``top_n`` rows ranked by
        "International Runs" (descending), otherwise False.
    """
    if not isinstance(player_name, str) or top_n <= 0:
        return False

    frame = df if data is None else data

    # Rows without a runs figure must not be ranked: sort_values puts NaN
    # last, so they would otherwise slip into the head when fewer than
    # top_n players have data.
    ranked = frame.dropna(subset=["International Runs"]).sort_values(
        by="International Runs", ascending=False
    )
    leaders = ranked.head(top_n)

    target = player_name.lower()
    return bool((leaders["Name"].str.lower() == target).any())

# Main function
def _format_date(value, missing="an unknown date"):
    """Render a date-like cell as e.g. 'January 06, 2024'.

    Returns *missing* for NaT/NaN/None (calling strftime on NaT raises) and
    falls back to ``str(value)`` for cells that are not datetime objects.
    """
    if pd.isna(value):
        return missing
    if hasattr(value, "strftime"):
        return value.strftime("%B %d, %Y")
    return str(value)


def _answer_player_question(question, row, player_name):
    """Answer a lower-cased question about the single player held in *row*."""
    name = row["Name"]

    # Age - whole-word match so "average" or "manage" do not trigger it.
    if re.search(r"\bage\b", question):
        return f"{name} is {row['Age']} years old."

    # DOB
    if "date of birth" in question or "dob" in question or "born" in question:
        return f"{name} was born on {_format_date(row['DOB'])}."

    # Place
    if "place" in question or "belong" in question or "from" in question:
        return f"{name} is from {row['Place']}."

    # Format-specific runs
    if "test run" in question:
        return f"{name} has scored {row['Test Runs']} runs in Test matches."
    if "odi run" in question:
        return f"{name} has scored {row['ODI Runs']} runs in ODIs."
    if "t20 run" in question:
        return f"{name} has scored {row['T20 Runs']} runs in T20 Internationals."
    if "international run" in question:
        return f"{name} has scored {row['International Runs']} international runs in total."

    # Maximum score
    if (
        "best performance" in question
        or "perform best" in question
        or "maximum" in question
        or "highest score" in question
    ):
        return f"{name}'s best performance was when they scored {row['Maximum Score']} runs."

    # Last match
    mentions_last_match = "last match" in question
    if (
        "last match date" in question
        or ("when did" in question and mentions_last_match)
        or "last played" in question
    ):
        last_date = row["Last Match Date"]
        if pd.isna(last_date):
            # The date column is coerced on load, so NaT is a real possibility.
            return f"I don't have a last match date on record for {name}."
        return f"{name}'s last match was on {_format_date(last_date)}."
    if "last match venue" in question or ("where did" in question and mentions_last_match):
        return f"{name} played their last match at {row['Last Match Venue']}."
    if "last match run" in question or "how many runs" in question:
        return f"In the last match, {name} scored {row['Runs in Last Match']} runs."

    # General info
    if "info" in question or "tell me" in question or "profile" in question:
        return (
            f"Here's what I found about {name}:\n"
            f"- Age: {row['Age']}\n"
            f"- Born: {_format_date(row['DOB'])} in {row['Place']}\n"
            f"- Test Runs: {row['Test Runs']}\n"
            f"- ODI Runs: {row['ODI Runs']}\n"
            f"- T20 Runs: {row['T20 Runs']}\n"
            f"- International Runs: {row['International Runs']}\n"
            f"- Highest Score: {row['Maximum Score']}\n"
            f"- Last Match: {_format_date(row['Last Match Date'])} at {row['Last Match Venue']}, "
            f"scoring {row['Runs in Last Match']} runs."
        )

    # How good is the player?
    if "how good" in question or ("is" in question and "top scorer" in question):
        if is_top_scorer(player_name):
            return f"Yes, {name} is among the top international run-scorers for Pakistan!"
        return f"{name} is a solid performer, but not currently in the top 5 for international runs."

    return f"I'm not sure how to answer that about {name}, but I'm learning!"


def _answer_global_question(question):
    """Answer a lower-cased question about the squad as a whole."""
    if "youngest" in question:
        row = df.loc[df["Age"].idxmin()]
        return f"The youngest player is {row['Name']}, who is {row['Age']} years old."

    if "oldest" in question:
        row = df.loc[df["Age"].idxmax()]
        return f"The oldest player is {row['Name']}, aged {row['Age']}."

    if "most test runs" in question or "highest test runs" in question:
        row = df.loc[df["Test Runs"].idxmax()]
        return f"The player with the highest Test runs is {row['Name']} with {row['Test Runs']} runs."

    if "least test runs" in question:
        row = df.loc[df["Test Runs"].idxmin()]
        return f"The player with the least Test runs is {row['Name']} with only {row['Test Runs']} runs."

    if "most odi runs" in question or "highest odi runs" in question:
        row = df.loc[df["ODI Runs"].idxmax()]
        return f"The player with the highest ODI runs is {row['Name']} with {row['ODI Runs']} runs."

    if "least odi runs" in question:
        row = df.loc[df["ODI Runs"].idxmin()]
        return f"The player with the least ODI runs is {row['Name']} with only {row['ODI Runs']} runs."

    # "highest ..." is accepted here too, matching the Test and ODI branches.
    if "most t20 runs" in question or "highest t20 runs" in question:
        row = df.loc[df["T20 Runs"].idxmax()]
        return f"The highest T20 run scorer is {row['Name']} with {row['T20 Runs']} runs."

    if "least t20 runs" in question:
        row = df.loc[df["T20 Runs"].idxmin()]
        return f"The player with the least T20 runs is {row['Name']} with just {row['T20 Runs']} runs."

    if (
        "most international runs" in question
        or "highest international runs" in question
        or "top scorer" in question
    ):
        row = df.loc[df["International Runs"].idxmax()]
        return f"The highest international run scorer is {row['Name']} with a total of {row['International Runs']} runs."

    if "least international runs" in question:
        row = df.loc[df["International Runs"].idxmin()]
        return f"The player with the least international runs is {row['Name']} with just {row['International Runs']} runs."

    if "most runs in last match" in question or "best performance in last match" in question:
        row = df.loc[df["Runs in Last Match"].idxmax()]
        return f"{row['Name']} scored the most in their last match — {row['Runs in Last Match']} runs."

    if "not played for a long time" in question or "hasn't played recently" in question:
        # Ignore players with no recorded date so they are neither listed
        # nor formatted.
        stalest = df.dropna(subset=["Last Match Date"]).sort_values(by="Last Match Date").head(3)
        lines = ["Here are players who haven't played in the longest time:"]
        lines.extend(
            f"- {r['Name']} (last match on {_format_date(r['Last Match Date'])})"
            for _, r in stalest.iterrows()
        )
        return "\n".join(lines)

    if "last match recently" in question or "most recent match" in question:
        dated = df["Last Match Date"].dropna()
        if dated.empty:
            return "I don't have any last match dates on record."
        row = df.loc[dated.idxmax()]
        return f"The most recent match was played by {row['Name']} on {_format_date(row['Last Match Date'])}."

    return "Sorry, I didn't quite get that. Try asking in a different way — I'm here to help!"


def ask_crickchat(question: str) -> str:
    """Answer a natural-language cricket question from the loaded dataset.

    If the question mentions a known player, the answer is drawn from that
    player's row (age, birth details, runs per format, best score, last
    match, profile summary, top-scorer status). Otherwise the question is
    treated as a squad-wide query (youngest/oldest, most/least runs, longest
    or most recently idle players).

    Args:
        question: The user's question; matching is case-insensitive and
            keyword based.

    Returns:
        A human-readable answer, or a fallback message when no keyword
        matches.
    """
    question = question.lower()
    player_name = get_player_name(question)

    if player_name:
        matches = df[df["Name"].str.lower() == player_name]
        # Guard against the name list and the DataFrame disagreeing; fall
        # through to the squad-wide questions instead of raising IndexError.
        if not matches.empty:
            return _answer_player_question(question, matches.iloc[0], player_name)

    return _answer_global_question(question)


def listen_to_user():
    """Record one spoken phrase from the microphone and transcribe it.

    Waits up to 5 seconds for speech to start and records at most 7 seconds,
    then sends the audio to Google's recogniser.

    Returns:
        The transcribed text on success. On failure, a short message that
        contains "sorry" or "timed out"; the main loop looks for those words
        to skip the round.

    Raises:
        SystemExit: If the user interrupts with Ctrl+C while listening.
    """
    recognizer = sr.Recognizer()
    mic = sr.Microphone()
    try:
        with mic as source:
            print("Listening... (speak clearly)")
            recognizer.adjust_for_ambient_noise(source)
            audio = recognizer.listen(source, timeout=5, phrase_time_limit=7)
        # Transcribe after leaving the `with` block so the microphone is
        # released before the network round-trip.
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Sorry, I didn't catch that."
    except sr.WaitTimeoutError:
        return "Listening timed out. Please try again."
    except sr.RequestError:
        # Raised when the recognition service cannot be reached; report it
        # instead of crashing the chat loop.
        return "Sorry, I couldn't reach the speech recognition service."
    except KeyboardInterrupt:
        print("\nExiting CrickChat.")
        # exit() is a convenience injected by the `site` module for
        # interactive use; raising SystemExit works everywhere.
        raise SystemExit(0)
        
        
# -----------------------------------------
# Run the chatbot
# -----------------------------------------
greet()

# NOTE: no Recognizer/Microphone is created here. listen_to_user() builds its
# own, and opening a microphone up front can fail on machines without audio
# input even when the user only wants text mode.

EXIT_WORDS = {"exit", "quit", "bye"}

while True:
    # Prompt for voice or text; strip so stray spaces don't make a valid
    # choice look invalid.
    mode = input("Press [V] for voice or [T] for text (or type 'exit'): ").strip().lower()

    if mode == 'exit':
        print("Thanks for chatting with CrickChat. Take care!")
        break

    if mode == 'v':
        query = listen_to_user()
        print("You said:", query)
        # listen_to_user() reports failures as messages containing these
        # words; skip processing this round.
        heard = query.lower()
        if "sorry" in heard or "timed out" in heard:
            continue
    elif mode == 't':
        query = input("You: ")
    else:
        print("Invalid option. Please press 'V' or 'T'.")
        continue

    if query.strip().lower() in EXIT_WORDS:
        print("Thanks for chatting with CrickChat. Take care!")
        break

    response = ask_crickchat(query)
    print("CrickChat:", response)



Hello! I'm CrickChat - your friendly PCB cricket assistant.
Ask me anything about Pakistani players - stats, profiles, or recent matches!

Listening... (speak clearly)
You said: tell me about
CrickChat: Sorry, I didn't quite get that. Try asking in a different way — I'm here to help!
Invalid option. Please press 'V' or 'T'.
Listening... (speak clearly)
You said: tell me about Shan Masood
CrickChat: Here's what I found about Shan Masood:
- Age: 35
- Born: 1989-10-14 00:00:00 in Kuwait City, Kuwait
- Test Runs: 2159
- ODI Runs: 620
- T20 Runs: 380
- International Runs: 3159
- Highest Score: 175 (Test)
- Last Match: January 06, 2024 at Sydney Cricket Ground, scoring 55 runs.
