In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Chess Analysis Tool\n",
    "\n",
    "This notebook provides a detailed explanation and implementation of a Chess Analysis Tool. The tool can preprocess e-books, analyze PGN files, calculate win rates, and find the move with the lowest accuracy. Additionally, it includes a simple search engine for e-books."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import Libraries\n",
    "\n",
    "First, we need to import the necessary libraries."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import pandas as pd\n",
    "import chess.pgn\n",
    "import nltk\n",
    "import docx\n",
    "from PyPDF2 import PdfReader\n",
    "from tkinter import Tk, filedialog, Label, Button, Entry, Text, Scrollbar, messagebox\n",
    "from nltk.corpus import stopwords\n",
    "\n",
    "nltk.download('punkt')\n",
    "nltk.download('stopwords')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tokenizer\n",
    "\n",
    "Define a simple tokenizer that removes stopwords."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def simple_tokenizer(text):\n",
    "    \"\"\"Lowercase the text, split it into word tokens, and drop English stopwords.\"\"\"\n",
    "    english_stopwords = set(stopwords.words('english'))\n",
    "    kept = []\n",
    "    for token in re.findall(r'\\b\\w+\\b', text.lower()):\n",
    "        if token in english_stopwords:\n",
    "            continue\n",
    "        kept.append(token)\n",
    "    return kept"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Preprocess E-book\n",
    "\n",
    "Function to preprocess text from a PDF or DOCX file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def preprocess_text(file_path):\n",
    "    \"\"\"Extract the text of a PDF or Word document and tokenize it.\n",
    "\n",
    "    Args:\n",
    "        file_path: Path to the document. Its extension, compared\n",
    "            case-insensitively, selects the reader.\n",
    "\n",
    "    Returns:\n",
    "        The token list produced by simple_tokenizer for the extracted text,\n",
    "        or an empty list when the format is unsupported or reading fails\n",
    "        (an error dialog is shown in both cases).\n",
    "    \"\"\"\n",
    "    try:\n",
    "        # Match on a lowercased copy so names such as 'Book.PDF' are accepted;\n",
    "        # the original path is still the one handed to the readers.\n",
    "        lowered_path = file_path.lower()\n",
    "        if lowered_path.endswith(\".pdf\"):\n",
    "            reader = PdfReader(file_path)\n",
    "            # The `or` fallback keeps join() from failing if a page yields no text object.\n",
    "            text = \" \".join([page.extract_text() or \"\" for page in reader.pages])\n",
    "        elif lowered_path.endswith((\".doc\", \".docx\")):\n",
    "            # NOTE(review): python-docx targets .docx; a legacy binary .doc\n",
    "            # presumably fails inside Document() and is reported by the handler below.\n",
    "            doc = docx.Document(file_path)\n",
    "            text = \" \".join([p.text for p in doc.paragraphs])\n",
    "        else:\n",
    "            raise ValueError(\"Unsupported file format. Only PDF and DOCX are supported.\")\n",
    "\n",
    "        return simple_tokenizer(text)\n",
    "    except Exception as e:\n",
    "        # Best effort: report the problem and let the caller continue with no tokens.\n",
    "        messagebox.showerror(\"Error\", f\"Failed to preprocess text: {e}\")\n",
    "        return []"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Analyze PGN File\n",
    "\n",
    "Function to read every game from a PGN file into a DataFrame, tagging each row with the chosen color."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_pgn_file(pgn_file, color):\n",
    "    \"\"\"Read every game from an open PGN stream into a DataFrame.\n",
    "\n",
    "    Each row holds the White, Black, Date and Result headers (falling back to\n",
    "    'Unknown' or '*'), the mainline moves in SAN joined by spaces, and the\n",
    "    given color. If reading fails, an error dialog is shown and the games\n",
    "    collected so far are returned.\n",
    "    \"\"\"\n",
    "    records = []\n",
    "    try:\n",
    "        while True:\n",
    "            game = chess.pgn.read_game(pgn_file)\n",
    "            if not game:\n",
    "                break\n",
    "\n",
    "            headers = game.headers\n",
    "            board = game.board()\n",
    "            san_moves = []\n",
    "            for move in game.mainline_moves():\n",
    "                # The SAN text is taken first, then the move is played on the board.\n",
    "                san_moves.append(board.san(move))\n",
    "                board.push(move)\n",
    "\n",
    "            record = {\n",
    "                \"White\": headers.get(\"White\", \"Unknown\"),\n",
    "                \"Black\": headers.get(\"Black\", \"Unknown\"),\n",
    "                \"Date\": headers.get(\"Date\", \"Unknown\"),\n",
    "                \"Result\": headers.get(\"Result\", \"*\"),\n",
    "                \"Moves\": \" \".join(san_moves),\n",
    "                \"Color\": color\n",
    "            }\n",
    "            records.append(record)\n",
    "    except Exception as e:\n",
    "        messagebox.showerror(\"Error\", f\"Error processing PGN file: {e}\")\n",
    "\n",
    "    return pd.DataFrame(records)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Analyze Win Rate\n",
    "\n",
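    "Function to calculate win rates per opening, where an opening is identified by the first two plies of the game (White's first move and Black's reply). Only openings with more than 20 games are kept."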
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def analyze_win_rate(df, min_games=20):\n",
    "    \"\"\"Compute per-opening win rates from a games table.\n",
    "\n",
    "    An opening is keyed by the first two whitespace-separated entries of the\n",
    "    'Moves' string; with the SAN strings built by process_pgn_file that is\n",
    "    White's first move and Black's reply. Rows whose 'Color' is 'White' feed\n",
    "    the white statistics and rows whose 'Color' is 'Black' feed the black\n",
    "    statistics. A win scores 1, a draw 0.5 and a loss 0; any other 'Result'\n",
    "    value (such as '*') maps to NaN and is left out of counts and means.\n",
    "\n",
    "    Args:\n",
    "        df: DataFrame with 'Moves', 'Result' and 'Color' columns. It is not\n",
    "            modified; the helper columns are added to a copy.\n",
    "        min_games: An opening is kept only when it has strictly more than\n",
    "            this many counted games for White or for Black. Defaults to 20.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame indexed by opening with the columns total_games_white,\n",
    "        win_rate_white, total_games_black and win_rate_black; statistics\n",
    "        missing for one color are filled with 0. An empty DataFrame is\n",
    "        returned, after a warning dialog, when 'Moves' is absent.\n",
    "    \"\"\"\n",
    "    if 'Moves' not in df.columns:\n",
    "        messagebox.showwarning(\"Warning\", \"Column 'Moves' not found in DataFrame.\")\n",
    "        return pd.DataFrame()\n",
    "\n",
    "    # assign() returns a new frame, so the caller's DataFrame keeps its original columns.\n",
    "    games = df.assign(\n",
    "        First_Two_Moves=df['Moves'].apply(lambda x: ' '.join(x.split()[:2])),\n",
    "        White_Result=df['Result'].map({'1-0': 1, '0-1': 0, '1/2-1/2': 0.5}),\n",
    "        Black_Result=df['Result'].map({'1-0': 0, '0-1': 1, '1/2-1/2': 0.5}),\n",
    "    )\n",
    "\n",
    "    white_win_rate = games[games['Color'] == 'White'].groupby('First_Two_Moves').agg(\n",
    "        total_games_white=('White_Result', 'count'),\n",
    "        win_rate_white=('White_Result', 'mean')\n",
    "    )\n",
    "\n",
    "    black_win_rate = games[games['Color'] == 'Black'].groupby('First_Two_Moves').agg(\n",
    "        total_games_black=('Black_Result', 'count'),\n",
    "        win_rate_black=('Black_Result', 'mean')\n",
    "    )\n",
    "\n",
    "    # The outer join keeps openings seen for only one color; their missing side becomes 0.\n",
    "    win_rate = pd.merge(white_win_rate, black_win_rate, left_index=True, right_index=True, how='outer').fillna(0)\n",
    "    enough_games = (win_rate['total_games_white'] > min_games) | (win_rate['total_games_black'] > min_games)\n",
    "\n",
    "    return win_rate[enough_games]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Find Move with Lowest Accuracy\n",
    "\n",
    "Function to find the opening with the lowest win rate (the code labels this the 'lowest accuracy' move)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_min_accuracy_move(win_rate_df):\n",
    "    \"\"\"Return the index label of the row with the lowest win rate.\n",
    "\n",
    "    Rows are ordered by ascending win_rate_white, then ascending\n",
    "    win_rate_black, and the label of the first row is returned. None is\n",
    "    returned, after a dialog, when the frame is empty or the sort fails.\n",
    "    \"\"\"\n",
    "    if not win_rate_df.empty:\n",
    "        try:\n",
    "            ranked = win_rate_df.sort_values(\n",
    "                by=['win_rate_white', 'win_rate_black'],\n",
    "                ascending=True,\n",
    "            )\n",
    "            return ranked.index[0]\n",
    "        except Exception as e:\n",
    "            messagebox.showerror(\"Error\", f\"Error finding minimum accuracy move: {e}\")\n",
    "            return None\n",
    "\n",
    "    messagebox.showwarning(\"Warning\", \"Win rate DataFrame is empty!\")\n",
    "    return None"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process All Files\n",
    "\n",
    "Function to process all PDF and DOCX files in a directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_all_files(directory):\n",
    "    \"\"\"Collect the tokens of every PDF and DOCX file found under a directory.\n",
    "\n",
    "    Sub-directories and file names are visited in sorted order so the\n",
    "    returned token sequence is the same on every run; plain os.walk order\n",
    "    follows the underlying directory listing, which is arbitrary.\n",
    "\n",
    "    Args:\n",
    "        directory: Root folder to scan recursively.\n",
    "\n",
    "    Returns:\n",
    "        A flat list with the tokens of each matching file, concatenated in\n",
    "        traversal order. Empty when no file matches.\n",
    "    \"\"\"\n",
    "    tokens = []\n",
    "    for root, dirs, files in os.walk(directory):\n",
    "        # Sorting the list in place steers the order in which os.walk descends.\n",
    "        dirs.sort()\n",
    "        for file in sorted(files):\n",
    "            if file.endswith((\".pdf\", \".docx\")):\n",
    "                file_path = os.path.join(root, file)\n",
    "                tokens.extend(preprocess_text(file_path))\n",
    "    return tokens"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Main GUI Function\n",
    "\n",
    "Function to create the main GUI for the Chess Analysis Tool."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main(dataset_dir=\"/home/ep/Documents/Github/Information_Retrieval_System/Analyze_E-book/Dataset/\"):\n",
    "    \"\"\"Build and run the Tkinter window of the Chess Analysis Tool.\n",
    "\n",
    "    The window lets the user pick a PGN file and a color, shows the opening\n",
    "    win-rate table for that file, and lists the e-books whose tokens contain\n",
    "    the search query.\n",
    "\n",
    "    Args:\n",
    "        dataset_dir: Folder holding the PDF/DOCX e-books. It is scanned once\n",
    "            at start-up to build the search query and again on every search.\n",
    "            Defaults to the path the notebook was written against.\n",
    "    \"\"\"\n",
    "    def select_pgn_file():\n",
    "        \"\"\"Ask for a PGN file and put the chosen path into the entry box.\"\"\"\n",
    "        file_path = filedialog.askopenfilename(title=\"Select PGN File\", filetypes=[(\"PGN Files\", \"*.pgn\")])\n",
    "        if file_path:\n",
    "            entry_pgn_file.delete(0, \"end\")\n",
    "            entry_pgn_file.insert(0, file_path)\n",
    "\n",
    "    def analyze_file():\n",
    "        \"\"\"Validate the inputs, analyze the PGN file, and display the result.\"\"\"\n",
    "        # Strip stray whitespace so a pasted path or ' white ' is still accepted.\n",
    "        pgn_file_path = entry_pgn_file.get().strip()\n",
    "        color = entry_color.get().strip()\n",
    "\n",
    "        # isfile() also rejects directories, which open() below could not read.\n",
    "        if not os.path.isfile(pgn_file_path):\n",
    "            messagebox.showerror(\"Error\", \"PGN file not found!\")\n",
    "            return\n",
    "\n",
    "        if color.lower() not in ['white', 'black']:\n",
    "            messagebox.showerror(\"Error\", \"Invalid color! Please enter 'White' or 'Black'.\")\n",
    "            return\n",
    "\n",
    "        # Decode explicitly so the result does not depend on the platform default;\n",
    "        # undecodable bytes are replaced instead of aborting the read.\n",
    "        with open(pgn_file_path, 'r', encoding=\"utf-8\", errors=\"replace\") as pgn_file:\n",
    "            pgn_df = process_pgn_file(pgn_file, color.capitalize())\n",
    "\n",
    "        win_rate_df = analyze_win_rate(pgn_df)\n",
    "        result_text.delete(\"1.0\", \"end\")\n",
    "        if win_rate_df.empty:\n",
    "            # Say so explicitly instead of leaving the previous output on screen.\n",
    "            result_text.insert(\"end\", \"No win-rate data to display for this file.\\n\")\n",
    "            return\n",
    "\n",
    "        min_accuracy_move = find_min_accuracy_move(win_rate_df)\n",
    "        result_text.insert(\"end\", f\"Lowest accuracy move: {min_accuracy_move}\\n\")\n",
    "        result_text.insert(\"end\", win_rate_df.to_string())\n",
    "\n",
    "    def search_engine():\n",
    "        \"\"\"List the dataset files whose token text contains the search query.\"\"\"\n",
    "        # The query is the first two tokens gathered from the dataset at start-up.\n",
    "        search_query = \" \".join(global_tokens[:2]) if global_tokens else \"\"\n",
    "        result_text.delete(\"1.0\", \"end\")\n",
    "\n",
    "        result_text.insert(\"end\", f\"Search Results for Query: '{search_query}'\\n\\n\")\n",
    "        if not search_query:\n",
    "            # An empty string is a substring of any text, so it would match every file.\n",
    "            result_text.insert(\"end\", \"No matching files found.\\n\")\n",
    "            return\n",
    "\n",
    "        matched_files = []\n",
    "        # `folder` rather than `root`, so the Tk root window is not shadowed here.\n",
    "        for folder, _, files in os.walk(dataset_dir):\n",
    "            for file in files:\n",
    "                if file.endswith(\".pdf\") or file.endswith(\".docx\"):\n",
    "                    file_path = os.path.join(folder, file)\n",
    "                    tokens = preprocess_text(file_path)\n",
    "                    if search_query in \" \".join(tokens):\n",
    "                        matched_files.append(file)\n",
    "\n",
    "        if matched_files:\n",
    "            for file in matched_files:\n",
    "                result_text.insert(\"end\", f\"- {file}\\n\")\n",
    "        else:\n",
    "            result_text.insert(\"end\", \"No matching files found.\\n\")\n",
    "\n",
    "    # GUI\n",
    "    root = Tk()\n",
    "    root.title(\"Chess Analysis Tool\")\n",
    "    root.geometry(\"800x600\")\n",
    "\n",
    "    # Close button. destroy() ends mainloop() and removes the window; quit() only\n",
    "    # stops the loop, which leaves a dead window while a notebook kernel keeps running.\n",
    "    close_button = Button(root, text=\"X\", command=root.destroy, bg=\"red\", fg=\"white\")\n",
    "    close_button.place(relx=1.0, rely=0.0, anchor=\"ne\")\n",
    "\n",
    "    Label(root, text=\"PGN File:\").grid(row=0, column=0, padx=10, pady=5, sticky=\"e\")\n",
    "    entry_pgn_file = Entry(root, width=50)\n",
    "    entry_pgn_file.grid(row=0, column=1, padx=10, pady=5)\n",
    "    Button(root, text=\"Browse\", command=select_pgn_file).grid(row=0, column=2, padx=10, pady=5)\n",
    "\n",
    "    Label(root, text=\"Color (White/Black):\").grid(row=1, column=0, padx=10, pady=5, sticky=\"e\")\n",
    "    entry_color = Entry(root, width=50)\n",
    "    entry_color.grid(row=1, column=1, padx=10, pady=5)\n",
    "\n",
    "    Button(root, text=\"Analyze\", command=analyze_file).grid(row=2, column=1, pady=10)\n",
    "    Button(root, text=\"Search Engine\", command=search_engine).grid(row=3, column=1, pady=10)\n",
    "\n",
    "    result_text = Text(root, wrap=\"word\", height=30, width=120)\n",
    "    result_text.grid(row=4, column=0, columnspan=3, padx=10, pady=5)\n",
    "\n",
    "    scrollbar = Scrollbar(root, command=result_text.yview)\n",
    "    result_text.configure(yscrollcommand=scrollbar.set)\n",
    "    scrollbar.grid(row=4, column=3, sticky=\"ns\")\n",
    "\n",
    "    # Automatically process every file in the dataset directory before the event loop starts.\n",
    "    global_tokens = process_all_files(dataset_dir)\n",
    "\n",
    "    root.mainloop()\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    main()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}