# Smart flash cards using LLMs

In [None]:
import os
import openai # make sure to pip install openai
import re
import json
import numpy as np
import pandas as pd
from collections import Counter
from typing import Any, Optional, Tuple, Dict, List, NamedTuple, Set
import scipy
import time

from pprint import pprint as pprint
import matplotlib.pyplot as plt
import matplotlib as mpl

mpl.rcParams['figure.dpi'] = 100

import umap   # 一种降维算法
import hdbscan # 一种聚类算法
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display
import tkinter as tk
from tkinter import ttk



from basic_utils import *
from basic_user_interface import *
from initial_card_processing import *
from knowledge_graph import *
from knowledge_graph_querying import *

## Process initial flashcards to extract info

In [None]:
# Turn the raw front/back flashcard CSV into a cards dataframe with
# abstraction groups, then persist that dataframe as JSON so later cells
# can reload it without repeating this step.
csv_title = 'my_flash_cards_general'
verbose = False

cards_df_abstraction_groups = get_cards_df_abstraction_groups_from_front_and_back_csv(
    csv_title,
    verbose=verbose,
)

# The saved title is the CSV title plus a fixed suffix; the reload cell
# further down refers to this same title.
output_title = csv_title + '_cards_df_abstraction_groups'
save_cards_df_to_json(cards_df_abstraction_groups, output_title)

## Build a Knowledge Graph

In [None]:
# Reload the processed cards from JSON as the starting point for the
# knowledge graph. The title is hard-coded, so this cell does not depend
# on variables defined by the processing cell.
cards_df = read_cards_df_from_json('my_flash_cards_general_cards_df_abstraction_groups')

# Quick sanity check on how many entries were loaded.
print("Cards loaded:", len(cards_df))

In [None]:
# Create the (initially empty) knowledge graph that the real flashcard
# data is added to in the next cell.
# NOTE(review): the meaning of lower_bound_epsilon is defined in
# knowledge_graph, not here — confirm before tuning this value.
kGraph = KnowledgeGraph(lower_bound_epsilon=0.05)

In [None]:
# Add card deck to kGraph:
#   1. build a card deck from the reloaded dataframe,
#   2. add that deck to the graph (the return value is kept as title_list),
#   3. ask the graph to update all of its embeddings.
card_deck = create_card_deck_from_dataframe_of_abstraction_groups(cards_df)
title_list = kGraph.add_card_deck(card_deck, verbose=True)
kGraph.update_all_embeddings(verbose=True)

## Visualize graph
### Build similarity matrix

In [None]:
# Build the symmetric card-card overlap matrix that the next cell plots.
# This is a bit slow to run (20-30 seconds): it makes one
# emb_vec_inner_product call per unordered pair of cards.

cardIDs = np.array(list(kGraph.cards.keys()))
cardIDs.sort()  # low to high

num_cards = len(cardIDs)
similarity_metric = np.zeros((num_cards, num_cards))

# Rows and columns are addressed by a card's position in the sorted cardIDs
# array rather than by the raw ID. Indexing by the raw ID only works when
# the IDs happen to be exactly 0..N-1; positions work for any set of IDs
# and give the same matrix in the 0..N-1 case.
name_labels = [kGraph.cards[cardID].topic for cardID in cardIDs]

# Look up each card's trimmed embedding once instead of once per pair.
trimmed_embeddings = [
    kGraph.cards[cardID].embedding_vector_trimmed for cardID in cardIDs
]

for row, emb_vec1 in enumerate(trimmed_embeddings):
    similarity_metric[row, row] = 1.0  # diag is 1 by definition
    # Only visit the upper triangle; each value is mirrored below.
    for col in range(row + 1, num_cards):
        inner_prod = emb_vec_inner_product(emb_vec1, trimmed_embeddings[col])
        # Same normalisation expression as before, evaluated once per pair.
        # NOTE(review): the 1e-13 offset presumably keeps the value strictly
        # positive — confirm against emb_vec_inner_product.
        overlap = (inner_prod + 1e-13)/(1.0 + 1e-13)
        similarity_metric[row, col] = overlap
        similarity_metric[col, row] = overlap

In [None]:
# plt.subplots is a matplotlib.pyplot function that creates several
# subplots in one call. It returns two objects:
#   fig: the container for the whole figure (Figure).
#   ax:  the subplot containers (Axes); here an array holding two subplots.
fig, ax = plt.subplots(1, 2, figsize=(6, 3))
heatmap_ax, hist_ax = ax

# imshow draws a 2-D array (matrix); it is commonly used to visualise
# matrices, images or heat maps, and maps the values of similarity_metric
# to colours.
#   similarity_metric: the 2-D array holding the similarity values.
#   vmin=0, vmax=1: the value range of the colour mapping; entries below 0
#       get the lowest colour and entries above 1 get the highest colour.
#   cmap='gnuplot2': selects the gnuplot2 colour map.
heatmap_ax.imshow(similarity_metric, vmin=0, vmax=1, cmap='gnuplot2')
heatmap_ax.set(
    xlabel='Card index',
    ylabel='Card index',
    title='Similarity Metric',
)

# Histogram over every entry of the matrix.
hist_ax.hist(similarity_metric.flatten(), bins=100)
hist_ax.set(
    xlabel='Card overlap',
    ylabel='Counts',
    title='Similarity Histogram',
)
hist_ax.set_yscale('log')  # use a logarithmic y axis

# Leave horizontal room between the two panels.
fig.subplots_adjust(wspace=0.4)
plt.show()

### Display a few nodes/cards

In [None]:
# Look up one node by its title and show its overlaps.
# NOTE(review): 'MCTS' is a hard-coded title; this raises if the loaded
# deck produced no node with that title (assuming kGraph.nodes is a dict).
node_title = 'MCTS'
node = kGraph.nodes[node_title]
kGraph.display_object_overlaps(node)

# Do the same for a single card, picked by a hard-coded card ID.
card = kGraph.cards[4]
card.display(verbose=True)
kGraph.display_object_overlaps(card)