<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Step-1:-Load-Journal-Data" data-toc-modified-id="Step-1:-Load-Journal-Data-1">Step 1: Load Journal Data</a></span></li><li><span><a href="#Step-2:-Generate-Embeddings" data-toc-modified-id="Step-2:-Generate-Embeddings-2">Step 2: Generate Embeddings</a></span></li><li><span><a href="#Step-3:-Reduce-Dimensions-with-PCA" data-toc-modified-id="Step-3:-Reduce-Dimensions-with-PCA-3">Step 3: Reduce Dimensions with PCA</a></span></li><li><span><a href="#Step-4:-Perform-K-Means-Clustering" data-toc-modified-id="Step-4:-Perform-K-Means-Clustering-4">Step 4: Perform K-Means Clustering</a></span></li><li><span><a href="#Step-5:-Visualize-Vector-Embeddings" data-toc-modified-id="Step-5:-Visualize-Vector-Embeddings-5">Step 5: Visualize Vector Embeddings</a></span></li><li><span><a href="#Step-6:-Filter-Entries-and-Create-Documents" data-toc-modified-id="Step-6:-Filter-Entries-and-Create-Documents-6">Step 6: Filter Entries and Create Documents</a></span></li><li><span><a href="#Step-7:-Create-Chroma-Vector-Store" data-toc-modified-id="Step-7:-Create-Chroma-Vector-Store-7">Step 7: Create Chroma Vector Store</a></span></li><li><span><a href="#Step-8:-Query-LLM" data-toc-modified-id="Step-8:-Query-LLM-8">Step 8: Query LLM</a></span></li></ul></div>

## Step 1: Load Journal Data

In [1]:
from langchain_community.vectorstores import Chroma
from langchain_ollama.embeddings import OllamaEmbeddings
from langchain.docstore.document import Document
from langchain_ollama.llms import OllamaLLM
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import plotly.express as px

# Read the daily tracking log (one row per journal entry) into a DataFrame
journal_csv = "daily_track_records_2024.csv"
journal_df = pd.read_csv(filepath_or_buffer=journal_csv)

## Step 2: Generate Embeddings

In [None]:
# Number of texts sent to Ollama in a single embedding request
EMBED_BATCH_SIZE = 64

# Combine meaningful columns into a single text field
journal_df['text'] = journal_df.apply(
    lambda row: f"Time: {row['Time']}, Day: {row['Day']}, Type: {row['Type']}, Name: {row['Name']}",
    axis=1
)

# Generate embeddings using OllamaEmbeddings.
# embed_documents() accepts a list of texts, so the rows are embedded in
# batches instead of issuing one request per row through embed_query().
embeddings_model = OllamaEmbeddings(model="nomic-embed-text")
texts = journal_df['text'].tolist()
vectors = []
for start in range(0, len(texts), EMBED_BATCH_SIZE):
    batch = texts[start:start + EMBED_BATCH_SIZE]
    vectors.extend(embeddings_model.embed_documents(batch))

# Wrap the vectors in an object Series aligned to the frame's index so that
# every cell of the new column holds one complete embedding (a list of floats)
journal_df['embedding'] = pd.Series(vectors, index=journal_df.index, dtype=object)

# Save embeddings for later use
journal_df.to_pickle("journal_with_ollama_embeddings.pkl")
print("Ollama embeddings created and saved successfully!")

## Step 3: Reduce Dimensions with PCA

In [None]:
# Load embeddings.
# NOTE: pickle files can execute arbitrary code when loaded; only read a
# pickle that this notebook (or another trusted source) produced.
journal_df = pd.read_pickle("journal_with_ollama_embeddings.pkl")

# Stack the per-row embedding lists into a single 2-D array for scikit-learn
embeddings = np.array(journal_df['embedding'].tolist())

# Reduce to 3 dimensions for visualization.
# random_state pins the projection when scikit-learn selects a randomized SVD
# solver, so a Restart & Run All reproduces the same coordinates (and the same
# seed is used for K-Means).
pca = PCA(n_components=3, random_state=42)
reduced_embeddings = pca.fit_transform(embeddings)

# Add PCA results to the DataFrame, one column per principal component
journal_df['pca_x'] = reduced_embeddings[:, 0]
journal_df['pca_y'] = reduced_embeddings[:, 1]
journal_df['pca_z'] = reduced_embeddings[:, 2]

## Step 4: Perform K-Means Clustering

In [None]:
# Group the entries into clusters using their 3-D PCA coordinates
num_clusters = 10  # Adjust as needed
pca_features = journal_df[['pca_x', 'pca_y', 'pca_z']]

# Fit with a fixed seed, then store each row's assigned cluster label
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(pca_features)
journal_df['cluster'] = kmeans.labels_

## Step 5: Visualize Vector Embeddings

In [None]:
# Create a hover_text column for visualization:
# "Name (Type)" when the entry has a name, otherwise just the type
journal_df['hover_text'] = journal_df.apply(
    lambda row: f"{row['Name']} ({row['Type']})" if pd.notna(row['Name']) else row['Type'],
    axis=1
)

# Create an interactive 3D scatter plot of the PCA coordinates.
# NOTE(review): 'cluster' holds numeric labels, which plotly express colors
# on a continuous scale; cast the column to str if discrete colors are wanted.
fig = px.scatter_3d(
    journal_df,
    x='pca_x',
    y='pca_y',
    z='pca_z',
    color='cluster',
    hover_name='hover_text',  # Use the hover_text column
    title="Journal Entry Embeddings (3D PCA with Clustering)",
    labels={
        'pca_x': 'PCA Dimension 1',
        'pca_y': 'PCA Dimension 2',
        'pca_z': 'PCA Dimension 3',
        'cluster': 'Cluster'
    }
)

# Customize layout: fixed square canvas, tight margins, short axis titles
fig.update_layout(
    width=800,
    height=800,
    margin=dict(l=10, r=10, t=40, b=10),
    scene=dict(
        xaxis_title="PCA D1",
        yaxis_title="PCA D2",
        zaxis_title="PCA D3"
    )
)

# Adjust dot size and transparency
fig.update_traces(marker=dict(size=3, opacity=0.8))
fig.show()

## Step 6: Filter Entries and Create Documents

In [None]:
# Entry types to drop before building documents
types_to_exclude = [
    "Schedule - Get Up + Routine",
    "Schedule - Sleep + Routine",
    "Meal - Breakfast",
    "Meal - Brunch",
    "Meal - Lunch",
    "Meal - Prep"
]

# Keep only the rows whose Type is not in the exclusion list
keep_mask = ~journal_df['Type'].isin(types_to_exclude)
filtered_journal_df = journal_df.loc[keep_mask]

# Wrap each remaining entry's text in a Document for Chroma, recording the
# DataFrame index label as row_id so a hit can be traced back to its row
documents = []
for idx, text in filtered_journal_df['text'].items():
    documents.append(Document(page_content=text, metadata={"row_id": idx}))

## Step 7: Create Chroma Vector Store

In [None]:
# Embed every document with nomic-embed-text and index the vectors in Chroma
embedding_model = OllamaEmbeddings(model="nomic-embed-text")
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
)

## Step 8: Query LLM

In [None]:
# Local Ollama model that writes the answers.
# NOTE(review): the model name is an assumption -- use any chat model that has
# already been pulled into the local Ollama instance (`ollama pull <name>`).
llm = OllamaLLM(model="llama3")


def chat_with_journal(query, vectorstore, llm, k=5):
    """Answer a question using the journal entries most similar to it.

    Args:
        query: Natural-language question about the journal.
        vectorstore: Vector store holding the journal documents; must provide
            ``similarity_search(query, k=...)``.
        llm: Language model exposing ``invoke(prompt)``.
        k: Number of journal entries to retrieve as context.

    Returns:
        Whatever ``llm.invoke`` returns for the assembled prompt.
    """
    # Retrieve the entries closest to the question and join them as context
    matches = vectorstore.similarity_search(query, k=k)
    context = "\n".join(doc.page_content for doc in matches)

    # Ask the model to ground its answer in the retrieved entries only
    prompt = (
        "You are answering questions about a personal journal.\n"
        "Use only the journal entries below to answer.\n\n"
        f"Journal entries:\n{context}\n\n"
        f"Question: {query}\n"
        "Answer:"
    )
    return llm.invoke(prompt)


# Example query
query = "What new skills or habits did I develop this year?"
response = chat_with_journal(query, vectorstore, llm)

print("LLM Response:", response)