In [None]:
#@title Load the Universal Sentence Encoder's TF Hub module

from absl import logging

import tensorflow as tf

import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns

module_url = "https://tfhub.dev/google/universal-sentence-encoder/4" #@param ["https://tfhub.dev/google/universal-sentence-encoder/4", "https://tfhub.dev/google/universal-sentence-encoder-large/5"]
model = hub.load(module_url)
print ("module %s loaded" % module_url)
def embed(input):
  return model(input)


def plot_similarity(labels, features, rotation):
  corr = np.inner(features, features)
  sns.set(font_scale=1.2)
  g = sns.heatmap(
      corr,
      xticklabels=labels,
      yticklabels=labels,
      vmin=0,
      vmax=1,
      cmap="YlOrRd")
  g.set_xticklabels(labels, rotation=rotation)
  g.set_title("Semantic Textual Similarity")

def run_and_plot(messages_):
  message_embeddings_ = embed(messages_)
  plot_similarity(messages_, message_embeddings_, 90)


messages = [
    # Smartphones
    "I like my phone",
    "My phone is not good.",
    "Your cellphone looks great.",

    # Weather
    "Will it snow tomorrow?",
    "Recently a lot of hurricanes have hit the US",
    "Global warming is real",

    # Food and health
    "An apple a day, keeps the doctors away",
    "Eating strawberries is healthy",
    "Is paleo better than keto?",

    # Asking about age
    "How old are you?",
    "what is your age?",
]

run_and_plot(messages)

import warnings
warnings.filterwarnings('ignore')

# Recommendation System (No Graphs)

In [None]:
from absl import logging
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np

# Load the Universal Sentence Encoder
module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
model = hub.load(module_url)
print("module %s loaded" % module_url)

def embed(input):
  return model(input)

def find_top_similar_messages(input_message, messages, top_n=5):
  """
  Finds the top N messages most similar to the input message.

  Args:
    input_message: The message to compare against.
    messages: A list of messages to search within.
    top_n: The number of top similar messages to return.

  Returns:
    A list of tuples, each containing (similarity_score, message).
  """

  # 1. Add the input message into the messages list
  messages.append(input_message)
  # 2. Re-calculate embeddings for all messages (including the new one)
  message_embeddings_ = embed(messages)
  # 3. Calculate the correlation matrix
  correlation_matrix = np.inner(message_embeddings_, message_embeddings_)
  # 4. Get similarity scores
  input_message_index = messages.index(input_message)
  similarity_scores = correlation_matrix[input_message_index]

  # Create a list to store (similarity, message) pairs
  similarity_pairs = []
  for i, score in enumerate(similarity_scores):
    if i != input_message_index: # Exclude self-similarity
      similarity_pairs.append((score, messages[i]))

  # Sort the pairs in descending order of similarity
  similarity_pairs.sort(reverse=True, key=lambda x: x[0])

  return similarity_pairs[:top_n]

# Example usage:
messages = [
    # Smartphones
    "I like my phone",
    "My phone is not good.",
    "Your cellphone looks great.",

    # Weather
    "Will it snow tomorrow?",
    "Recently a lot of hurricanes have hit the US",
    "Global warming is real",

    # Food and health
    "An apple a day, keeps the doctors away",
    "Eating strawberries is healthy",
    "Is paleo better than keto?",

    # Asking about age
    "How old are you?",
    "what is your age?",
]

input_message = "My phone is good"
top_similar = find_top_similar_messages(input_message, messages)

print(f"Top 5 messages similar to '{input_message}':")
for score, message in top_similar:
  print(f"Similarity: {score:.4f} - '{message}'")

# ACTUAL MODEL BELOW

In [34]:
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np

# Load the Universal Sentence Encoder
module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
model = hub.load(module_url)
print("module %s loaded" % module_url)

def embed(input):
  return model(input)

def find_top_similar_messages(input_message, messages, top_n=5):
  """
  Finds the top N messages most similar to the input message.

  Args:
    input_message: The message to compare against.
    messages: A list or tensor of messages to search within.
    top_n: The number of top similar messages to return.

  Returns:
    A list of tuples, each containing (similarity_score, message).
  """

  # Convert messages to a list if it's a tensor (for flexibility)
  if tf.is_tensor(messages):
      messages = messages.numpy().tolist()

  # Encode the input message
  input_message_embedding = embed([input_message])

  # Encode all messages in a single batch
  message_embeddings = embed(messages)

  # Calculate similarity scores
  similarity_scores = np.inner(input_message_embedding, message_embeddings)[0]

  # Create a list to store (similarity, message) pairs
  similarity_pairs = []
  for i, score in enumerate(similarity_scores):
      similarity_pairs.append((score, messages[i]))

  # Sort the pairs in descending order of similarity
  similarity_pairs.sort(reverse=True, key=lambda x: x[0])

  return similarity_pairs[:top_n]

# Load your dataset
df = pd.read_csv('C:/Users/Harvey/Downloads/anime_summarized.csv')
column = df['summary'].tolist()  # Convert directly to a list

# Example usage (with correct list handling)
#input_message = "Hunter × Hunter is an anime series and manga series about a young boy named Gon Freecss who sets out to become a Hunter and find his father"


input_message = input("Enter your anime: ")

from ollama import chat
from pydantic import BaseModel, ValidationError
import json

class Synopsis(BaseModel):
    synopsis: str

response = chat(
    messages=[
        {
            'role': 'user',
            'content': f"Return the 2 sentence synopsis of this anime in the following JSON format: synopsis: 2 sentence synopsis here. STRICTLY RETURN ACCORDING TO JSON FORMAT anime: {input_message}"
        }
    ],
    model='llama3.2',
    format='json',  # Request JSON format directly
)

# Extract, parse, validate, and extract synopsis (with error handling)
response_content = response['message']['content']
try:
    response_json = json.loads(response_content)
    synopsis_obj = Synopsis(**response_json)
    synopsis_text = synopsis_obj.synopsis  # Get the synopsis value
    print(synopsis_text)

except (json.JSONDecodeError, ValidationError) as e:
    print(f"Error processing response: {e}")
    print(f"Raw response content: {response_content}")
    # Optional: Fallback synopsis here if needed


##########################################################################################################################
top_similar = find_top_similar_messages(input_message, column)

print(f"Top 5 messages similar to '{input_message}':")
for score, message in top_similar:
  row = df[df['summary'] == message]
  print(f"Similarity: {score:.4f} title: {row['title']}")
  # '{message}'

module https://tfhub.dev/google/universal-sentence-encoder/4 loaded


Enter your anime:  naruto


Error processing response: 1 validation error for Synopsis
synopsis
  Input should be a valid string [type=string_type, input_value=['In a world where ninjas... him when he was born.'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
Raw response content: {"anime": "Naruto", "synopsis": ["In a world where ninjas with supernatural abilities fight and protect their villages, Naruto Uzumaki dreams of becoming the leader of his village, Konohagakure.", "Alongside his teammates Sakura Haruno and Sasuke Uchiha, Naruto embarks on a journey to defeat the Nine-Tails, a powerful demon that was sealed within him when he was born."]}
Top 5 messages similar to 'naruto':
Similarity: 0.3236 title: 464    Naruto: Shippuuden Movie 2 - Kizuna
Name: title, dtype: object
Similarity: 0.3038 title: 406    Natsume Yuujinchou
Name: title, dtype: object
Similarity: 0.3018 title: 425    Terra Formars: Bugs 2-hen
Name: title, dtype: object
Similarity: 0.2967 titl