In [None]:
"""
Author: Lai ZhonPoa
"""
# Bryan's individual work is not limited to BryanIndividual.ipynb
import os

import matplotlib.pyplot as plt
import networkx as nx
from neo4j import GraphDatabase

from UtilsNeo4J import DataBaseHandler
from UtilsRedis import Redis_Utilities

# Setup Neo4j driver and Redis client 22/12/2024
# Connection settings are read from the environment so that no secret is stored
# in the notebook. The URI and user fall back to the values this notebook has
# always used; the password has no fallback on purpose.
# SECURITY: a real password used to be written on this line. It has been shared
# with everyone who can read the notebook history and should be rotated.
neo4j_uri = os.environ.get("NEO4J_URI", "neo4j+s://75fb82ba.databases.neo4j.io")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD")
if not neo4j_password:
    # Fail early with an actionable message instead of an opaque auth error later.
    raise RuntimeError(
        "Set the NEO4J_PASSWORD environment variable before running this notebook."
    )
redis_utils = Redis_Utilities()

db_handler = DataBaseHandler(neo4j_uri, neo4j_user, neo4j_password, redis_utils)

# Get the total number of unique entries in the lexicon
total_unique_entries = db_handler.get_total_unique_entries()
print(f"Total number of unique entries: {total_unique_entries}")

In [None]:
# Graph Visualization Functions
def fetch_synonyms(tx, limit=25):
    """Fetch words together with their distinct synonyms.

    Written as a transaction function: the caller hands it to the session's
    managed read call, which supplies ``tx``.

    Args:
        tx: Object exposing ``run(query, **parameters)`` and returning a result
            with a ``values()`` method.
        limit: Maximum number of result rows (one row per word). Coerced with
            ``int`` before being sent to the database.

    Returns:
        Whatever ``result.values()`` returns — presumably a list of
        ``[word, synonyms]`` rows; confirm against the driver in use.

    Raises:
        TypeError, ValueError: If ``limit`` cannot be converted to ``int``.
    """
    # The limit is bound as a query parameter instead of being formatted into
    # the Cypher text: the query string stays constant and a non-integer value
    # can never end up inside the statement.
    query = """
    MATCH (w:Word)-[:SYNONYM]-(s:Word)
    RETURN w.word AS word, collect(DISTINCT s.word) AS synonyms
    LIMIT $limit
    """
    result = tx.run(query, limit=int(limit))
    return result.values()

def create_synonym_network(db_handler, limit=25):
    """Build an undirected graph linking each fetched word to its synonyms.

    Args:
        db_handler: Object whose ``neo4j_driver.session()`` returns a context
            manager offering a managed read call.
        limit: Forwarded to ``fetch_synonyms`` as the maximum number of rows.

    Returns:
        An ``nx.Graph`` with one edge per (word, synonym) pair in the result.
    """
    with db_handler.neo4j_driver.session() as session:
        # Newer driver releases deprecate `read_transaction` in favour of
        # `execute_read`. Prefer the new name and fall back to the old one so
        # the notebook runs on either driver generation.
        run_read = getattr(session, "execute_read", None)
        if run_read is None:
            run_read = session.read_transaction
        synonyms = run_read(fetch_synonyms, limit)

    G = nx.Graph()
    for word, syns in synonyms:
        for syn in syns:
            G.add_edge(word, syn)

    return G

def visualize_network(G, k=0.55, figsize=(20, 10), seed=None):
    """Draw the graph with a spring layout and show the figure.

    Args:
        G: Graph to draw.
        k: Spacing argument forwarded to ``nx.spring_layout``.
        figsize: Figure size forwarded to ``plt.figure``.
        seed: Seed forwarded to ``nx.spring_layout``. The default ``None``
            keeps the previous behaviour (a different layout on each run);
            pass an integer to get the same picture on every run.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # The layout is computed before the figure is created, as in the original.
    pos = nx.spring_layout(G, k=k, seed=seed)
    plt.figure(figsize=figsize)
    nx.draw(
        G,
        pos,
        with_labels=True,
        node_color='lightblue',
        edge_color='gray',
        node_size=5000,
        font_size=13,
    )
    plt.title("Synonym Network")
    plt.show()

def identify_clusters(G):
    """Partition the graph into communities and number them.

    Args:
        G: Graph handed to ``nx.community.greedy_modularity_communities``.

    Returns:
        A dict mapping the position of each community in the returned sequence
        (starting at 0) to a list of that community's members.
    """
    communities = nx.community.greedy_modularity_communities(G)
    themes = {}
    for index, members in enumerate(communities):
        themes[index] = list(members)
    return themes

# Build the synonym graph from the database and draw it.
G = create_synonym_network(db_handler, limit=30)  # limit is 30 here (the function's default is 25)
visualize_network(G)

# Split the graph into communities and print each one's members on one line.
themes = identify_clusters(G)
for theme_id, words in themes.items():
    print(f"Theme {theme_id}: {', '.join(words)}")

In [None]:
from UtilsRedis import Redis_Utilities
import redis
# Sample lookup word.
# NOTE(review): not referenced by any cell visible here — confirm it is still needed.
word_input = "sedih"
def get_word_data(word_to_search, redis_client=None):
    """Print the synonyms, antonyms and sentiment recorded for one word.

    Synonyms and antonyms are read through the module-level ``db_handler``;
    the sentiment is read through ``redis_client``.

    Args:
        word_to_search: Word to look up.
        redis_client: Optional object exposing ``get_sentiment(word)``. When
            omitted, a new ``Redis_Utilities()`` is created for this call, as
            before. Pass an existing instance to reuse it across lookups.

    Returns:
        None. The three result lines are written with ``print``.
    """
    if redis_client is None:
        redis_client = Redis_Utilities()

    sentiment_data = redis_client.get_sentiment(word_to_search)

    # NOTE(review): the return shape of get_synonyms/get_antonyms is not visible
    # here. A None result and non-string items are tolerated so that a word with
    # no entries prints an empty list instead of raising TypeError in join().
    synonyms = [str(item) for item in (db_handler.get_synonyms(word_to_search) or [])]
    antonyms = [str(item) for item in (db_handler.get_antonyms(word_to_search) or [])]

    print(f"Synonyms for '{word_to_search}': {', '.join(synonyms)}")
    print(f"Antonyms for '{word_to_search}': {', '.join(antonyms)}")
    print(f"Sentiment for '{word_to_search}':", sentiment_data)

# Look up each sample word in turn; the order matches the printed output.
for lookup_word in ("sedih", "gembira", "a", "hasil"):
    get_word_data(lookup_word)

In [None]:
def get_top_and_bottom_frequencies(frequencies, top_n=20):
    """Rank words by frequency and pick out the extremes.

    Args:
        frequencies: Mapping of word to a count; each count is converted with
            ``int`` before comparison.
        top_n: Slice bound applied to both rankings.

    Returns:
        A tuple ``(most_used, least_used, once_used)``. The first two are lists
        of ``(word, count)`` pairs sorted by descending and ascending count
        respectively and cut to ``top_n``; the third lists every word whose
        count is exactly 1.
    """
    counts = {}
    for term, raw_count in frequencies.items():
        counts[term] = int(raw_count)

    def by_count(pair):
        return pair[1]

    # Two separate stable sorts, so ties keep the mapping's order in both lists.
    descending = sorted(counts.items(), key=by_count, reverse=True)
    ascending = sorted(counts.items(), key=by_count)

    singles = [term for term in counts if counts[term] == 1]

    return descending[:top_n], ascending[:top_n], singles

# Retrieve frequencies from Redis
frequencies = redis_utils.get_all_word_frequencies()

# Get the top and bottom frequencies
most_used, least_used, once_used = get_top_and_bottom_frequencies(frequencies)

# Calculate the maximum length of the words for alignment.
# default=0 keeps max() from raising ValueError when no frequencies are stored,
# so an empty store prints the three headings with nothing under them.
max_length_most = max((len(word) for word, freq in most_used), default=0)
max_length_least = max((len(word) for word, freq in least_used), default=0)

# Print the results in a tidier format: words padded to a common width
print("\nWords used most:")
for word, freq in most_used:
    print(f"{word.ljust(max_length_most)}: {freq}")

print("\nWords used least:")
for word, freq in least_used:
    print(f"{word.ljust(max_length_least)}: {freq}")

print("\nWords used once:")
print(", ".join(once_used))