# Ainnect Friend Recommendation System Demo

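Notebook này sẽ demo từng bước của hệ thống gợi ý bạn bè:
1. Kết nối và lấy dữ liệu từ MySQL
2. Xây dựng và visualize đồ thị xã hội
3. Tính toán các đặc trưng của user (user features)
4. Tính toán các đặc trưng dựa trên đồ thị (graph-based features)
5. Tìm kiếm ứng viên, tính điểm và xếp hạng
6. Visualize kết quả gợi ý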


In [5]:
%pip install matplotlib


Collecting matplotlib
  Downloading matplotlib-3.10.7-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Using cached contourpy-1.3.3-cp312-cp312-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.60.1-cp312-cp312-win_amd64.whl.metadata (114 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Using cached kiwisolver-1.4.9-cp312-cp312-win_amd64.whl.metadata (6.4 kB)
Collecting pyparsing>=3 (from matplotlib)
  Downloading pyparsing-3.2.5-py3-none-any.whl.metadata (5.0 kB)
Downloading matplotlib-3.10.7-cp312-cp312-win_amd64.whl (8.1 MB)
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   - -------------------------------------- 0.3/8.1 MB ? eta -:--:--
   ----- ---------------------------------- 1.0/8.1 MB 2.8 MB/s eta 0:00:03
   ------- -------------------------------- 1


[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
%pip install seaborn

Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Using cached seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import TSNE
import mysql.connector
from typing import Dict, List, Set, Tuple

# Plotting settings (applied globally to every figure created below)
sns.set_theme(style='whitegrid')  # Use seaborn's default theme with grid
sns.set_palette('husl')  # Default color cycle for seaborn/matplotlib plots
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

# Default figure size, plus larger font sizes for better readability
plt.rcParams['figure.figsize'] = [12, 8]
plt.rcParams['font.size'] = 12
plt.rcParams['axes.labelsize'] = 12
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['xtick.labelsize'] = 10
plt.rcParams['ytick.labelsize'] = 10


## 1. Kết nối và lấy dữ liệu từ MySQL


In [None]:
def get_db_connection():
    """Open a new MySQL connection to the Ainnect database.

    Connection settings are read from environment variables so that
    credentials are not stored in the notebook:

    - ``AINNECT_DB_HOST`` (default ``'localhost'``)
    - ``AINNECT_DB_USER`` (default ``'root'``)
    - ``AINNECT_DB_PASSWORD`` (no default; prompted for once if unset)
    - ``AINNECT_DB_NAME`` (default ``'ainnect'``)

    Returns:
        The connection object returned by ``mysql.connector.connect``.
        The caller is responsible for closing it.
    """
    import getpass
    import os

    password = os.environ.get('AINNECT_DB_PASSWORD')
    if password is None:
        # Never hard-code the password in source. Ask interactively and keep
        # it in this process's environment so that later calls (one per user
        # in get_user_features) do not prompt again.
        password = getpass.getpass('MySQL password for the Ainnect DB: ')
        os.environ['AINNECT_DB_PASSWORD'] = password

    return mysql.connector.connect(
        host=os.environ.get('AINNECT_DB_HOST', 'localhost'),
        user=os.environ.get('AINNECT_DB_USER', 'root'),
        password=password,
        database=os.environ.get('AINNECT_DB_NAME', 'ainnect'),
    )

def get_social_graph() -> nx.Graph:
    """Build the undirected friendship graph from the database.

    Nodes are the ids of all rows in ``users``; each node carries the
    ``username`` and ``display_name`` attributes. Edges are the
    ``(user_id_low, user_id_high)`` pairs of every friendship whose status
    is ``'accepted'``.

    Returns:
        A ``networkx.Graph`` with one node per user and one edge per
        accepted friendship.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        try:
            # Get accepted friendships
            cursor.execute("""
                SELECT user_id_low, user_id_high
                FROM friendships
                WHERE status = 'accepted'
            """)
            edges = cursor.fetchall()

            # Get user info
            cursor.execute("""
                SELECT id, username, display_name
                FROM users
            """)
            users = {
                user_id: {'username': username, 'display_name': display_name}
                for user_id, username, display_name in cursor.fetchall()
            }
        finally:
            # Release the cursor even if one of the queries fails.
            cursor.close()
    finally:
        # Always return the connection, otherwise a failed query leaks it.
        conn.close()

    # Build graph: nodes first (with attributes), then edges.
    G = nx.Graph()
    G.add_nodes_from(users.items())
    # NOTE: an edge endpoint that is missing from `users` is still added by
    # networkx as a node, but without username/display_name attributes.
    G.add_edges_from(edges)

    return G

# Load the friendship graph once; `G` is used as a module-level global by the
# cells below. The printed counts are a quick sanity check of the loaded data.
G = get_social_graph()
print(f"Graph has {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")


## 2. Visualize đồ thị xã hội
Vẽ đồ thị để thấy cấu trúc mạng xã hội và mối quan hệ giữa các users


In [None]:
def plot_social_graph(G: nx.Graph, highlight_user: int = None):
    """Draw the whole social graph, optionally marking one user in red.

    Args:
        G: Graph to draw. Nodes that have a ``username`` attribute are
            labelled with it.
        highlight_user: Node id to colour red; every other node is light
            blue. With the default ``None`` no node is highlighted.
    """
    plt.figure(figsize=(12, 8))

    # Force-directed layout; positions differ between calls (no fixed seed).
    layout = nx.spring_layout(G)

    # One colour per node, in the graph's node order.
    node_colors = []
    for node in G.nodes():
        node_colors.append('red' if node == highlight_user else 'lightblue')

    nx.draw_networkx_nodes(G, layout, node_color=node_colors, node_size=500)
    nx.draw_networkx_edges(G, layout, alpha=0.2)
    nx.draw_networkx_labels(G, layout, nx.get_node_attributes(G, 'username'))

    plt.title('Social Network Graph')
    plt.axis('off')
    plt.show()

# Plot full graph (no node highlighted)
plot_social_graph(G)

# Plot with highlighted user.
# `test_user_id` is a module-level global that later cells also read.
test_user_id = 1  # Change this to test different users
plot_social_graph(G, test_user_id)


## 3. Tính toán User Features
Lấy và visualize các đặc trưng của users (interests, education, work, activities)


In [None]:
def get_user_features(user_id: int) -> pd.Series:
    """Fetch per-user activity counts from the database.

    Args:
        user_id: Id of the user in the ``users`` table.

    Returns:
        A Series with the keys ``id``, ``interest_count``,
        ``education_count``, ``work_count``, ``post_count``,
        ``comment_count`` and ``reaction_count``. If no user matches,
        ``fetchone()`` yields ``None`` and the Series is empty.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            # Get user info and features.
            # NOTE(review): the chained LEFT JOINs multiply rows before
            # COUNT(DISTINCT ...) collapses them again; results are correct
            # but this may get slow for very active users -- consider
            # per-table subqueries if it becomes a bottleneck.
            cursor.execute("""
                SELECT 
                    u.id,
                    COUNT(DISTINCT i.id) as interest_count,
                    COUNT(DISTINCT e.id) as education_count,
                    COUNT(DISTINCT w.id) as work_count,
                    COUNT(DISTINCT p.id) as post_count,
                    COUNT(DISTINCT c.id) as comment_count,
                    COUNT(DISTINCT r.id) as reaction_count
                FROM users u
                LEFT JOIN interests i ON u.id = i.user_id
                LEFT JOIN educations e ON u.id = e.user_id
                LEFT JOIN work_experiences w ON u.id = w.user_id
                LEFT JOIN posts p ON u.id = p.author_id
                LEFT JOIN comments c ON u.id = c.author_id
                LEFT JOIN reactions r ON u.id = r.user_id
                WHERE u.id = %s
                GROUP BY u.id
            """, (user_id,))

            features = cursor.fetchone()
        finally:
            # Release the cursor even if the query fails.
            cursor.close()
    finally:
        # Always close the connection; this function is called once per
        # user, so a leak on error would add up quickly.
        conn.close()

    return pd.Series(features)

# Get features for all users, indexed by user id so that rows can be looked
# up with `user_features.loc[user_id]`. With the default RangeIndex that
# lookup would return the wrong row (or raise KeyError) whenever the ids are
# not exactly 0..N-1, and the `id` value itself would be treated as a feature.
user_features = pd.DataFrame([get_user_features(uid) for uid in G.nodes()])
user_features = user_features.set_index('id')

# Plot feature distributions: one histogram per feature on a 2x3 grid
feature_columns = ['interest_count', 'education_count', 'work_count',
                   'post_count', 'comment_count', 'reaction_count']
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.ravel()

for ax, col in zip(axes, feature_columns):
    sns.histplot(data=user_features, x=col, ax=ax)
    ax.set_title(f'Distribution of {col}')

plt.tight_layout()
plt.show()

# Print summary statistics
print("\nFeature Summary Statistics:")
print(user_features.describe())


## 4. Graph-based Features
Tính toán các đặc trưng dựa trên cấu trúc đồ thị như:
- Degree centrality
- Betweenness centrality
- Clustering coefficient
- PageRank


In [None]:
def compute_graph_metrics(G: nx.Graph, user_id: int) -> Dict[str, float]:
    """Return structural metrics of a single node of the graph.

    Args:
        G: Graph containing the node.
        user_id: Node whose metrics are requested.

    Returns:
        A dict with the keys ``degree`` (degree centrality),
        ``betweenness`` (betweenness centrality), ``clustering``
        (clustering coefficient) and ``pagerank``.

    Note:
        Degree centrality, betweenness centrality and PageRank are each
        computed for the whole graph on every call and then indexed by
        ``user_id``, so calling this once per node is expensive.
    """
    degree = nx.degree_centrality(G)[user_id]
    betweenness = nx.betweenness_centrality(G)[user_id]
    clustering = nx.clustering(G, user_id)
    pagerank = nx.pagerank(G)[user_id]

    return {
        'degree': degree,
        'betweenness': betweenness,
        'clustering': clustering,
        'pagerank': pagerank,
    }

# Compute metrics for all users.
# Each networkx call below already returns the metric for every node, so run
# each one once instead of once per user (calling compute_graph_metrics per
# node repeats the whole-graph work N times).
graph_metrics = pd.DataFrame(
    {
        'degree': nx.degree_centrality(G),
        'betweenness': nx.betweenness_centrality(G),
        'clustering': nx.clustering(G),
        'pagerank': nx.pagerank(G),
    },
    index=list(G.nodes()),  # rows labelled by user id, in graph node order
)

# Plot metrics: one scatter plot per metric on a 2x2 grid
metric_columns = ['degree', 'betweenness', 'clustering', 'pagerank']
fig, axes = plt.subplots(2, 2, figsize=(12, 12))
axes = axes.ravel()

for ax, col in zip(axes, metric_columns):
    sns.scatterplot(data=graph_metrics, y=col, x=graph_metrics.index, ax=ax)
    ax.set_title(f'{col} by user')

    # Highlight test user. The x axis shows user ids (the index values), so
    # the marker must be placed at the id itself, not at the row position.
    # Assumes numeric user ids -- TODO confirm if ids can be non-numeric.
    ax.scatter(test_user_id, graph_metrics.loc[test_user_id, col],
               color='red', s=100, label='Test User')
    ax.legend()

plt.tight_layout()
plt.show()

# Print metrics for test user
print(f"\nMetrics for test user {test_user_id}:")
print(graph_metrics.loc[test_user_id])


## 5. Recommendation Pipeline Demo
Demo toàn bộ pipeline gợi ý bạn bè:
1. Tìm ứng viên từ graph
2. Tính điểm dựa trên nhiều features
3. Visualize kết quả gợi ý


In [None]:
def get_recommendations(user_id: int, top_k: int = 5) -> List[Dict]:
    """Recommend friends-of-friends for a user, best candidates first.

    Candidates are all nodes exactly two hops away from ``user_id`` in the
    global graph ``G`` (friends of friends that are not already friends).
    Each candidate is scored as::

        0.3 * common_neighbors + 0.3 * jaccard
        + 0.2 * (1 - metric_sim) + 0.2 * feature_sim

    where ``metric_sim`` is the summed absolute difference of the graph
    metrics (degree, betweenness, clustering, PageRank) -- i.e. a distance,
    despite its name -- and ``feature_sim`` is the Pearson correlation of the
    two users' rows in the global ``user_features`` table (0 when it is
    undefined or a user has no feature row).

    Args:
        user_id: Node id of the user to recommend for; must be in ``G``.
        top_k: Maximum number of recommendations to return.

    Returns:
        Up to ``top_k`` dicts with the keys ``user_id``, ``score``,
        ``common_neighbors``, ``jaccard``, ``metric_sim`` and
        ``feature_sim``, sorted by descending ``score``.
    """
    # Get user's current friends
    friends = set(G.neighbors(user_id))

    # Get 2-hop neighbors (friends of friends)
    candidates = set()
    for friend in friends:
        candidates.update(G.neighbors(friend))
    candidates -= friends | {user_id}

    if not candidates:
        return []

    # Whole-graph metrics are loop-invariant: compute each once per call
    # instead of twice per candidate. Order matches compute_graph_metrics.
    metric_tables = [
        nx.degree_centrality(G),
        nx.betweenness_centrality(G),
        nx.clustering(G),
        nx.pagerank(G),
    ]

    # Look features up by user id, not by row position, and keep the id
    # itself out of the correlation. Works whether or not `user_features`
    # has already been indexed by id.
    features = user_features
    if 'id' in features.columns:
        features = features.set_index('id')
    user_feat = (
        features.loc[user_id].astype(float)
        if user_id in features.index else None
    )

    # Score candidates
    scores = []
    for candidate in candidates:
        candidate_friends = set(G.neighbors(candidate))

        # Common neighbors
        cn = len(friends & candidate_friends)

        # Jaccard similarity
        union = len(friends | candidate_friends)
        jaccard = cn / union if union > 0 else 0

        # Graph metrics distance (smaller means more similar)
        metric_sim = sum(
            abs(table[user_id] - table[candidate]) for table in metric_tables
        )

        # Feature similarity
        if user_feat is not None and candidate in features.index:
            cand_feat = features.loc[candidate].astype(float)
            # A constant feature vector makes the correlation 0/0 -> NaN;
            # silence the numpy warning and map NaN to 0 below.
            with np.errstate(divide='ignore', invalid='ignore'):
                feat_sim = np.corrcoef(user_feat, cand_feat)[0, 1]
            feat_sim = 0.0 if np.isnan(feat_sim) else float(feat_sim)
        else:
            feat_sim = 0.0

        # Combined score
        score = (
            0.3 * cn +           # Common neighbors
            0.3 * jaccard +      # Jaccard similarity
            0.2 * (1 - metric_sim) +  # Graph metrics (inverse distance)
            0.2 * feat_sim       # Feature similarity
        )

        scores.append({
            'user_id': candidate,
            'score': score,
            'common_neighbors': cn,
            'jaccard': jaccard,
            'metric_sim': metric_sim,
            'feature_sim': feat_sim
        })

    # Sort and return top-k
    scores.sort(key=lambda x: x['score'], reverse=True)
    return scores[:top_k]

# Get recommendations for test user.
# NOTE: `test_user_id` is also assigned in the graph-visualization cell above;
# this assignment overrides that value for the cells that follow.
test_user_id = 1  # Change this to test different users
recs = get_recommendations(test_user_id)  # uses the default top_k=5

# Plot recommendation graph
def plot_recommendations(G: nx.Graph, user_id: int, recommendations: List[Dict]):
    """Draw a user's neighbourhood with the recommended users, then list them.

    The figure shows the subgraph induced by the target user (red), the
    user's current friends (light blue) and the recommended users (light
    green). After the figure, the score details of every recommendation
    are printed.

    Args:
        G: Full social graph; nodes are expected to carry ``username`` and
            ``display_name`` attributes.
        user_id: Node id of the target user.
        recommendations: Dicts with at least the keys ``user_id``,
            ``score``, ``common_neighbors``, ``jaccard`` and
            ``feature_sim``.
    """
    plt.figure(figsize=(15, 10))

    friend_ids = list(G.neighbors(user_id))
    recommended_ids = [rec['user_id'] for rec in recommendations]

    # Subgraph restricted to the user, their friends and the recommendations
    subG = G.subgraph({user_id} | set(friend_ids) | set(recommended_ids))
    layout = nx.spring_layout(subG)

    # Nodes, one group at a time: target user, friends, recommendations
    nx.draw_networkx_nodes(subG, layout, nodelist=[user_id],
                           node_color='red', node_size=1000)
    nx.draw_networkx_nodes(subG, layout, nodelist=friend_ids,
                           node_color='lightblue', node_size=700)
    nx.draw_networkx_nodes(subG, layout, nodelist=recommended_ids,
                           node_color='lightgreen', node_size=700)

    # Edges and username labels
    nx.draw_networkx_edges(subG, layout)
    nx.draw_networkx_labels(subG, layout,
                            nx.get_node_attributes(subG, 'username'))

    # Legend: empty plots serve only as proxy handles for the node colours
    plt.plot([], [], 'ro', label='Target User')
    plt.plot([], [], 'o', color='lightblue', label='Friends')
    plt.plot([], [], 'o', color='lightgreen', label='Recommendations')
    plt.legend()

    plt.title(f'Recommendations for user {user_id}')
    plt.axis('off')
    plt.show()

    # Textual breakdown of every recommendation
    print("\nRecommendation Details:")
    for r in recommendations:
        user = G.nodes[r['user_id']]
        print(f"\nUser: {user['username']} ({user['display_name']})")
        print(f"Score: {r['score']:.3f}")
        print(f"Common neighbors: {r['common_neighbors']}")
        print(f"Jaccard similarity: {r['jaccard']:.3f}")
        print(f"Feature similarity: {r['feature_sim']:.3f}")

# Visualize the recommendations computed above and print their score details
plot_recommendations(G, test_user_id, recs)
