In [6]:
import os
import pickle
import numpy as np
import pandas as pd

In [1]:

def load_all_intermediate(folder='intermediate_results'):
    """
    Load all per-app_id intermediate results from disk into a single dictionary.

    Every regular file in ``folder`` whose name ends in ``.pkl`` is unpickled.
    The key is the file name with only that trailing ``.pkl`` removed, so a
    name such as ``my.pkl_app.pkl`` maps to ``my.pkl_app``. Files are visited
    in sorted name order so the returned dict has a reproducible insertion
    order regardless of the order the filesystem lists them in.

    Args:
        folder: Path of the directory holding one ``<app_id>.pkl`` file per app.

    Returns:
        similarity_results: dict
            Keys are app_id (str, taken from the file name), values are the
            unpickled objects -- expected to be dicts with 'reviews' and
            'similarity_matrix'. Empty when ``folder`` is not an existing
            directory (a message is printed in that case).
    """
    suffix = '.pkl'
    similarity_results = {}
    # isdir (rather than exists) so a plain file at this path is reported
    # instead of making os.listdir raise.
    if not os.path.isdir(folder):
        print(f"No folder named '{folder}' found.")
        return similarity_results

    # os.listdir returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(suffix):
            continue
        file_path = os.path.join(folder, filename)
        # Skip sub-directories that merely happen to be named '*.pkl'.
        if not os.path.isfile(file_path):
            continue
        # Strip only the trailing extension; str.replace would also remove
        # any '.pkl' occurring earlier in the name.
        app_id = filename[:-len(suffix)]
        # SECURITY: pickle.load can execute arbitrary code. Only point this
        # at folders containing files produced by a trusted pipeline.
        with open(file_path, 'rb') as f:
            similarity_results[app_id] = pickle.load(f)
    return similarity_results

# -------------------------------
# Load the cached per-app results used by the cells below
# -------------------------------
similarity_results = load_all_intermediate(folder='intermediate_results')
print(f"Loaded {len(similarity_results)} app_ids from intermediate results.")


Loaded 10 app_ids from intermediate results.


In [7]:
# -------------------------------
# Filter reviews by threshold (any tag)
# -------------------------------
def filter_reviews_by_threshold(similarity_results, threshold=0.5):
    """
    Collect the reviews that score above ``threshold`` for at least one tag.

    For each app, a row of ``similarity_matrix`` is paired positionally with
    the entry of ``reviews`` at the same index; the review is kept when any
    value in its row is strictly greater than ``threshold``.

    Args:
        similarity_results: dict mapping app_id to a dict with the keys
            'similarity_matrix' and 'reviews'.
        threshold: Cut-off compared with ``>`` against every matrix entry.

    Returns:
        list of ``(app_id, review)`` tuples, in the iteration order of
        ``similarity_results`` and, within an app, in review order.
    """
    kept_pairs = []
    for app_id in similarity_results:
        entry = similarity_results[app_id]
        scores = entry['similarity_matrix']
        texts = entry['reviews']

        # One boolean per row: True when some column exceeds the cut-off.
        row_passes = np.any(scores > threshold, axis=1)

        kept_pairs.extend(
            (app_id, text)
            for text, passed in zip(texts, row_passes)
            if passed
        )

    return kept_pairs


In [23]:
# Step 2: keep the reviews with at least one tag similarity above 0.65,
# then tabulate the surviving (app_id, review_text) pairs.
allowed_reviews = filter_reviews_by_threshold(similarity_results, threshold=0.65)
allowed_reviews_df = pd.DataFrame(
    data=allowed_reviews,
    columns=['app_id', 'review_text'],
)


In [24]:
# Display the table of reviews that passed the threshold filter as this cell's output.
allowed_reviews_df

Unnamed: 0,app_id,review_text
0,10,One of the best action games in the world :|
1,10,Good old times when i played this game for the...
2,10,Best action game
3,10,point and click adventure
4,10,the best game in word
5,10,• Old game. • Old graphic. • Old engine. But ...
6,10,recommended game to start as first person shoo...
7,10,revolutionized first person shooters
8,10,Best Classic Action Shooter Ever Was Created!
9,10,The most balanced and competetive multiplayer ...
