# In [None]:

import pandas as pd
import numpy as np
import pickle
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler

# Configuration
# CSV file that build_and_save_model() loads with pd.read_csv.
DATASET_PATH = 'datasets/steam-games-cleaned.csv'
# File that build_and_save_model() writes the pickled model bundle to.
MODEL_PATH = 'models/recommender_model.pkl'

def build_and_save_model(dataset_path=None, model_path=None):
    """Build the content-based recommender and pickle it to disk.

    Reads the games CSV, builds one weighted text "soup" per row (genres
    repeated three times, then tags, then title), fits a TF-IDF vectorizer
    and a brute-force cosine NearestNeighbors index on it, and pickles a
    dict with the keys ``'df'``, ``'tfidf_vectorizer'``, ``'tfidf_matrix'``
    and ``'nn_model'``.

    The CSV must provide the columns ``app_id``, ``title``, ``genres``,
    ``tags``, ``review_score`` and ``review_count``.

    Args:
        dataset_path: CSV file to read. Defaults to the module-level
            ``DATASET_PATH`` when None.
        model_path: Pickle file to write. Defaults to the module-level
            ``MODEL_PATH`` when None.

    Returns:
        None. The result is the pickle file written to ``model_path``.
    """
    if dataset_path is None:
        dataset_path = DATASET_PATH
    if model_path is None:
        model_path = MODEL_PATH

    print("Loading dataset...")
    # Create the directory that will actually hold the pickle (derived from
    # the target path rather than hard-coded). Done up front so an unwritable
    # location fails before the expensive fitting steps.
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    df = pd.read_csv(dataset_path)

    # Preprocessing
    df['genres'] = df['genres'].fillna('')
    df['tags'] = df['tags'].fillna('')
    df['review_score'] = df['review_score'].fillna(0)
    df['review_count'] = df['review_count'].fillna(0)

    # A missing title would turn the whole concatenated soup into NaN, which
    # TfidfVectorizer rejects. Use a NaN-free copy for the soup only, so the
    # 'title' column that gets saved is left exactly as loaded.
    title_text = df['title'].fillna('')

    # Feature Engineering (Soup)
    # Weight Genres 3x, Tags 1x, Title 1x
    df['soup'] = (
        (df['genres'] + " ") * 3 +
        df['tags'].str.replace(',', ' ') + " " +
        title_text
    )

    # Normalize scores for ranking: review_score damped by log(1 + count),
    # then min-max scaled to [0, 1].
    scaler = MinMaxScaler()
    df['norm_score'] = scaler.fit_transform(
        (df['review_score'] * np.log1p(df['review_count'])).values.reshape(-1, 1)
    )

    print("Building Vector Space Model (TF-IDF)...")
    # ngram_range=(1, 2) is important for compound terms
    tfidf = TfidfVectorizer(stop_words='english', min_df=5, ngram_range=(1, 2))
    tfidf_matrix = tfidf.fit_transform(df['soup'])

    print(f"Building Nearest Neighbors Index ({tfidf_matrix.shape[0]} items)...")
    # metric='cosine' is cosine distance, i.e. 1 - cosine_similarity
    nn_model = NearestNeighbors(metric='cosine', algorithm='brute')
    nn_model.fit(tfidf_matrix)

    # Bundle everything into a dictionary
    model_data = {
        # Keep only essential cols; copy so the bundle holds an independent
        # frame rather than a slice of the working DataFrame.
        'df': df[['app_id', 'title', 'genres', 'tags', 'norm_score', 'review_count']].copy(),
        'tfidf_vectorizer': tfidf,
        'tfidf_matrix': tfidf_matrix,  # Needed for user profile calculation
        'nn_model': nn_model
    }

    print(f"Saving model to {model_path}...")
    with open(model_path, 'wb') as f:
        pickle.dump(model_data, f)

    print("Model saved successfully!")

# Run the full build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    build_and_save_model()
