In [18]:
###################################################################################################
# GAME REVIEW GENERATION USING CHARACTER-LEVEL RNN
###################################################################################################

import string
import random
import re
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from surprise import BaselineOnly

# Set device
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(f"Using device: {device}")

# Define character set
all_characters = string.printable
n_characters = len(all_characters)
print(f"Number of characters: {n_characters}")

# Extract and prepare comments from pdAvis
def prepare_review_text():
    print("Preparing review text data...")
    comments = pdAvis[['comment', 'note']].dropna(subset=['comment'])
    comments = comments[comments['comment'].str.len() > 20]
    
    # Clean comments
    def clean_comment(text):
        text = re.sub(r'[^\x00-\x7F]+', '', text)  # Remove non-ASCII
        text = re.sub(r'\s+', ' ', text).strip()   # Normalize whitespace
        return text
    
    comments['clean_comment'] = comments['comment'].apply(clean_comment)
    
    comments_by_rating = {}
    for rating_range in [(0, 3), (3, 5), (5, 7), (7, 10)]:
        range_comments = comments[(comments['note'] >= rating_range[0]) & 
                                 (comments['note'] < rating_range[1])]
        if len(range_comments) > 0:
            text = " <REVIEW> ".join(range_comments['clean_comment'].tolist())
            comments_by_rating[f"{rating_range[0]}-{rating_range[1]}"] = text
            print(f"Rating range {rating_range}: {len(range_comments)} reviews, {len(text)} characters")
    
    all_text = " <REVIEW> ".join(comments['clean_comment'].tolist())
    comments_by_rating['all'] = all_text
    print(f"Total: {len(comments)} reviews, {len(all_text)} characters")
    
    return comments_by_rating

review_texts = prepare_review_text()


def char_tensor(string):
    """Convert string to character tensor"""
    tensor = torch.zeros(len(string)).long()
    for c in range(len(string)):
        try:
            tensor[c] = all_characters.index(string[c])
        except ValueError:
            # If character not found, use a safe replacement
            tensor[c] = all_characters.index(' ')
    return tensor

def random_chunk(text, chunk_len):
    """Get a random chunk of text"""
    text_len = len(text)
    if text_len <= chunk_len:
        return text
    start_index = random.randint(0, text_len - chunk_len - 1)
    end_index = start_index + chunk_len + 1
    return text[start_index:end_index]

def random_training_set(text, chunk_len=200, batch_size=8):
    """Create random training set from text"""
    chunks = [random_chunk(text, chunk_len) for _ in range(batch_size)]
    inp = torch.stack([char_tensor(chunk[:-1]) for chunk in chunks])
    target = torch.stack([char_tensor(chunk[1:]) for chunk in chunks])
    return inp, target


class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=2):
        """Character-level RNN model"""
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        
        self.embed = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, n_layers, batch_first=True)
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(hidden_size, output_size)
        
    def forward(self, input):
        """Forward pass for batched input"""
        embedded = self.embed(input)
        output, _ = self.lstm(embedded)
        output = self.dropout(output)
        output = self.fc(output)
        return output
    
    def forward_seq(self, input, hidden=None):
        """Forward pass for single character prediction"""
        batch_size = input.size(0)
        embedded = self.embed(input)
        
        if hidden is None:
            hidden = self.init_hidden(batch_size)
            
        output, hidden = self.lstm(embedded, hidden)
        output = self.dropout(output)
        output = self.fc(output)
        return output, hidden
    
    def init_hidden(self, batch_size):
        """Initialize hidden state"""
        return (torch.zeros(self.n_layers, batch_size, self.hidden_size).to(device),
                torch.zeros(self.n_layers, batch_size, self.hidden_size).to(device))

def generate_review(model, rating_range, seed_text, predict_len=500, temperature=0.8):
    """Generate a review based on seed text and rating range"""
    model.eval()
    hidden = None
    prime_input = char_tensor(seed_text).to(device)
    predicted = seed_text
    
    with torch.no_grad():
        for p in range(len(seed_text) - 1):
            _, hidden = model.forward_seq(prime_input[p].unsqueeze(0).unsqueeze(0), hidden)
        
        prime_input = prime_input[-1].unsqueeze(0).unsqueeze(0)
        
        for p in range(predict_len):
            output, hidden = model.forward_seq(prime_input, hidden)
            
            output_dist = output.data.view(-1).div(temperature).exp()
            top_i = torch.multinomial(output_dist, 1)[0]
            
            predicted_char = all_characters[top_i]
            
            if "<REVIEW>" in predicted[-7:]:
                break
                
            if "     " in predicted[-5:]:
                break
                
            predicted += predicted_char
            prime_input = torch.tensor([[top_i]], dtype=torch.long).to(device)
            
            if p > 100 and predicted_char in ".!?":
                if random.random() < 0.3:
                    break
    
    predicted = re.sub(r'\s+', ' ', predicted).strip()
    
    if not any(predicted[-1] == c for c in ".!?"):
        predicted += "."
        
    return predicted

def train_rnn_model(rating_range='all', hidden_size=150, n_layers=2, 
                    batch_size=32, chunk_len=100, n_epochs=3000, 
                    print_every=500, learning_rate=0.002):
    """Train an RNN model on the specified rating range"""
    
    print(f"\nTraining RNN for rating range: {rating_range}")
    text = review_texts[rating_range]
    
    model = RNN(n_characters, hidden_size, n_characters, n_layers)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    
    start = time.time()
    all_losses = []
    loss_avg = 0
    
    print(f"Starting training for {n_epochs} iterations...")
    
    for epoch in range(1, n_epochs + 1):
        model.train()
        inp, target = random_training_set(text, chunk_len, batch_size)
        inp, target = inp.to(device), target.to(device)
        
        optimizer.zero_grad()
        
        output = model(inp)
        
        loss = criterion(output.transpose(1, 2), target)
        
        loss.backward()
        optimizer.step()
        
        loss_avg += loss.item()
        
        if epoch % print_every == 0:
            print(f'[{time_since(start)} ({epoch}/{n_epochs}) {epoch/n_epochs*100:.1f}%] Loss: {loss_avg/print_every:.4f}')
            
            seed = "The game is "
            sample = generate_review(model, rating_range, seed, 100)
            print(f"Sample: {sample}\n")
            
            loss_avg = 0
            
    print(f"Training completed in {time_since(start)}")
    return model

# Utility functions

def time_since(since):
    """Calculate time since a point"""
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

# Create models for each rating range
print("\n" + "="*80)
print("TRAINING REVIEW GENERATION MODELS")
print("="*80)

models = {}
for rating_range in review_texts.keys():
    if rating_range != 'all': 
        models[rating_range] = train_rnn_model(
            rating_range=rating_range,
            hidden_size=150,
            n_layers=3,
            batch_size=16,
            chunk_len=80,
            n_epochs=4000,  
            print_every=500
        )

def generate_game_review(user_id, game_id):
    """Generate a custom review based on predicted rating"""
    
    if user_id not in allusers:
        return f"Error: User '{user_id}' not found in dataset."
    
    if game_id not in allgames:
        return f"Error: Game '{game_id}' not found in dataset."
    
    # Predict rating using BaselineOnly model
    best_baseline = BaselineOnly(**best_configs['BaselineOnly'])
    best_baseline.fit(trainset)
    
    try:
        predicted_rating = best_baseline.predict(user_id, game_id).est
        print(f"Predicted rating for user '{user_id}' on game '{game_id}': {predicted_rating:.2f}")
        
        if predicted_rating < 3:
            rating_range = '0-3'
        elif predicted_rating < 5:
            rating_range = '3-5'
        elif predicted_rating < 7:
            rating_range = '5-7'
        else:
            rating_range = '7-10'
        
        if rating_range not in models:
            available_ranges = list(models.keys())
            if len(available_ranges) > 0:
                rating_range = available_ranges[0]
            else:
                return f"No trained models available. Using template review instead: {game_id} is {'an excellent' if predicted_rating > 7 else 'a good' if predicted_rating > 5 else 'an average' if predicted_rating > 3 else 'a poor'} game. I rate it {predicted_rating:.1f}/10."
        
        model = models[rating_range]
        
        if predicted_rating >= 8:
            seed = f"I really enjoyed playing {game_id}. "
        elif predicted_rating >= 6:
            seed = f"{game_id} is a good game. "
        elif predicted_rating >= 4:
            seed = f"{game_id} is an average game. "
        else:
            seed = f"I was disappointed with {game_id}. "
        
        generated_review = generate_review(model, rating_range, seed, predict_len=250)
        
        if str(int(predicted_rating)) not in generated_review:
            generated_review += f" I would rate it {predicted_rating:.1f}/10."
        
        return generated_review
        
    except Exception as e:
        print(f"Error generating review: {e}")
        # Fallback to simple template
        return f"{game_id} is {'an excellent' if predicted_rating > 7 else 'a good' if predicted_rating > 5 else 'an average' if predicted_rating > 3 else 'a poor'} game. I rate it {predicted_rating:.1f}/10."

# Demo with a specific user and game
print("\n" + "="*80)
print("GENERATING REVIEW FOR SPECIFIC USER AND GAME")
print("="*80)

user_id = "Govin"
game_id = "Dune"

generated_review = generate_game_review(user_id, game_id)
print("\nGenerated Review:")
print("-" * 80)
print(generated_review)
print("-" * 80)

Using device: cpu
Number of characters: 100
Preparing review text data...
Rating range (0, 3): 6796 reviews, 3770219 characters
Rating range (3, 5): 13611 reviews, 7890486 characters
Rating range (5, 7): 30453 reviews, 17716411 characters
Rating range (7, 10): 59479 reviews, 39372042 characters
Total: 142156 reviews, 90042881 characters

TRAINING REVIEW GENERATION MODELS

Training RNN for rating range: 0-3
Starting training for 4000 iterations...
[0m 27s (500/4000) 12.5%] Loss: 2.6190
Sample: The game is s'atil ou pas le loumit s'en e drent partit monre pus jouer jeux fus des joumoutant jeu pant morhe j.

[0m 52s (1000/4000) 25.0%] Loss: 2.0326
Sample: The game is les parties se mme cogrefte des des ausent de carse du poulus car entrote mes courtaner cartes en d.

[1m 17s (1500/4000) 37.5%] Loss: 1.8497
Sample: The game is chane des 30 subouts est au faut bonne, je pour des colse concontrome d'a me, prenduer, pas plant de.

[1m 42s (2000/4000) 50.0%] Loss: 1.7564
Sample: The game is en

In [27]:
pdAvis[(pdAvis["author"] == "Govin") & (pdAvis["review_length"] > 1)]


Unnamed: 0,_id,author,date_published,title_review,note,review_href,title,url,treated,comment,review_length
11218,63c6748c993b727dc2a778c7,Govin,2018-09-04 11:29:06,Chef-d'oeuvre,10.0,https://www.trictrac.net/avis/chef-doeuvre-1399,Très Futé !,https://www.trictrac.net/jeu-de-societe/tres-f...,True,"LE Jeu de dés. S'il ne devait en rester qu'un,...",61.0
18634,63c674e0993b727dc2a795bf,Govin,2015-10-16 19:29:28,Be Madoff,9.8,https://www.trictrac.net/avis/be-madoff,Ponzi Scheme,https://www.trictrac.net/jeu-de-societe/ponzi-...,True,"Innovant, tendu, du matériel de qualité, une p...",104.0
22694,63c6750d993b727dc2a7a59b,Govin,2015-10-19 13:02:07,"Tiny, mais costaud",7.9,https://www.trictrac.net/avis/tiny-mais-costaud,Tiny Epic Galaxies,https://www.trictrac.net/jeu-de-societe/tiny-e...,True,"De la conquête spatiale, de l'attribution de c...",377.0
26132,63c67536993b727dc2a7b309,Govin,2015-10-17 13:18:39,Mine antifraternelle,8.8,https://www.trictrac.net/avis/mine-antifratern...,Timebomb : Sherlock vs. Moriarty,https://www.trictrac.net/jeu-de-societe/timebo...,True,"Encore un minimaliste, dans un registre de déd...",1449.0
26945,63c67540993b727dc2a7b636,Govin,2015-10-21 13:18:32,Grrrr évolution,8.5,https://www.trictrac.net/avis/grrrr-evolution,Evolution,https://www.trictrac.net/jeu-de-societe/evolut...,True,"Un jeu fluide, limpide et addictif, dont le th...",722.0
...,...,...,...,...,...,...,...,...,...,...,...
229696,63c6827b993b727dc2aace35,Govin,2011-09-08 12:29:01,LöwenHerse,10.0,https://www.trictrac.net/avis/lowenherse,Richard Cœur de Lion,https://www.trictrac.net/jeu-de-societe/richar...,True,"Du Klaus Teuber sans dé, sans brigand, sans re...",571.0
231574,63c682a9993b727dc2aad58b,Govin,2015-10-21 05:38:27,Un poisson trop loin,6.9,https://www.trictrac.net/avis/un-poisson-trop-...,Kleine Fische,https://www.trictrac.net/jeu-de-societe/kleine...,True,"Un des meilleurs stop ou encore pour enfants, ...",70.0
234554,63c682dd993b727dc2aae12f,Govin,2011-09-08 12:29:01,Stratégo version Secret Story,7.2,https://www.trictrac.net/avis/stratego-version...,AttaKuBe,https://www.trictrac.net/jeu-de-societe/attaku...,True,Quel est le secret de ce cube ?Quel cube va qu...,681.0
243870,63c68350993b727dc2ab0593,Govin,2011-09-08 12:29:01,Le jean's du monde ludique,6.0,https://www.trictrac.net/avis/le-jean-s-du-mon...,Échecs,https://www.trictrac.net/jeu-de-societe/echecs...,True,"A l'instar du vêtement cité, les échecs sont i...",836.0


In [28]:

generated_review = generate_game_review("Govin", "Timebomb : Sherlock vs. Moriarty")
print("\nGenerated Review:")
print("-" * 80)
print(generated_review)
print("-" * 80)


Estimating biases using sgd...
Predicted rating for user 'Govin' on game 'Timebomb : Sherlock vs. Moriarty': 7.47

Generated Review:
--------------------------------------------------------------------------------
Timebomb : Sherlock vs. Moriarty is a good game. Et (et tout appenser de la soit que le plateau et la sortir que les nombreux du jeu de proupes combinit pas voient non pourra de son assez ce si propher et moche que est fait entre par par rpbrs et c'est trapce diffrentes nombre des bien retourne et. I would rate it 7.5/10.
--------------------------------------------------------------------------------


In [34]:
print(pdAvis[(pdAvis["author"] == "Govin") & (pdAvis["title"] == "Timebomb : Sherlock vs. Moriarty")])


                            _id author       date_published  \
26132  63c67536993b727dc2a7b309  Govin  2015-10-17 13:18:39   

               title_review  note  \
26132  Mine antifraternelle   8.8   

                                             review_href  \
26132  https://www.trictrac.net/avis/mine-antifratern...   

                                  title  \
26132  Timebomb : Sherlock vs. Moriarty   

                                                     url  treated  \
26132  https://www.trictrac.net/jeu-de-societe/timebo...     True   

                                                 comment  review_length  
26132  Encore un minimaliste, dans un registre de déd...         1449.0  


In [36]:

generated_review = generate_game_review("Govin", "Échecs")
print("\nGenerated Review:")
print("-" * 80)
print(generated_review)
print("-" * 80)


Estimating biases using sgd...
Predicted rating for user 'Govin' on game 'Échecs': 7.47

Generated Review:
--------------------------------------------------------------------------------
Échecs is a good game. Au comme hamme de celui de 4 matures idensant aprs stratgie : la fin est pas matriel, mais ce jeu est pas la rout espre ambiance, iu trs p aux complet la public est un bon action qui est plus trop temps que je pas de toutes la mcanisme reste laisser. I would rate it 7.5/10.
--------------------------------------------------------------------------------
