# In [None]:
import requests
import json
import csv
import time
import random
from datetime import datetime
import logging

# Configure the root logger once at import time: INFO and above, with a
# timestamp and the level name in front of every message.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by everything defined in this file.
logger = logging.getLogger(__name__)

class SteamRPGScraper:
    """Collect English-language Steam reviews for a hard-coded list of RPG titles.

    Reviews are fetched page by page from the Steam store ``appreviews``
    endpoint, reduced to five columns (see ``parse_review``) and written to
    a CSV file.  ``run_scraper`` is the high-level entry point.
    """

    def __init__(self):
        """Create the shared HTTP session and initialise the progress counters."""
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })
        # Running total of reviews accepted across all games in the current run.
        self.reviews_collected = 0
        # Overall cap on the number of reviews a run may gather.
        self.target_reviews = 10000

    def get_rpg_games(self, limit=50):
        """Return the curated list of games to scrape.

        Args:
            limit: Accepted for interface compatibility only.  The current
                implementation does not read it and always returns the full
                curated list.
                NOTE(review): the list holds more entries than the default
                ``limit``; confirm whether truncation was ever intended.

        Returns:
            A list of dicts, each with an ``'appid'`` (int) and a ``'name'``
            (str) key.
        """
        rpg_games = [
            {'appid': 228880, 'name': "Baldur's Gate: Enhanced Edition"},
            {'appid': 257350, 'name': "Baldur's Gate II: Enhanced Edition"},
            {'appid': 1086940, 'name': "Baldur's Gate 3"},
            {'appid': 362890, 'name': 'Black Desert Online'},
            {'appid': 1091500, 'name': 'Cyberpunk 2077'},
            {'appid': 211420, 'name': 'Dark Souls: Prepare to Die Edition'},
            {'appid': 236430, 'name': 'Dark Souls II'},
            {'appid': 374320, 'name': 'Dark Souls III'},
            {'appid': 570940, 'name': 'Dark Souls: Remastered'},
            {'appid': 230230, 'name': 'Divinity: Original Sin'},
            {'appid': 317400, 'name': 'Divinity: Original Sin 2'},
            {'appid': 975370, 'name': 'Dwarf Fortress'},
            {'appid': 1245620, 'name': 'ELDEN RING'},
            {'appid': 377160, 'name': 'Fallout 4'},
            {'appid': 22370, 'name': 'Fallout: New Vegas'},
            {'appid': 1593500, 'name': 'God of War'},
            {'appid': 1145360, 'name': 'Hades'},
            {'appid': 367520, 'name': 'Hollow Knight'},
            {'appid': 466300, 'name': 'Icewind Dale: Enhanced Edition'},
            {'appid': 1096300, 'name': 'Kenshi'},
            {'appid': 17460, 'name': 'Mass Effect'},
            {'appid': 24980, 'name': 'Mass Effect 2'},
            {'appid': 1446780, 'name': 'Monster Hunter Rise'},
            {'appid': 582010, 'name': 'MONSTER HUNTER: WORLD'},
            {'appid': 48700, 'name': 'Mount & Blade: Warband'},
            {'appid': 261550, 'name': 'Mount & Blade II: Bannerlord'},
            {'appid': 238010, 'name': 'Neverwinter Nights: Enhanced Edition'},
            {'appid': 524220, 'name': 'NieR: Automata'},
            {'appid': 387290, 'name': 'Ori and the Blind Forest'},
            {'appid': 1265920, 'name': 'Ori and the Will of the Wisps'},
            {'appid': 704450, 'name': 'Pathfinder: Kingmaker'},
            {'appid': 1184370, 'name': 'Pathfinder: Wrath of the Righteous'},
            {'appid': 292120, 'name': 'Persona 4 Golden'},
            {'appid': 1687950, 'name': 'Persona 5 Royal'},
            {'appid': 447040, 'name': 'Pillars of Eternity'},
            {'appid': 560130, 'name': 'Pillars of Eternity II: Deadfire'},
            {'appid': 321800, 'name': 'Planescape: Torment: Enhanced Edition'},
            {'appid': 1174180, 'name': 'Red Dead Redemption 2'},
            {'appid': 294100, 'name': 'RimWorld'},
            {'appid': 814380, 'name': 'Sekiro: Shadows Die Twice'},
            {'appid': 646570, 'name': 'Slay the Spire'},
            {'appid': 413150, 'name': 'Stardew Valley'},
            {'appid': 2108330, 'name': 'Tales of Arise'},
            {'appid': 367500, 'name': 'Tales of Berseria'},
            {'appid': 638970, 'name': 'Tales of Vesperia: Definitive Edition'},
            {'appid': 351970, 'name': 'Tales of Zestiria'},
            {'appid': 250900, 'name': 'The Binding of Isaac: Rebirth'},
            {'appid': 22330, 'name': 'The Elder Scrolls IV: Oblivion'},
            {'appid': 489830, 'name': 'The Elder Scrolls V: Skyrim Special Edition'},
            {'appid': 611670, 'name': 'The Elder Scrolls Online'},
            {'appid': 292030, 'name': 'The Witcher 3: Wild Hunt'},
            {'appid': 391540, 'name': 'Undertale'},
        ]

        logger.info(f"Using {len(rpg_games)} RPG games for scraping")
        return rpg_games

    def get_game_reviews(self, appid, cursor='*', num_per_page=20):
        """Fetch one page of reviews for a game.

        Args:
            appid: Steam application id of the game.
            cursor: Pagination cursor; ``'*'`` requests the first page and
                later pages use the cursor returned by the previous response.
            num_per_page: Number of reviews requested per page.

        Returns:
            The decoded JSON response, or ``None`` when the request fails,
            the server answers with an HTTP error status, or the body is not
            valid JSON.  Failures are logged, not raised.
        """
        url = f"https://store.steampowered.com/appreviews/{appid}"

        params = {
            'json': 1,
            'cursor': cursor,
            'language': 'english',
            'day_range': 9223372036854775807,
            'review_type': 'all',
            'purchase_type': 'all',
            'num_per_page': num_per_page,
            'filter': 'recent',
        }

        try:
            response = self.session.get(url, params=params, timeout=10)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            # RequestException covers transport and HTTP-status failures;
            # ValueError covers a response body that is not valid JSON.
            logger.error(f"Error fetching reviews for app {appid}: {e}")
            return None

    def parse_review(self, review, game_name):
        """Reduce one raw review dict to the five CSV columns.

        Missing or ``None`` values for ``author``, ``playtime_forever`` and
        ``review`` are treated as "no data" (zero hours / empty text) instead
        of raising.

        Args:
            review: A single review dict as found in the API response.
            game_name: Display name of the game the review belongs to.

        Returns:
            A dict with the keys ``game_name``, ``hours_played``, ``genre``,
            ``review_text`` and ``recommendation``.
        """
        author = review.get('author') or {}
        # Playtime is divided by 60 to convert minutes to hours.
        minutes_played = author.get('playtime_forever') or 0
        raw_text = review.get('review') or ''

        return {
            'game_name': game_name,
            'hours_played': round(minutes_played / 60, 1),
            'genre': 'RPG',  # All games in our list are RPGs
            # Flatten line breaks so every review occupies a single CSV line.
            'review_text': raw_text.replace('\n', ' ').replace('\r', ' ').strip(),
            'recommendation': 'Recommended' if review.get('voted_up') else 'Not Recommended',
        }

    def scrape_reviews_for_game(self, game_info, max_reviews_per_game):
        """Page through a game's reviews until a quota or the data runs out.

        Only reviews whose stripped text is longer than 10 characters are
        kept.  Each kept review also increments ``self.reviews_collected``.
        Paging stops when the per-game quota or the global
        ``self.target_reviews`` is reached, a request fails, a page comes
        back empty, or the API does not hand out a new cursor.

        Args:
            game_info: Dict with ``'appid'`` and ``'name'`` keys.
            max_reviews_per_game: Upper bound on reviews kept for this game.

        Returns:
            A list of parsed review dicts (see ``parse_review``).
        """
        appid = game_info['appid']
        game_name = game_info['name']

        logger.info(f"Starting to scrape reviews for {game_name} (AppID: {appid}) - Target: {max_reviews_per_game} reviews")

        reviews = []
        cursor = '*'
        pages_scraped = 0

        while len(reviews) < max_reviews_per_game and self.reviews_collected < self.target_reviews:
            # Rate limiting
            time.sleep(random.uniform(1, 2))

            data = self.get_game_reviews(appid, cursor)

            if not data or not data.get('success'):
                logger.warning(f"Failed to get reviews for {game_name}")
                break

            page = data.get('reviews')
            if not page:
                logger.info(f"No more reviews available for {game_name}")
                break

            for review in page:
                # `or ''` guards against an explicit JSON null review body.
                review_text = (review.get('review') or '').strip()
                if len(review_text) <= 10:
                    continue

                reviews.append(self.parse_review(review, game_name))
                self.reviews_collected += 1

                if len(reviews) >= max_reviews_per_game or self.reviews_collected >= self.target_reviews:
                    break

            next_cursor = data.get('cursor')
            pages_scraped += 1

            logger.info(f"Scraped page {pages_scraped} for {game_name}. "
                        f"Reviews for this game: {len(reviews)}, "
                        f"Total reviews: {self.reviews_collected}")

            if not next_cursor:
                break

            if next_cursor == cursor:
                # Requesting the same cursor again would return the same page,
                # so stop instead of looping and collecting duplicates.
                logger.info(f"Cursor did not advance for {game_name}; stopping pagination")
                break

            cursor = next_cursor

        logger.info(f"Finished scraping {game_name}. Collected {len(reviews)} reviews.")
        return reviews

    def save_reviews_to_csv(self, all_reviews, filename=None):
        """Write reviews to a UTF-8 CSV file with a header row.

        Args:
            all_reviews: List of dicts as produced by ``parse_review``.
            filename: Target path.  When falsy, a timestamped name of the
                form ``steam_rpg_reviews_YYYYMMDD_HHMMSS.csv`` is generated.

        Returns:
            The filename written, or ``None`` when ``all_reviews`` is empty
            (in which case no file is created).
        """
        if not filename:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"steam_rpg_reviews_{timestamp}.csv"

        if not all_reviews:
            logger.warning("No reviews to save")
            return None

        fieldnames = ['game_name', 'hours_played', 'genre', 'review_text', 'recommendation']

        # newline='' lets the csv module control line endings itself.
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_reviews)

        logger.info(f"Saved {len(all_reviews)} reviews to {filename}")
        return filename

    def run_scraper(self):
        """Scrape every game in the list and save the combined result.

        The overall target is split evenly across the games.  A backup CSV
        is written each time the running total passes another multiple of
        2000 reviews.  A failure while handling one game is logged and the
        run continues with the next game.

        Returns:
            A ``(all_reviews, filename)`` tuple.  ``filename`` is ``None``
            when nothing was saved.
        """
        logger.info(f"Starting Steam RPG reviews scraper. Target: {self.target_reviews} reviews")

        # Start each run from zero so the same instance can be run again.
        self.reviews_collected = 0

        rpg_games = self.get_rpg_games()
        if not rpg_games:
            # Avoids a division by zero below when there is nothing to scrape.
            logger.warning("No games to scrape")
            return [], None

        total_games = len(rpg_games)
        reviews_per_game = self.target_reviews // total_games
        logger.info(f"Target reviews per game: {reviews_per_game}")

        all_reviews = []
        backup_interval = 2000
        next_backup_at = backup_interval

        for position, game in enumerate(rpg_games):
            if self.reviews_collected >= self.target_reviews:
                break

            try:
                remaining_games = total_games - position
                remaining_reviews = self.target_reviews - self.reviews_collected
                # Never exceed the even share, and never ask for more than is
                # still missing from the overall target.
                current_game_target = min(reviews_per_game, remaining_reviews // remaining_games)

                game_reviews = self.scrape_reviews_for_game(game, current_game_target)
                all_reviews.extend(game_reviews)

                if len(all_reviews) >= next_backup_at:
                    backup_filename = f"backup_reviews_{len(all_reviews)}.csv"
                    self.save_reviews_to_csv(all_reviews, backup_filename)
                    # Advance to the next multiple of the interval so one
                    # threshold is never backed up twice.
                    next_backup_at = (len(all_reviews) // backup_interval + 1) * backup_interval

            except Exception as e:
                logger.error(f"Error scraping {game['name']}: {e}")
                # Reviews counted for a game that failed midway were never
                # added to all_reviews; bring the counter back in line.
                self.reviews_collected = len(all_reviews)
                continue

        filename = self.save_reviews_to_csv(all_reviews)

        logger.info(f"Scraping completed! Collected {len(all_reviews)} reviews total.")
        if filename:
            logger.info(f"Data saved to: {filename}")

        return all_reviews, filename

def main():
    """Run the scraper once and print summary statistics for the result.

    Nothing is printed when the run produced no reviews.
    """
    reviews, filename = SteamRPGScraper().run_scraper()

    # Guard clause: no reviews means there is nothing to summarise.
    if not reviews:
        return

    total = len(reviews)
    positive_reviews = len([entry for entry in reviews if entry['recommendation'] == 'Recommended'])
    negative_reviews = total - positive_reviews
    distinct_games = {entry['game_name'] for entry in reviews}
    avg_hours = sum(entry['hours_played'] for entry in reviews) / total

    print(f"\nSummary Statistics:")
    print(f"Total reviews collected: {total}")
    print(f"Recommended reviews: {positive_reviews} ({positive_reviews/total*100:.1f}%)")
    print(f"Not Recommended reviews: {negative_reviews} ({negative_reviews/total*100:.1f}%)")
    print(f"Games covered: {len(distinct_games)}")
    print(f"Average hours played: {avg_hours:.1f}")
    print(f"Data saved to: {filename}")

    # Tally reviews per game, keeping first-seen order so ties sort stably.
    per_game = {}
    for entry in reviews:
        title = entry['game_name']
        if title in per_game:
            per_game[title] += 1
        else:
            per_game[title] = 1

    print(f"\nReviews per game (showing top 10):")
    ranked = sorted(per_game.items(), key=lambda pair: pair[1], reverse=True)
    for game, count in ranked[:10]:
        print(f"  {game}: {count} reviews")

# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()