In [1]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup as soup
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import matplotlib.pyplot as plt
import pandas as pd
import time
import requests
import json


# Part 1: Automated web-scraping utility to pull top-seller data from the Steam charts

In [70]:
# Start a Splinter browser session driven by Chrome; the scraping cells below reuse `browser`.
browser = Browser('chrome')

In [71]:
# Load the Steam top-selling chart page (the /CA variant) in the browser session.
# The country dropdown on this page is what the scraping loop interacts with.
url = "https://store.steampowered.com/charts/topselling/CA"
browser.visit(url)

In [72]:
# Dropdown labels to scrape, in the order they will be visited.
country_list = [
    'Global',
    'Canada',
    'Japan',
    'China',
    'United Kingdom',
    'France',
]

In [73]:
# Result container: every country starts with its own (unshared) empty list of game names.
game_names_by_country = dict((name, []) for name in country_list)

In [74]:
# Collect the chart's game names once per country by switching the page's country dropdown.
for country in country_list:

    # Open the dropdown, then click the entry whose text is exactly the country name.
    dropdown_container = browser.find_by_css('.DialogDropDown').first
    dropdown_container.click()

    country_option = browser.find_by_xpath(f"//div[text()='{country}']").first
    country_option.click()

    # Fixed pause before reading the page; presumably lets the chart reload
    # for the newly selected country — TODO confirm 2 seconds is always enough.
    time.sleep(2)

    html = browser.html
    soup_obj = soup(html, 'html.parser')

    # NOTE(review): this class name looks auto-generated and may change when
    # the site is rebuilt — verify it if a country comes back empty.
    game_elements = soup_obj.find_all('div', class_='weeklytopsellers_GameName_1n_4-')

    # Assign a fresh list instead of appending: the dict is created in another
    # cell, so appending would duplicate every name each time this cell is re-run.
    game_names_by_country[country] = [game.text.strip() for game in game_elements]


In [67]:
# Flatten the per-country lists into two parallel columns: one entry per (country, game) pair.
data = {'Country': [], 'GameNames': []}
for country, games in game_names_by_country.items():
    for title in games:
        data['Country'].append(country)
        data['GameNames'].append(title)

In [68]:
# Build a DataFrame from the `data` dict (keys 'Country' and 'GameNames' become the columns).
df = pd.DataFrame(data)
# Bare last expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,Country,GameNames
0,Global,Lethal Company
1,Global,Steam Deck
2,Global,Counter-Strike 2
3,Global,Call of Duty®
4,Global,Baldur's Gate 3
...,...,...
595,France,The Walking Dead: Destinies
596,France,The Elder Scrolls V: Skyrim Special Edition
597,France,Lost Ark: Ultimate Starter Pack
598,France,Wayfinder - Awakened Founder's


# Part 2: API calls to SteamSpy to retrieve games by genre

In [3]:
# Base URL of the Steam Spy API; the request type and its arguments are
# supplied as query parameters when the requests are made.
base_url = "https://steamspy.com/api.php"


In [7]:
# Genres to query, in the order the requests will be issued
genres = [
    "Action",
    "Strategy",
    "RPG",
    "Indie",
    "Adventure",
    "Sports",
    "Simulation",
    "MMO",
]

In [8]:
# Parsed API payloads, keyed by genre name; starts empty and is filled by the request loop
genre_data = dict()

In [9]:
# Request the game list for every genre and keep the parsed JSON per genre.
# A failure for one genre is reported and skipped so the remaining genres are
# still fetched.
for genre in genres:
    params = {
        "request": "genre",
        "genre": genre
    }

    try:
        # Without a timeout a stalled connection would block the notebook indefinitely.
        response = requests.get(base_url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch data for {genre}. Request error:", exc)
        continue

    if response.status_code == 200:
        try:
            # Store the retrieved data in the dictionary using the genre as the key
            genre_data[genre] = response.json()
        except ValueError as exc:
            # The body was not valid JSON (e.g. an HTML error page served with status 200).
            print(f"Failed to parse data for {genre}. Invalid JSON:", exc)
    else:
        print(f"Failed to fetch data for {genre}. Status code:", response.status_code)

# Convert the retrieved data for each genre into a pandas DataFrame, with each
# top-level key of the payload becoming one row (orient='index').
# A comprehension is used so no loop variable leaks into the notebook
# namespace: a loop variable named `data` would overwrite the `data` dict from
# the scraping section and break a re-run of the cell that builds its DataFrame.
genre_dataframes = {
    genre_name: pd.DataFrame.from_dict(payload, orient='index')
    for genre_name, payload in genre_data.items()
}

# Preview the first few rows of each genre's DataFrame.
# The loop variable is deliberately not named `df`, so the DataFrame bound to
# `df` in the scraping section is not overwritten by this preview.
for genre, genre_df in genre_dataframes.items():
    print(f"DataFrame for {genre}:")
    print(genre_df.head())  # First rows only; the full frames are large
    print("\n")


DataFrame for Action:
           appid                              name              developer  \
570          570                            Dota 2                  Valve   
730          730  Counter-Strike: Global Offensive                  Valve   
578080    578080               PUBG: BATTLEGROUNDS          KRAFTON, Inc.   
1063730  1063730                         New World           Amazon Games   
1172470  1172470                      Apex Legends  Respawn Entertainment   

               publisher score_rank  positive  negative  userscore  \
570                Valve              1746111    379671          0   
730                Valve              6818622    951925          0   
578080     KRAFTON, Inc.              1303590    954525          0   
1063730     Amazon Games               189465     79914          0   
1172470  Electronic Arts               608338    156655          0   

                             owners  average_forever  average_2weeks  \
570      200,000,000 .

In [12]:
# Write every genre's DataFrame to '<genre>_data.csv' in the current working
# directory; index=False leaves the row index out of the file.
# The loop variable is not named `df`, so the DataFrame bound to `df` in the
# scraping section is left intact.
for genre, genre_df in genre_dataframes.items():
    genre_df.to_csv(f'{genre}_data.csv', index=False)