In [13]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup as soup
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import matplotlib.pyplot as plt
import pandas as pd
import time
import requests
import json


# Part 1: Automated web-scraping utility to pull data from Steam Charts

In [14]:
# Start a Splinter browser session using the 'chrome' driver; the cells below drive the page through `browser`
browser = Browser('chrome')

In [15]:
# Open the Steam top-selling chart page in the browser session.
# The URL ends in the region code "CA"; other regions are selected later through the page's dropdown.
url = "https://store.steampowered.com/charts/topselling/CA"
browser.visit(url)

In [16]:
# Region labels to scrape. Each string must match the text of a dropdown option on the page exactly,
# because the scraping loop locates the option with an XPath text comparison.
country_list = ['Global','Canada','Japan','China','United Kingdom','France']

In [73]:
# Map each region label to the list of game names scraped for it (starts empty for every region)
game_names_by_country = {country: [] for country in country_list}

In [74]:
# Scrape the top-seller list for every region in `country_list`.
# Each pass selects the region in the page's dropdown, pauses so the chart can
# re-render, then parses the page HTML for the game-name elements.
for country in country_list:

    # Open the region dropdown and click the option whose text equals `country`.
    dropdown_container = browser.find_by_css('.DialogDropDown').first
    dropdown_container.click()

    country_option = browser.find_by_xpath(f"//div[text()='{country}']").first
    country_option.click()

    # Fixed pause to give the page time to load the newly selected region's chart.
    time.sleep(2)

    html = browser.html
    soup_obj = soup(html, 'html.parser')

    # NOTE(review): the class name carries what looks like a build-generated suffix;
    # confirm it still matches the live page if this returns no elements.
    game_elements = soup_obj.find_all('div', class_='weeklytopsellers_GameName_1n_4-')

    # Assign a fresh list instead of appending to the existing one, so re-running
    # this cell replaces the previous scrape rather than duplicating every entry.
    game_names_by_country[country] = [game.text.strip() for game in game_elements]


In [67]:
# Flatten the per-region lists into two parallel columns: one entry per
# (region, game name) pair, with the region label repeated for each of its games.
data = {
    'Country': [
        region
        for region, titles in game_names_by_country.items()
        for _ in titles
    ],
    'GameNames': [
        title
        for titles in game_names_by_country.values()
        for title in titles
    ],
}

In [68]:
# Build the long-format table (one row per region/game pair) from the two parallel columns and display it
df = pd.DataFrame(data)
df

Unnamed: 0,Country,GameNames
0,Global,Lethal Company
1,Global,Steam Deck
2,Global,Counter-Strike 2
3,Global,Call of Duty®
4,Global,Baldur's Gate 3
...,...,...
595,France,The Walking Dead: Destinies
596,France,The Elder Scrolls V: Skyrim Special Edition
597,France,Lost Ark: Ultimate Starter Pack
598,France,Wayfinder - Awakened Founder's


# Part 2: API calls to Steam Spy to retrieve games by genre

In [59]:
# Base URL of the Steam Spy API; the request type and genre are supplied as query parameters
base_url = "https://steamspy.com/api.php"


In [60]:
# Genres to analyze; each name is sent as the `genre` query parameter and used as the key for that genre's data
genres = ["Action", "Strategy", "RPG", "Indie", "Adventure", "Sports", "Simulation"]

In [61]:
# Decoded API responses keyed by genre name; starts empty and is filled by the request loop
genre_data = {}

In [62]:
# Request the game list for every genre from the Steam Spy API and keep the
# decoded JSON payload in `genre_data` under the genre's name.
for genre in genres:
    params = {
        "request": "genre",
        "genre": genre
    }

    try:
        # The timeout keeps a stalled connection from hanging the notebook indefinitely.
        response = requests.get(base_url, params=params, timeout=30)
    except requests.RequestException as exc:
        # Network-level failure: report it and move on to the next genre.
        print(f"Failed to fetch data for {genre}. Request error:", exc)
        continue

    if response.status_code == 200:
        try:
            # Store the retrieved data in the dictionary using the genre as the key
            genre_data[genre] = response.json()
        except ValueError as exc:
            # A 200 response whose body is not valid JSON.
            print(f"Failed to fetch data for {genre}. Invalid JSON:", exc)
    else:
        print(f"Failed to fetch data for {genre}. Status code:", response.status_code)

# Convert each genre's payload into a DataFrame; orient='index' makes every
# top-level key of the payload a row label.
genre_dataframes = {
    genre_name: pd.DataFrame.from_dict(payload, orient='index')
    for genre_name, payload in genre_data.items()
}

# Preview the first rows of every genre's DataFrame.
# Dedicated loop names are used so the earlier `data` and `df` variables are not overwritten.
for genre_name, genre_frame in genre_dataframes.items():
    print(f"DataFrame for {genre_name}:")
    print(genre_frame.head())  # Display the first few rows of each DataFrame
    print("\n")


DataFrame for Action:
           appid                              name              developer  \
570          570                            Dota 2                  Valve   
730          730  Counter-Strike: Global Offensive                  Valve   
578080    578080               PUBG: BATTLEGROUNDS          KRAFTON, Inc.   
1063730  1063730                         New World           Amazon Games   
1172470  1172470                      Apex Legends  Respawn Entertainment   

               publisher score_rank  positive  negative  userscore  \
570                Valve              1746781    379927          0   
730                Valve              6820541    952735          0   
578080     KRAFTON, Inc.              1304127    954780          0   
1063730     Amazon Games               189547     79930          0   
1172470  Electronic Arts               608675    156868          0   

                             owners  average_forever  average_2weeks  \
570      200,000,000 .

In [37]:
# Convert the price columns from cents to dollars and export one CSV per genre.
#
# Each frame is rebuilt from the raw payload in `genre_data` before conversion and
# then stored back into `genre_dataframes`, so re-running this cell never divides
# already-converted prices by 100 a second time.
price_columns = ['price', 'initialprice']

for genre, raw_payload in genre_data.items():
    genre_frame = pd.DataFrame.from_dict(raw_payload, orient='index')

    if genre_frame.empty:
        print(f"DataFrame for {genre} is empty.")
        continue

    # Only convert the price columns that are actually present in this frame.
    valid_columns = [col for col in price_columns if col in genre_frame.columns]
    if not valid_columns:
        print(f"No valid price columns found in DataFrame for {genre}.")
        continue

    for col in valid_columns:
        # Parse to numbers (unparseable values become NaN), then cents -> dollars.
        genre_frame[col] = pd.to_numeric(genre_frame[col], errors='coerce') / 100

    # Drop every row that contains a null value in any column.
    genre_frame = genre_frame.dropna()

    # Publish the converted frame and export it; the row index is not written to the CSV.
    genre_dataframes[genre] = genre_frame
    genre_frame.to_csv(f'{genre}_data_converted.csv', index=False)


DataFrame for MMO is empty.


In [21]:
# Display the DataFrame stored for the Indie genre
genre_dataframes['Indie']

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,owners,average_forever,average_2weeks,median_forever,median_2weeks,price,initialprice,discount,ccu
304930,304930,Unturned,Smartly Dressed Games,Smartly Dressed Games,,483857,44876,0,"20,000,000 .. 50,000,000",8305,2541,393,349,0,0,0,57371
252490,252490,Rust,Facepunch Studios,Facepunch Studios,,866245,128484,0,"20,000,000 .. 50,000,000",19251,1542,4045,649,3999,3999,0,103457
105600,105600,Terraria,Re-Logic,Re-Logic,,1192625,28374,0,"20,000,000 .. 50,000,000",6860,573,1873,469,999,999,0,46928
291550,291550,Brawlhalla,Blue Mammoth Games,Ubisoft,,287849,58993,0,"20,000,000 .. 50,000,000",4038,340,233,70,0,0,0,19623
4000,4000,Garry's Mod,Facepunch Studios,Valve,,987214,33370,0,"20,000,000 .. 50,000,000",8925,292,1072,102,999,999,0,31432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
706550,706550,Kaya,KirUn,KirUn,,0,0,0,"0 .. 20,000",0,0,0,0,0,0,0,0
858730,858730,Yoke Light,Kotoshiro,Kotoshiro,,0,0,0,"0 .. 20,000",0,0,0,0,99,99,0,0
721460,721460,Xeno Time Inception,Xeno Gaming LLC,Xeno Gaming LLC,,0,0,0,"0 .. 20,000",0,0,0,0,0,0,0,0
1119500,1119500,FIGHT BALL - BOXING VR,J-TEC,J-TEC,,2,2,0,"0 .. 20,000",0,0,0,0,499,499,0,0


In [63]:
def calculate_total_ccu(df, verbose=True):
    """Return the sum of the ``ccu`` column of ``df``.

    Args:
        df: DataFrame that contains a ``ccu`` column.
        verbose: When True (the default), print the total followed by the
            ``ccu`` column itself. Pass False to compute the total silently,
            e.g. when calling this in a loop over many frames.

    Returns:
        The sum of ``df['ccu']``.

    Raises:
        KeyError: If ``df`` has no ``ccu`` column.
    """
    ccu_values = df['ccu']
    total_ccu = ccu_values.sum()
    if verbose:
        print(f"Total CCU for the DataFrame:\n{total_ccu}")
        print(f"CCU values for the DataFrame:\n{ccu_values}")
    return total_ccu


In [54]:
# Sum of the `ccu` column over the Action genre frame; the call also prints the total and the column
total_ccu_Action = calculate_total_ccu(genre_dataframes["Action"])

Total CCU for the DataFrame:
5502263
CCU values for the DataFrame:
570        610539
730        880149
578080     300658
1063730     27343
1172470    193855
            ...  
848400          0
858730          0
721460          0
1119500         0
2225520         1
Name: ccu, Length: 29306, dtype: int64


In [64]:
# Total CCU for each genre, in the same order as `genres`.
# No guard on variables left over from other cells: every genre contributes exactly
# one total so the list stays aligned with `genres`, and a genre missing from
# `genre_dataframes` raises KeyError instead of silently shortening the list.
total_ccu_per_genre = [
    calculate_total_ccu(genre_dataframes[genre_name]) for genre_name in genres
]

Total CCU for the DataFrame:
5502263
CCU values for the DataFrame:
570        610539
730        880149
578080     300658
1063730     27343
1172470    193855
            ...  
848400          0
858730          0
721460          0
1119500         0
2225520         1
Name: ccu, Length: 29306, dtype: int64
Total CCU for the DataFrame:
2065747
CCU values for the DataFrame:
570        610539
1086940    159283
322330      42977
291480        981
444090       7543
            ...  
662570          0
875680          0
865810          0
848400          0
706550          0
Name: ccu, Length: 13552, dtype: int64
Total CCU for the DataFrame:
1821192
CCU values for the DataFrame:
1063730    27343
1599340    54879
252490     95122
230410     54529
105600     40704
           ...  
599490         0
652730         0
611820         0
643930         0
721460         0
Name: ccu, Length: 12063, dtype: int64
Total CCU for the DataFrame:
2095415
CCU values for the DataFrame:
304930     59383
252490     9512

In [65]:
# Show the list of per-genre CCU totals
total_ccu_per_genre

[5502263, 2065747, 1821192, 2095415, 3031973, 408396, 1762963]

In [67]:
# Summary table pairing each genre name with its total CCU, then display it.
# Both inputs are plain lists, so they must have the same length.
genre_totals = pd.DataFrame({
    'Genre': genres,
    'Total_Concurrent_Users': total_ccu_per_genre
})
genre_totals

Unnamed: 0,Genre,Total_Concurrent_Users
0,Action,5502263
1,Strategy,2065747
2,RPG,1821192
3,Indie,2095415
4,Adventure,3031973
5,Sports,408396
6,Simulation,1762963
