In [53]:
import pandas as pd
import requests
import nltk 
import pprint
import csv

In [2]:
#based on the csv created in the previous step, I will create a csv only with the appids. 
column_names = ["Timestamp", "AppID", "Title", "Current_Players", "Peak_Players"]
df = pd.read_csv("SteamTop100byTime.csv", names=column_names)
appids_list = df.AppID.to_list()
df.to_csv('top100appids.csv') 

In [3]:
#this will define the function get_reviews which will use the request reviews function from the STEAM api. 
def get_reviews(appid, params): 
        url_start = 'https://store.steampowered.com/appreviews/'
        try:
            response = requests.get(url=url_start+str(appid), params=params, headers={'User-Agent': 'Mozilla/5.0'})
        except:
                return {'reviews' : []}
        return response.json() # return data extracted from the json response

In [None]:
#this creates an empty list for reviews, the rest are parameters, it includes 100 reviews per game, sorted by helpfulness. 
reviews = []
cursor = '*'
params = { # https://partner.steamgames.com/doc/store/getreviews
    'json' : 1,
    'filter' : 'all', # sort by: recent, updated, all (helpfulness)
    'language' : 'english', # https://partner.steamgames.com/doc/store/localization
    'day_range' : 9223372036854775807, # shows reviews from all time
    'review_type' : 'all', # all, positive, negative
    'purchase_type' : 'all', # all, non_steam_purchase, steam
    'num_per_page' : 100,
    'cursor' : '*'.encode()
}


In [25]:
#create a blank dictionary and calls the appids_list created in the second step. for each app id in the list of ids, it will get the reviews. The app_dict of each app_id is stored in L

app_dict = {}
app_ids = (appids_list)
for i, app_id in enumerate(app_ids):
    l = get_reviews(app_id, params)["reviews"]
    app_dict[app_id] = l

In [63]:
#create a list of STEM words from a .txt file. The list is called STEM_words
import csv

STEM_words = []
with open('STEM_words.csv', newline='') as inputfile:
    for row in csv.reader(inputfile):
        STEM_words.append(row[0])

print(STEM_words)

['ï»¿graph', 'linear ', 'matrix', 'mappings', 'spatial', 'static', 'metric', 'temporal', 'partition', 'calculus', 'robust', 'transactions', 'switch', 'transaction', 'predicate', 'probabilistic', 'latency', 'workload', 'prefix', 'configuration', 'feasible', 'corollary', 'maximal', 'scaling', 'geometry', 'fragment', 'predicates', 'processor', 'weighted', 'geometric', 'verify', 'array', 'fraction', 'heuristic', 'generic', 'hybrid', 'overview', 'verification', 'alignment', 'illumination', 'symmetric', 'connectivity', 'exponential', 'fragment', 'simulations', 'latent', 'cognitive', 'rendering', 'merge', 'differential', 'template', 'additionally', 'default', 'annotations', 'annotation', 'authentication', 'auxiliary', 'equilibrium', 'inverse', 'trajectory', 'trajectories', 'conjunction', 'mapped', 'horizontal', 'anomaly', 'redundant', 'annotated', 'dependence', 'regression', 'disparity', 'correspondences', 'bottleneck', 'congruence', 'diagram', 'probing', 'primitive', 'divergence', 'increment

In [64]:
#create a dictionary called app_freq which will tokenize all the words in the review and match them to the STEM_words list. 
app_freq = {}
scraped_apps = list(app_dict.keys())
for a in scraped_apps:
    rl = app_dict[a]
    d = {sw:0 for sw in STEM_words}
    for i,r in enumerate(rl):
        rev_str = r["review"]
        words = nltk.word_tokenize(rev_str)
        lwords = [w.lower() for w in words]
        for sw in STEM_words:
            if sw in lwords:
                #print(a, i,"found", sw)
                d[sw] = d[sw] + 1
    app_freq[a] = d




In [66]:
#create a dataframe from the app_freq dictionary and save it as a CSV file.     

df = pd.DataFrame.from_dict(app_freq, orient="index")
df.append(df.sum().rename('Total'))
df.to_csv('technicalwords.csv') 
