# COGS 108 Group 171 Project

In [18]:
#Import the necessary libraries to begin project.
import json
import os
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import seaborn as sns

## Getting Metacritic Scores for Games

Here, we start by retrieving Metacritic scores for games from a dataset. This dataset can be found on the data.world website and consists of 13,357 rows. What we need are the game titles and their corresponding Metacritic scores, along with the dates they were released.

In [19]:
# Download the games dataset (a CSV hosted on data.world) into a dataframe
csv_url = 'https://query.data.world/s/odkffdqc52dfm34xjwdc67q7cxfjmz'
metacritic_df = pd.read_csv(csv_url)

In [20]:
# Clean the raw dataset in a single chain:
#   1. keep only rows whose Metacritic score is not 0, and only the game name,
#      Metacritic score and release date columns
#   2. drop the duplicated game names that this dataset contains
#   3. rename ResponseName to GameTitle
score_is_nonzero = metacritic_df['Metacritic'] != 0
kept_columns = ['ResponseName', 'Metacritic', 'ReleaseDate']
metacritic_df = (
    metacritic_df
    .loc[score_is_nonzero, kept_columns]
    .drop_duplicates(subset=['ResponseName'])
    .rename(columns={'ResponseName' : 'GameTitle'})
)

In [21]:
# Preview the first rows of the cleaned dataframe (GameTitle, Metacritic, ReleaseDate)
metacritic_df.head()

Unnamed: 0,GameTitle,Metacritic,ReleaseDate
0,Counter-Strike,88,Nov 1 2000
2,Day of Defeat,79,May 1 2003
6,Half-Life,96,Nov 8 1998
7,Counter-Strike: Condition Zero,65,Mar 1 2004
9,Half-Life: Blue Shift,71,Jun 1 2001


In [5]:
# (number of rows, number of columns) of the cleaned dataframe
metacritic_df.shape

(2194, 3)

Here we can see that we have 2,194 game titles in our dataset with valid Metacritic scores.

## Retrieving data from Twitch

Now we need to retrieve data from the Twitch API. First, we need to get the `game_id` of each game, and then use those IDs to find the top 20 real-time streams per game.

In [22]:
# This is the data needed to authorize get requests from the Twitch API.
# The Client-ID is read from the TWITCH_CLIENT_ID environment variable when it is
# set, so the credential does not have to live in the notebook.
# NOTE(review): the literal below is kept only as a fallback so the notebook still
# runs unchanged; it is a committed credential and should be rotated and removed.
data = {'Client-ID' : os.environ.get('TWITCH_CLIENT_ID') or 'gaov4kdyvd87a5n7vnwn1d49hyv8lk'}

In [23]:
# Create a temporary array to store all the game titles from the metacritic_df dataframe
temp_arr = np.array(metacritic_df['GameTitle'])

# Number of game titles sent to the API per request
BATCH_SIZE = 100

# One dataframe per API response. They are combined once after the loop instead of
# growing `df` inside it: DataFrame.append was removed in pandas 2.0 and re-copies
# the whole frame on every iteration.
game_frames = []

# Loop through BATCH_SIZE titles at a time and keep the API result of each iteration
for i in range(0, temp_arr.size, BATCH_SIZE):

    query_names = temp_arr[i:i+BATCH_SIZE].tolist()

    # Send one GET request for this batch of game titles
    r = requests.get('https://api.twitch.tv/helix/games', headers=data,
                     params={'name': query_names}, timeout=30)
    # Fail with the HTTP error itself rather than a KeyError on the missing 'data' field
    r.raise_for_status()
    response = r.json()

    # The 'data' field holds one record per matched game; build the temporary
    # dataframe straight from it (no round trip through a JSON file on disk)
    temp_df = pd.DataFrame(response['data'])
    if not temp_df.empty:
        game_frames.append(temp_df)

# Main dataframe: all batches stacked, each keeping its own per-response row index
df = pd.concat(game_frames) if game_frames else pd.DataFrame()



In [24]:
# Display the game records collected from the Twitch API (box_art_url, id, name)
df

Unnamed: 0,box_art_url,id,name
0,https://static-cdn.jtvnw.net/ttv-boxart/Space%...,11956,Space Empires IV Deluxe
1,https://static-cdn.jtvnw.net/ttv-boxart/Rag%20...,1275,Rag Doll Kung Fu
2,https://static-cdn.jtvnw.net/ttv-boxart/Left%2...,18916,Left 4 Dead
3,https://static-cdn.jtvnw.net/ttv-boxart/Multiw...,19711,Multiwinia
4,https://static-cdn.jtvnw.net/ttv-boxart/./Call...,11609,Call of Duty: United Offensive
5,https://static-cdn.jtvnw.net/ttv-boxart/./Safe...,7867,Safecracker: The Ultimate Puzzle Adventure
6,https://static-cdn.jtvnw.net/ttv-boxart/Shadow...,8437,Shadowgrounds
7,https://static-cdn.jtvnw.net/ttv-boxart/Day%20...,5567,Day of Defeat
8,https://static-cdn.jtvnw.net/ttv-boxart/./Cond...,8068,Condemned: Criminal Origins
9,https://static-cdn.jtvnw.net/ttv-boxart/X%20Re...,31315,X Rebirth


Great, we got over 1,560 games returned with their corresponding `game_id`s! Now, we can take each ID and retrieve the top 20 streams of each game currently being broadcast on Twitch.

In [26]:
# Reuse temp_arr to store each game ID
temp_arr = np.array(df['id'])

# Only the first MAX_GAMES game IDs are queried, one request per game
MAX_GAMES = 60
# Since the server limits us from obtaining more than 30 requests per minute, we
# pause for one minute after every REQUESTS_PER_MINUTE requests
REQUESTS_PER_MINUTE = 30

# One dataframe per API response. They are combined once after the loop instead of
# growing `stream_df` inside it (DataFrame.append was removed in pandas 2.0).
stream_frames = []

# Counter for the requests sent since the last pause
count = 0

# Never index past the end of temp_arr when fewer than MAX_GAMES IDs were returned
num_games = min(MAX_GAMES, temp_arr.size)

# Loop through one game at a time and keep the API result of each iteration
for i in range(num_games):

    # Send a GET request for the streams of a single game ID
    r = requests.get('https://api.twitch.tv/helix/streams', headers=data,
                     params={'game_id': str(temp_arr[i])}, timeout=30)
    # Fail with the HTTP error itself rather than a KeyError on the missing 'data' field
    r.raise_for_status()
    response = r.json()

    # The 'data' field holds one record per stream; build the temporary
    # dataframe straight from it (no round trip through a JSON file on disk)
    temp_df = pd.DataFrame(response['data'])
    if not temp_df.empty:
        stream_frames.append(temp_df)

    # Count this request first, then compare by value: the previous `count is 30`
    # check ran before the increment (letting a 31st request through) and compared
    # integers by identity instead of equality.
    count += 1
    if count == REQUESTS_PER_MINUTE and i + 1 < num_games:
        time.sleep(60) # Wait one minute to obtain 30 more requests from Twitch servers
        count = 0

# All responses stacked, each keeping its own per-response row index
stream_df = pd.concat(stream_frames) if stream_frames else pd.DataFrame()


In [28]:
# Number of stream records collected per game ID. Only the last expression of a
# cell is displayed, so this intermediate result has to be printed explicitly.
print(stream_df['game_id'].value_counts())
# Display the collected stream records
stream_df

Unnamed: 0,community_ids,game_id,id,language,started_at,tag_ids,thumbnail_url,title,type,user_id,user_name,viewer_count
0,[],18916,34338895328,en,2019-05-30T20:23:32Z,[6ea6bca4-4712-4ab9-a906-e3336a9d8039],https://static-cdn.jtvnw.net/previews-ttv/live...,ahhh Shiiit here we go again,live,121511849,trock2323,10
1,[],18916,34335575696,ru,2019-05-30T16:02:17Z,[0569b171-2a2b-476e-a596-5bdfb45a1327],https://static-cdn.jtvnw.net/previews-ttv/live...,old days,live,110342626,hoOmmhk,4
2,[],18916,34338909808,en,2019-05-30T20:24:39Z,[6ea6bca4-4712-4ab9-a906-e3336a9d8039],https://static-cdn.jtvnw.net/previews-ttv/live...,For K<3,live,129313951,leapingking,1
0,"[8deb7b0f-d5a3-4f9d-942d-2331d8f4fe3d, d808cf0...",2755,34334332432,ru,2019-05-30T14:01:31Z,"[67259b26-ff83-444e-9d3c-faab390df16f, 2ffd5c3...",https://static-cdn.jtvnw.net/previews-ttv/live...,Пол жизни дальше.,live,182495901,deussych,39
1,"[d808cf0b-88c3-4ef2-acd0-bae82f17e9c3, dbec2d3...",2755,34337161872,ru,2019-05-30T18:13:14Z,"[77a928f7-39da-4dad-9d81-3e6bd7a36e04, ac763b1...",https://static-cdn.jtvnw.net/previews-ttv/live...,"ДОБРАЯ ПОЛОВИНА РАСПАДА [Half-Life 1, #halflif...",live,129294787,kudeta,18
2,[50aab9c5-b677-4d9a-8b45-7c1c75dd6a99],2755,34334823120,en,2019-05-30T14:51:20Z,[6ea6bca4-4712-4ab9-a906-e3336a9d8039],https://static-cdn.jtvnw.net/previews-ttv/live...,Loooool,live,104246535,Kawaqa,8
3,"[aca761bd-9eb9-4197-865f-2937995be149, b0e7cf1...",2755,34338312736,en,2019-05-30T19:39:18Z,[6ea6bca4-4712-4ab9-a906-e3336a9d8039],https://static-cdn.jtvnw.net/previews-ttv/live...,Half Life Saga - HL 1 - 1st Time Playing - Part 2,live,217986979,GamisticYT,3
0,[],9899,34337037392,ru,2019-05-30T18:03:57Z,[0569b171-2a2b-476e-a596-5bdfb45a1327],https://static-cdn.jtvnw.net/previews-ttv/live...,Старенькая РПГ.,live,172230312,raknogun_waffles,3
0,[],29595,34332372096,en,2019-05-30T10:01:13Z,[6ea6bca4-4712-4ab9-a906-e3336a9d8039],https://static-cdn.jtvnw.net/previews-ttv/live...,LIVE: Evil Geniuses vs. TNC Predator - UB Semi...,live,36481935,ESL_DOTA2,72461
1,[],29595,34336712736,ru,2019-05-30T17:39:04Z,[0569b171-2a2b-476e-a596-5bdfb45a1327],https://static-cdn.jtvnw.net/previews-ttv/live...,"ESL One Birmingham 2019, EG - TNC bo3 | Morta...",live,100815219,Dota2ruhub2,25518


### Test code API

Here is a test example of retrieving data from the Twitch Web API. Let's see if we can extract a JSON file from a URL.

Let's try to retrieve the data and import it into a pandas dataframe.

In [None]:
# Ask the streams endpoint for two game IDs in a single GET request
test_params = {'game_id': ['16676', '29595']}
r = requests.get('https://api.twitch.tv/helix/streams', headers=data, params=test_params)
# Decode the JSON body of the reply into the variable 'response'
response = r.json()
# Save the decoded reply to 'twitch_data.json' so it can be read back from disk
with open('twitch_data.json','w') as outfile:
    outfile.write(json.dumps(response))

In [None]:
# Read the saved JSON file back as a pandas Series (typ='series') whose index is
# taken from the top-level JSON keys (orient='index')
df_test = pd.read_json('twitch_data.json', orient='index', typ='series')

In [None]:
# Turn the records stored under the 'data' key into a dataframe and display it
df_test = pd.DataFrame(df_test['data'])
df_test

In [None]:
# (number of rows, number of columns) of the test dataframe
df_test.shape