# Street Fighter V Frame Data Analysis

## Data Import & Set up

In [1]:
import time
import json
import requests
import email
import imaplib

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

In [2]:
#accessing the log in details contained on obscured files
with open(r'C:\Users\Louis\.secret\steam_login_details.json') as f:
    steam_login_details = json.load(f)

with open(r'C:\Users\Louis\.secret\gmail_login_details.json') as f:
    gmail_login_details = json.load(f)

    
#creating variables containing each login detail
steam_uname = steam_login_details[0]['username']
steam_pword = steam_login_details[1]['password']

gmail_uname = gmail_login_details[0]['username']
gmail_pword = gmail_login_details[1]['password']

## Web Scraping

#### Using Beautiful Soup *(initial exploration)*

In [3]:
#scraping the web page with BeautifulSoup 

#using requests to access the Guile (SFV character) frame data page
page_url = "https://game.capcom.com/cfn/sfv/character/guile/frame/table#vt1"
web_page = requests.get(page_url)

#parsing the sites html with BS and printing an easier to read form 
guile_soup = BeautifulSoup(web_page.content, 'html.parser')
print(guile_soup.prettify)

#locating the table with the desired data. this returned an empty list
guile_soup.find("table", {"class": "frameTbl"})

<bound method Tag.prettify of <!DOCTYPE html>

<html lang="ja">
<head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# article: http://ogp.me/ns/article#">
<meta charset="utf-8"/>
<meta content="IE=edge,chrome=1" http-equiv="x-UA-Compatible"/>
<meta content="index,follow" name="robots"/>
<meta content="width=device-width,initial-scale=1.0,user-scalable=yes" name="viewport"/>
<meta content="CAPCOM,カプコン,ストリートファイター,格闘,SFV,シャド研,シャドルー格闘家研究所,STREETFIGHTER,格闘ゲーム,CFN,シャドル" name="keywords"/>
<meta content="CAPCOM" name="author"/>
<meta content="(C) CAPCOM U.S.A., INC. 2016 ALL RIGHTS RESERVED." name="copyright"/>
<meta content="通常技は非常に優秀なので得意な間合いをキチンとつかんで相手の攻撃をつぶしていこう。Ｖスキルのソニックブレイドは牽制に使えるぞ。また、ソニックブレイド中にソニックブームを発動する事で、ソニックブームが更に強力な飛び道具攻撃なるぞ！トリガーはソニックブームの連射！押すボタンの強度でスピードが変わる。またＶトリガー中にクリティカルアーツを出すと威力とヒット数がアップする！" name="description"/>
<meta content="summary_large_image" name="twitter:card"/>
<meta content="@SF_Community" name="twitter:site"/>
<meta content="フレーム表 | ガイル | キャラクターデータ | CAPCOM：シャ

It turns out that this table could not be found by Beautiful Soup even though I can see the element I'm searching for in the inspect element feature. I found that the div containing the table I want to access also has a **\<script>..\<script>** tag inside it. <br>
    Most likely, the html for the target table is being **dynamically generated** by JavaSript code. Because BS interacts with the url without loading the page, any content that is written in real-time will not be there yet. <br>
    So I will use Selenium to write an autamated script that will navigate through the site, take me through steams login and security email confirmation process and then bring up a characters frame data page to be scrapped with BS. <br>
    Later this script will be written to scrape all characters frame data.
    <br><br>

#### Navigating the Web *(using Selenium)*

In [4]:
#I will be creating a script that will sign me into capcoms site and take me to a characters frame data page. Capcom uses Steams login API for PC logins

#setting up the web driver. arg takes the web-drivers path directory
#the 'r' denotes a raw string, needed because of the windows directory syntax
browser = webdriver.Chrome(r'C:\Users\Louis\Desktop\SFV_Frame_data_Analysis\chrome_driver\chromedriver.exe')
browser.maximize_window()
browser.get(page_url)

#1st page
#locating and clicking on the log in button. I Had to use the webdriver to access the hyperlink as the button wasn't clickable
steam_signin = browser.find_element_by_class_name('loginSteam')
browser.get(steam_signin.get_attribute('href'))

#2nd page
#clicking the'agree' button on t&c page
browser.find_element_by_css_selector("[type = 'submit']").click()

#3rd page
#entering steam login details
browser.find_element_by_css_selector("input[id='steamAccountName']").send_keys(steam_uname)
browser.find_element_by_css_selector("input[id='steamPassword']").send_keys(steam_pword)
browser.find_element_by_css_selector("input[id='imageLogin']").click()

#### Navigating and Scraping email  *(using IMAP & Beautiful Soup)*

In [5]:
#now I will write a script that will access my gmail and scrape the authentication password from Steam's login verification auto-send email

time.sleep(5) #slight delay to allow time for the email to be received 

#signing in to gmail
e_mail = imaplib.IMAP4_SSL('imap.gmail.com')
e_mail.login(gmail_uname, gmail_pword)

#selecting the main inbox and searching it for all emails
e_mail.select('INBOX')
result, data = e_mail.uid('search', None, 'ALL')

#turning the string that lists the email's uid's into a list of seperate uid's. Then selecting the most recent email in the list (they are ordered chronologically)
emails_list = data[0].split()
most_recent = emails_list[-1]

#fetching the most recent email in the inbox and decoding it
result2, email_data = e_mail.uid('fetch', most_recent, '(RFC822)')
raw_email = email_data[0][1].decode('utf-8')




#this part simply allows me to find the required elements in the html 
#using the email module to create an email object from the raw email data
email_message = email.message_from_string(raw_email)

#retrieving the payload to access the emails body of text and/or it's html 
email_payload = email_message.get_payload()
email_html = email_payload[1] #two items in the list. 1st is the body of text, 2nd is the html 

print(email_html)

Content-Type: text/html; charset=UTF-8
Content-Transfer-Encoding: 7bit

<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
	<head>
	<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
	<html>
	<head>
		<meta content="text/html;charset=UTF-8" http-equiv="Content-Type">
		<style media="all" type="text/css">
		td, p, h1, h3, a {font-family: Helvetica, Arial, sans-serif;}
		</style>
	</head>
	
<body LINK="#c6d4df" ALINK="#c6d4df" VLINK="#c6d4df" TEXT="#c6d4df" style="font-family: Helvetica, Arial, sans-serif; font-size: 14px; color: #c6d4df;" >
<table style="width: 538px; background-color: #393836;" align="center" cellspacing="0" cellpadding="0">
	<tr>
		<td style=" height: 65px; background-color: #000000; border-bottom: 1px solid #4d4b48;">
              <img src="https://store.steampowered.com/public/shared/images/email/email_header_logo.png" width="538" height="65" alt="Steam">
        </td>
	</tr>
	<tr>
		<td bgcolor="#17212e">
			<table width="470" 

In [14]:
#now scraping the email for the authentication password
#creating a soup of the emails html and locating the element that contains the auth password in text form
style_value = 'font-size: 24px; color: #66c0f4; font-family: Arial, Helvetica, sans-serif; font-weight: bold;'  #only attribute is the cumbersome 'style' attribute 
email_soup = BeautifulSoup(raw_email)
target_element = email_soup.find('span', 
                                 {'style' : style_value})

#retrieving the authentication code and entering it into the Steam password input box
auth_code = target_element.text
browser.find_element_by_css_selector("input[id='authcode']").send_keys(auth_code)


#now need to find a way to click submit code 
# browser.find_elements_by_tag_name('div')

[<selenium.webdriver.remote.webelement.WebElement (session="0f6112f92e05c2bee7578b58e937b314", element="fa82665d-0aef-493f-a70a-481eeb259c23")>,
 <selenium.webdriver.remote.webelement.WebElement (session="0f6112f92e05c2bee7578b58e937b314", element="e57c0f5a-e904-4abf-ada7-77895b1d9d76")>,
 <selenium.webdriver.remote.webelement.WebElement (session="0f6112f92e05c2bee7578b58e937b314", element="ac6834f6-61b3-48d8-9099-dfa205bb7853")>,
 <selenium.webdriver.remote.webelement.WebElement (session="0f6112f92e05c2bee7578b58e937b314", element="4b6efe16-4d4f-4f92-bec1-ac963b61cca6")>,
 <selenium.webdriver.remote.webelement.WebElement (session="0f6112f92e05c2bee7578b58e937b314", element="6ae33156-2aea-4cc3-a1ed-dfaf27c181d7")>,
 <selenium.webdriver.remote.webelement.WebElement (session="0f6112f92e05c2bee7578b58e937b314", element="25e50b19-ad5a-46d6-9ef3-7f868db5fcf7")>,
 <selenium.webdriver.remote.webelement.WebElement (session="0f6112f92e05c2bee7578b58e937b314", element="6dff25e4-1d78-4028-be95-cb

In [None]:
#After login in

# go_to_button = browser.find_element_by_link_text('Go to Frame List').click()

In [None]:
# browser.refresh()