## Note Processing Notebook

In [1]:
''' This notebook contains code for note processing.

Contributors:
    Sam Sui
'''

# Standard libraries
import ssl
import zipfile
import io

# Third party libraries
import nltk
from nltk.tokenize import sent_tokenize
import pandas as pd
import requests

ssl._create_default_https_context = ssl._create_unverified_context
nltk.download('punkt')

SAVE_TO_CSV = False

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Yoshi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Read Data (via Dataverse)

In [2]:
# Download the patch notes and win rates for each patch version and champion from Dataverse.
#   Note: "?format=original" used to download CSV file instead of tab.
#   Reference: https://guides.dataverse.org/en/6.1/api/dataaccess.html

file_url = 'https://dataverse.harvard.edu/api/access/datafile/10168372?format=original'

# Make a GET request to download the file
response = requests.get(file_url)

filename = 'patches.csv'

# Check if the request was successful
if response.status_code == 200:
    # Read the content of the file into memory
    file_bytes = io.BytesIO(response.content)
    
    # Write the bytes to a file to later be accessed.
    with open(filename, 'wb') as f:
        f.write(file_bytes.getbuffer())
else:
    print('Download Failed!', response.status_code)

In [3]:
df = pd.read_csv('patches.csv')
df

Unnamed: 0,version,champion,note,win_rate
0,V10_1,Aphelios,Movement speed reduced to 325 from 330. Base h...,54.0
1,V10_1,Aphelios,Infernum splash damage reduced to 75% from 100%.,54.0
2,V10_1,Aurelion Sol,Base damage reduced to 12 − 120 (based on leve...,13.0
3,V10_1,Azir,Base damage increased to 175 / 325 / 475 from ...,46.0
4,V10_1,Corki,Base damage increased to 90 / 125 / 160 from 9...,46.0
...,...,...,...,...
7417,V9_8,Singed,Mana cost reduced to 60 / 70 / 80 / 90 / 100 t...,
7418,V9_8,Trundle,Base damage increased to 20 / 40 / 60 / 80 / 1...,100.0
7419,V9_8,Trundle,Cooldown reduced to 100 / 80 / 60 seconds from...,100.0
7420,V9_8,Zed,Base damage reduced to 80 / 115 / 150 / 185 / ...,


## Note Processing

In [4]:
def split_notes_into_sentences(df):
    new_rows = []
    for _, row in df.iterrows():
        if pd.isnull(row['note']):
            print(row)
            continue
        sentences = sent_tokenize(row['note'])
        for sentence in sentences:
            new_row = row.copy()
            new_row['note'] = sentence
            new_rows.append(new_row)
    return pd.DataFrame(new_rows)

new_df = split_notes_into_sentences(df)
print(new_df)

version           V5_23
champion    Champion Re
note                NaN
win_rate            NaN
Name: 4080, dtype: object
version     V5_23
champion    Quinn
note          NaN
win_rate      NaN
Name: 4106, dtype: object
version     V6_2
champion    Zyra
note         NaN
win_rate    50.0
Name: 4534, dtype: object
version       V6_7
champion    Rengar
note           NaN
win_rate       NaN
Name: 4719, dtype: object
version       V6_7
champion    Syndra
note           NaN
win_rate       NaN
Name: 4722, dtype: object
version          V6_8
champion    Master Yi
note              NaN
win_rate          NaN
Name: 4740, dtype: object
version          V6_8
champion    Master Yi
note              NaN
win_rate          NaN
Name: 4741, dtype: object
version          V6_8
champion    Master Yi
note              NaN
win_rate          NaN
Name: 4742, dtype: object
version          V6_8
champion    Master Yi
note              NaN
win_rate          NaN
Name: 4743, dtype: object
version     V7_15
champion

In [5]:
if SAVE_TO_CSV: new_df.to_csv('patches_sentences.csv', index=False, na_rep='NaN')