# Sentiment Analysis of the New StarCraft 2 Patch

## Collecting reviews from the Blizzard forum

In [1]:
import requests
from bs4 import BeautifulSoup
import os

In [2]:
# Local file holding the saved HTML of the StarCraft 2 forum thread.
FILE_R_PATH = "r.txt"
# Forum thread the reviews are scraped from.
URL = "https://us.forums.blizzard.com/en/starcraft/t/new-ladder-season-incoming/2577"

In [3]:
# Load the forum page HTML into r_text.
# Reuse the local cache file when it exists; otherwise download the page and
# write it to the cache so later runs do not hit the network again.
if os.path.exists(FILE_R_PATH):
    # Explicit encoding so non-ASCII review text does not depend on the locale.
    with open(FILE_R_PATH, 'r', encoding='utf-8') as f:
        r_text = f.read()
else:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    r = requests.get(URL, timeout=30)
    print(r.status_code)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    r.raise_for_status()
    r_text = r.text
    with open(FILE_R_PATH, 'w', encoding='utf-8') as f:
        f.write(r_text)

In [4]:
# Build the parse tree from the raw HTML using Python's built-in parser.
soup = BeautifulSoup(markup=r_text, features='html.parser')

In [5]:
# Every element carrying the CSS class "post".
# find_all is the supported name; findAll is a deprecated legacy alias.
divs = soup.find_all(class_='post')

In [6]:
# One string per post: the text of every <p> inside the post, joined by
# newlines. A post without any <p> yields an empty string.
reviews = [
    '\n'.join(p.text for p in post.find_all('p'))
    for post in divs
]

## Analyzing reviews

In [7]:
import pandas as pd
import numpy as np

In [8]:
# One row per review. Building the frame straight from the list avoids the
# fixed-width NumPy string array and keeps the column as text even when
# `reviews` is empty.
df = pd.DataFrame({'review': reviews})

Let's see how many words each review has.

In [9]:
# Number of whitespace-separated tokens in each review.
df['word_count'] = df['review'].map(lambda text: len(text.split()))

How many characters do we have?

In [10]:
# Total number of characters (including whitespace) in each review.
df['char_count'] = df['review'].map(len)

Cleaning our data...

In [11]:
# Remove rows whose review text is empty (zero characters).
df.drop(index=df.index[df['char_count'] == 0], inplace=True)

Average length of words?

In [12]:
def average_words(x):
    """Return the mean length, in characters, of the words in ``x``.

    Words are the whitespace-separated tokens of ``x``. When ``x`` contains
    no words (empty or whitespace-only), 0 is returned.
    """
    lengths = [len(token) for token in x.split()]
    # str.split() never yields empty tokens, so "no words" is the only way
    # for the total length to be zero.
    if not lengths:
        return 0
    return sum(lengths) / len(lengths)

In [13]:
# Mean word length per review; the function is passed directly, no lambda needed.
df['average_word_length'] = df['review'].apply(average_words)

In [14]:
from nltk.corpus import stopwords

In [16]:
import nltk
# Fetch the NLTK 'stopwords' corpus, which stopwords.words() reads from.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /home/joseph/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [17]:
# English stop-word list from the NLTK 'stopwords' corpus.
stop_words = stopwords.words('english')

In [19]:
# Sanity check: how many English stop words were loaded.
len(stop_words)

179

In [24]:
# Count the tokens of each review that are English stop words
# (case-insensitive; tokens are split on whitespace, punctuation is kept).
# A set gives constant-time membership tests instead of scanning the
# stop-word list once per token.
stop_word_set = frozenset(stop_words)
df['stopwords_count'] = df['review'].apply(
    lambda text: sum(token.lower() in stop_word_set for token in text.split())
)

In [27]:
# Fraction of each review's words that are stop words.
df['stopword_rate'] = df['stopwords_count'].div(df['word_count'])

In [29]:
# Show the reviews ordered from lowest to highest stop-word rate.
df.sort_values('stopword_rate', ascending=True)

Unnamed: 0,review,word_count,char_count,average_word_length,stopwords_count,stopword_rate
16,ASL12 maps GOGOGO!!!,3,20,6.0,0,0.0
6,Cydra go Marry me!,4,19,3.75,0,0.0
14,Super genial. Desde Huancayo Perú estamos supe...,14,84,5.071429,1,0.071429
18,Nice. Very good. Saludos desde Argentina,6,40,5.833333,1,0.166667
4,very happy happy happy,4,22,4.75,1,0.25
7,What abour 2vs2 ranked??,4,24,5.25,1,0.25
3,Would be interested in chatting with someone r...,14,107,6.714286,4,0.285714
17,Beautiful news!! peruvian fans was expecting a...,12,73,5.083333,4,0.333333
12,Please use the ASL map pool. You can maybe add...,33,189,4.757576,11,0.333333
10,Big thanks!! A thought maybe more map in map p...,26,134,4.192308,9,0.346154


In [30]:
# Summary statistics for the numeric feature columns.
df.describe()

Unnamed: 0,word_count,char_count,average_word_length,stopwords_count,stopword_rate
count,19.0,19.0,19.0,19.0,19.0
mean,21.894737,121.263158,4.724242,8.421053,0.314848
std,31.867599,177.105067,0.809818,14.492587,0.157337
min,3.0,19.0,3.5,0.0,0.0
25%,5.0,29.5,4.221154,1.0,0.25
50%,11.0,61.0,4.625,4.0,0.346154
75%,27.0,144.0,5.077381,10.0,0.436508
max,142.0,796.0,6.714286,64.0,0.5
