In [None]:
import sqlite3, pandas as pd

def load_sql(db_name, tbl_name):
    """Read an entire table from a SQLite file into a DataFrame.

    Args:
        db_name: Base name of the database file. The file opened is
            ``database/<db_name>.db``, resolved against the current
            working directory.
        tbl_name: Name of the table to read. It is interpolated into the
            query text as-is (SQL identifiers cannot be bound as query
            parameters), so it must come from a trusted source.

    Returns:
        A pandas DataFrame with every row and column of ``tbl_name``.

    Raises:
        Whatever ``sqlite3.connect`` or ``pd.read_sql`` raise (for example
        when the table does not exist); the error is propagated unchanged.
    """
    con = sqlite3.connect(f"database/{db_name}.db")
    try:
        return pd.read_sql(f"SELECT * FROM {tbl_name}", con)
    finally:
        # Release the connection even when the query fails, so a bad table
        # name does not leave an open handle on the database file.
        con.close()

# Pull the whole `tweets_v7` table out of database/tweets_v7.db.
df = load_sql(db_name='tweets_v7', tbl_name='tweets_v7')

In [None]:
# Sentiment lexicons: every line of each file becomes one list entry.
pos_words, neg_words = [], []

with open('resources/pos_words.txt', 'r') as pos_file:
    # Only newline characters are removed; the rest of the line is kept verbatim.
    pos_words.extend(line.replace('\n', '') for line in pos_file)

with open('resources/neg_words.txt') as neg_file:
    neg_words.extend(line.replace('\n', '') for line in neg_file)

In [None]:
from gensim.utils import simple_preprocess

def gen_words(texts):
    """Tokenise each text with gensim's ``simple_preprocess``.

    Every element of ``texts`` is passed to ``simple_preprocess`` with
    ``deacc=True`` (gensim's accent-removal option), and the results are
    collected in input order.

    Args:
        texts: Iterable of texts to tokenise.

    Returns:
        A list with one ``simple_preprocess`` result per input text.
    """
    return [simple_preprocess(doc, deacc=True) for doc in texts]

# Tokenise the `text` column: one token list per row.
data_words = gen_words(texts=df['text'])

In [None]:
from collections import Counter

# Set copies of the lexicons: a membership test on a set is O(1) on average,
# whereas testing against the original lists rescans the whole lexicon for
# every single token.
neg_lexicon = set(neg_words)
pos_lexicon = set(pos_words)

# Despite the `_list` suffix (kept because later cells use these names), both
# objects are Counters mapping each matched word to its number of occurrences.
neg_words_list = Counter()
pos_words_list = Counter()

for text in data_words:
    for word in text:
        # The two checks are independent: a word present in both lexicons is
        # tallied in both counters.
        if word in neg_lexicon:
            neg_words_list[word] += 1
        if word in pos_lexicon:
            pos_words_list[word] += 1

In [None]:
import matplotlib.pyplot as plt
from wordcloud import WordCloud

def word_cloud(word_freq, title='Word Cloud'):
    """Render a word cloud from word frequencies and save it under ``figures/``.

    Args:
        word_freq: Mapping of word -> frequency, passed unchanged to
            ``WordCloud.generate_from_frequencies``.
        title: Label used only to build the output filename (it is not drawn
            on the figure). Spaces are replaced by underscores and the image
            is written to ``figures/WC_<title>``. No file extension is
            supplied, so the image format is left to matplotlib's default.

    Returns:
        None. The figure is saved to disk and left open.
    """
    import os  # local import: the notebook's import cells do not bring in `os`

    file_stem = title.replace(' ', '_')

    wc = WordCloud(
        width=500,
        height=300,
        background_color='white',
        colormap='Paired',
        max_font_size=200,
        max_words=100)
    wc.generate_from_frequencies(word_freq)

    # Explicit figure/axes handles instead of the implicit pyplot "current
    # figure" state. The figure is intentionally not closed here.
    # NOTE(review): presumably the notebook relies on the open figure for
    # inline display - confirm before adding a plt.close().
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(wc, interpolation='bilinear')
    ax.axis('off')

    # Create the output directory on first use; without it savefig fails
    # with FileNotFoundError after the cloud has already been generated.
    os.makedirs('figures', exist_ok=True)
    fig.savefig(f'figures/WC_{file_stem}')
    
# One cloud per sentiment polarity, negative first; each call also writes the
# image under figures/.
word_cloud(word_freq=neg_words_list, title='Negative Words')
word_cloud(word_freq=pos_words_list, title='Positive Words')