This notebook will help you sample posts for close reading

In [34]:
import nltk
import pandas as pd
from datetime import datetime
import re
from IPython.display import display
nltk.download('vader_lexicon')
nltk.download('punkt')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()
from nltk.corpus import stopwords
# Show full post text and every row when a frame is displayed.
# Option names are fully qualified: the bare pattern "max_rows" matches more
# than one option in newer pandas releases and raises OptionError there.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)
# The stopword list, the WordNet lemmatizer and nltk.pos_tag each need their
# own NLTK resource; fetch them here so a fresh environment can run the notebook.
# NOTE(review): recent NLTK releases may use different resource names — confirm
# against the installed version.
for _nltk_resource in ('stopwords', 'wordnet', 'averaged_perceptron_tagger'):
    nltk.download(_nltk_resource)
stop_words = stopwords.words("english")
import itertools
def reformat_data(df, column='Field1', default_year=2020):
    """Split raw scraped post cells into fixed-width rows.

    Each cell of ``df[column]`` is expected to hold one post as newline-separated
    lines: display name, handle, a separator dot, the date, the content lines
    and finally up to three engagement counts.

    The cell value is read directly instead of parsing ``str(row)``, so the
    result no longer depends on pandas display options such as
    ``display.max_colwidth``.

    Args:
        df: DataFrame holding the raw scraped text.
        column: Name of the column with the raw text.
        default_year: Year appended to dates shorter than 8 characters
            (i.e. dates that carry no year).

    Returns:
        A list of ``[name, date, num_comments, num_retweets, num_likes, content]``
        lists. Counts are kept as the scraped strings; missing counts are ``None``.
        Rows with fewer than five lines, replies and non-string cells are skipped.

    Raises:
        KeyError: if ``column`` is not present in ``df``.
    """
    # Plain integers as well as abbreviated counts such as "1.3K" or "10.6K".
    count_pattern = re.compile(r'\d+(?:[.,]\d+)*[KMB]?')
    list_df = []
    for _, row in df.iterrows():
        raw = row[column]
        if not isinstance(raw, str):
            continue  # missing cell (NaN / None)
        row_list = raw.replace('\r\n', '\n').strip(' ').split('\n')
        if len(row_list) < 5:
            continue
        if row_list[4] == 'Replying to ':
            continue
        row_list = [part for part in row_list if part != '']
        if len(row_list) < 4:
            continue  # not enough lines left for name, handle, dot and date
        # Drop the display name (index 0) and the separator dot (index 2);
        # the handle is used as the name.
        fields = [row_list[1]] + row_list[3:]

        # The counts start at the first of the last three lines that looks like
        # a count; never look into the handle/date slots (indices 0 and 1).
        end = None
        for offset in (-3, -2, -1):
            position = len(fields) + offset
            if position >= 2 and count_pattern.fullmatch(fields[position]):
                end = position
                break
        if end is None:
            # No counts at all: everything after the date is content.
            counts = []
            content_parts = fields[2:]
        else:
            counts = fields[end:]
            content_parts = fields[2:end]

        date = fields[1]
        if len(date) < 8:  # if there is no year
            date += f' {default_year}'
        content = ''.join(content_parts)
        list_df.append([fields[0], date] + counts + [None] * (3 - len(counts)) + [content])
    return list_df

def get_lemmed_sorted_dict(df):
    """Build the cleaned post table and its adjective frequency table.

    The raw frame is parsed with ``reformat_data``, every post's content is
    scored with ``analyse`` (stored in a ``VADER_score`` column), and the
    adjectives are counted with ``get_adjectives_dict`` using the module-level
    ``stop_words``.

    Returns:
        ``(adjective_counts, main_df)`` — the sorted adjective dictionary and
        the scored DataFrame.
    """
    column_names = ["Name", "Date", "Num_comments", "Num_retweets", "Num_likes", "Content"]
    main_df = pd.DataFrame(reformat_data(df), columns=column_names)
    main_df["VADER_score"] = [analyse(post) for post in main_df["Content"]]
    adjective_counts = get_adjectives_dict(main_df, stop_words)
    return adjective_counts, main_df

def analyse(text):
    """Return the ``"compound"`` entry of the polarity scores that the
    module-level ``sid`` analyzer computes for ``text``."""
    return sid.polarity_scores(text)["compound"]

def get_adjectives_dict(df, stop_words):
    """Count the lemmatised adjectives found in ``df["Content"]``.

    Each post is tokenised, tokens listed in ``stop_words`` are dropped, the
    remaining tokens are lemmatised and POS-tagged, and every token whose tag
    contains ``'JJ'`` is counted after removing its non-alphabetic characters.

    Args:
        df: Object whose ``"Content"`` entry iterates over post texts.
        stop_words: Collection of tokens to ignore.

    Returns:
        A dict mapping adjective -> occurrence count, ordered from most to
        least frequent. Tokens that contain no letters are not counted, so the
        result never has an empty-string key (and empty input yields ``{}``
        instead of raising ``KeyError``).
    """
    lemmed_adj_dict = {}
    for sentence in df["Content"]:
        tokens = nltk.word_tokenize(sentence)
        lemmed_tokens = [
            wordnet_lemmatizer.lemmatize(token)
            for token in tokens
            if token not in stop_words
        ]
        for token, tag in nltk.pos_tag(lemmed_tokens):
            if 'JJ' not in tag:
                continue
            adj = ''.join(ch for ch in token if ch.isalpha())
            if not adj:
                continue  # the tagged token had no letters at all
            lemmed_adj_dict[adj] = lemmed_adj_dict.get(adj, 0) + 1
    # sorted() is stable, so adjectives with equal counts keep first-seen order.
    lemmed_adj_sorted_keys = sorted(lemmed_adj_dict, key=lemmed_adj_dict.get, reverse=True)
    return {adj: lemmed_adj_dict[adj] for adj in lemmed_adj_sorted_keys}

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [79]:
# Load one scraped CSV per theme, stack them and drop exact duplicate rows
# before building the scored post table and the adjective counts.
# NOTE(review): the data directory is an absolute, machine-specific path.
file_path = "C:/Users/user/OneDrive - Singapore University of Technology and Design/Documents/SUTD/DH/Final Project/data/"
theme_name_list = [
    'BLESS', 'FORGIVE', 'GOD_SOVEREIGN', 'GRACE', 'GUIDE', 'HEALER', 'HOPE',
    'LOVE', 'MERCY', 'OVERCOME', 'PEACE', 'POWERFUL', 'PROTECT',
    'RIGHTEOUSNESS', 'SAVE', 'SON_MAN', 'TEACH', 'TRINITY', 'PROVIDE',
    'SACRIFICE', 'SEES',
]
full_df_list = []
for THEME in theme_name_list:
    file_name = f"{THEME}_2018-2020.csv"
    df = pd.read_csv(file_path + file_name)
    full_df_list.append(df)
full_df = pd.concat(full_df_list, axis=0).drop_duplicates(keep="first")
lemmed_sorted_dict, main_df = get_lemmed_sorted_dict(full_df)

In [80]:
# Keep only the adjectives counted at least 100 times, in their existing order,
# and show them as a one-column table.
above_100_sorted_dict = {
    adjective: count
    for adjective, count in lemmed_sorted_dict.items()
    if count >= 100
}
above_100_df = pd.DataFrame.from_dict(
    above_100_sorted_dict,
    orient='index',
    columns=['Total occurances'],
)
above_100_df

Unnamed: 0,Total occurances
u,2364
good,549
give,346
great,311
new,286
many,279
free,268
eternal,235
true,219
forgive,218


In [62]:
# Posts ordered by ascending VADER_score (lowest score first).
# mergesort is a stable sort, so posts with equal scores keep their main_df order.
negative_sorted = main_df.sort_values(by=['VADER_score'], kind='mergesort', ascending=True)


In [63]:
# Posts ordered by descending VADER_score (highest score first).
# mergesort is a stable sort, so posts with equal scores keep their main_df order.
positive_sorted = main_df.sort_values(by=['VADER_score'], kind='mergesort', ascending=False)


In [113]:
# Adding timestamp: convert each post's "Date" string to epoch milliseconds so
# the posts can be ordered chronologically.
def _date_to_millis(date):
    """Return ``date`` as epoch milliseconds, or ``None`` if it cannot be parsed.

    Two layouts are tried in turn: ``'%b %d, %Y'`` and ``'%b %d %Y'``.
    The parsed datetime is naive, so ``datetime.timestamp()`` interprets it in
    the machine's local timezone.
    """
    for date_format in ('%b %d, %Y', '%b %d %Y'):
        try:
            return datetime.strptime(date, date_format).timestamp() * 1000
        except (ValueError, TypeError):
            # ValueError: layout mismatch; TypeError: value is not a string.
            continue
    return None


timestamp_list = [_date_to_millis(date) for date in main_df["Date"]]
main_df["Timestamp"] = timestamp_list
time_sorted_df = main_df.sort_values(by=['Timestamp'], kind='mergesort', ascending=False)
time_sorted_df.head(5)

Unnamed: 0,Name,Date,Num_comments,Num_retweets,Num_likes,Content,VADER_score,Timestamp
0,@Coll3enG,Jan 31 2020,16,491,10.6K,my future roommate was like “okay so if there’s not enough room for the stripper pole we’ll just put our couch on the balcony” and i would like to thank not only god but jesus to be blessed with the opportunity to live with someone with the same priorities as me,0.9168,1580400000000.0
1,@JoyceMeyer,Jan 26 2020,318,1.3K,7.2K,"Prayer for Healing: Lord, I know You are able to heal me in every way I need healing. I ask for healing right now in Jesus’ name. I reject every doubt that sets itself up against Your Word, & I accept Your gift of healing. Above all things, I trust in You. In Jesus' name, Amen.",0.5574,1579968000000.0
2,@ReggieBush,Jan 28 2020,28,175,1.4K,"Living until you are old does not make you blessed because if that was the case then Jesus was cursed. Kobe fullfilled his purpose while he was here on earth, he impacted the world through basketball so he technically lived a full life. Love those closest to you harder today!",0.3283,1580141000000.0
3,@JasonRomano,Jan 29 2020,9,180,1.1K,Been thinking a lot about death now for a few days & praying we all don’t miss out on the amazing gift of eternal life while we’re here on the Earth. We’ve all got that choice to reject God or accept him thru his son Jesus. I pray you accept & trust in the one who died for you.,0.8519,1580227000000.0
4,@Avenger2Toxic,Jan 28 2020,89,270,559,"Heavenly Father, Fill us with your Holy Spirit. We put on your full armor. Make us wise. May our minds be focused. May we go about our daily tasks with vigor. Smooth the road Lord. Forgive our sin. Cleanse our hearts. Thank you for blessing us. In Jesus Name, Amen",0.9403,1580141000000.0


In [134]:
# Sample posts that contain `word` for close reading: walk the newest-first
# matches and, at evenly spaced positions, print a run of consecutive posts.
word = 'full'
N_SAMPLE_POINTS = 7   # the matches are divided into this many strides
POSTS_PER_POINT = 5   # consecutive posts printed at each sample position

# Lookarounds (rather than consuming a non-letter on each side) also match the
# word at the very start or end of a post; re.escape keeps `word` literal.
word_pattern = re.compile(rf"(?<![a-zA-Z]){re.escape(word)}(?![a-zA-Z])")
chosen_content_list = [
    (content, name)
    for content, name in zip(time_sorted_df["Content"], time_sorted_df["Name"])
    if word_pattern.search(content)
]
length = len(chosen_content_list)
# At least 1, so fewer than N_SAMPLE_POINTS matches cannot cause a modulo by zero.
step = max(1, length // N_SAMPLE_POINTS)
print(length)

counter = 0  # posts still to print in the current run
for i, (content, name) in enumerate(chosen_content_list):
    if counter <= 0 and i % step == 0:
        counter = POSTS_PER_POINT  # start a new run at this sample position
    if counter > 0:
        print(name)
        print(content)
        print()
        counter -= 1
    

150
@DelanoWilson3
Dear JESUS.  Give me the  wisdom to know that my storm cannot stop what You've proposed. Thank You that what would stop most people, can't stop me. I declare I'm full of purpose, I have an assignment. I will accomplish everything You ordained for me to accomplish. AMEN.

@riku_maki
Good morning, everyone! It's finally December. Last month of 2020. This year may be full of difficulties and heartbreaks, but let us not lose hope. Christmas season reminds us to be hopeful because our Lord Jesus Christ was born to save us. God bless us all! 

@SevenShepherd
"And Jesus being full of the Holy Ghost returned from Jordan, and was led by the Spirit into the wilderness," ~Luke 4:1 #Bible #God #Hope

@alisamaryel
So many things to be grateful for today.  Let's make the last month of 2020 be full of hope, love & faith to our Lord Jesus Christ. 

@riku_maki
Good morning, everyone! It's finally December. Last month of 2020. This year may be full of difficulties and heartbreaks, but