In [1]:
## Packages and libraries:
import json
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer, TfidfTransformer, CountVectorizer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import r2_score
import gensim 

import warnings
warnings.filterwarnings('ignore')
#from apiclient.discovery import build
#import api_key as api_key

In [2]:
import text2emotion as te
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

#nltk.download('vader_lexicon')
sent = SentimentIntensityAnalyzer()

[nltk_data] Downloading package stopwords to /home/t0ad/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/t0ad/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/t0ad/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
posts = pd.read_csv('../data/homeless_posts.csv')
posts.drop(columns = ['Unnamed: 0',], inplace = True)
posts.head(2)

Unnamed: 0,post_id,author,location,date,post,Filter_exp_2a,Filter_2a_labels,Word Count
0,182941686,Beth Daoud,Congress Park,9 Apr,Squatters Camp. Am I the only one outraged by ...,2,Homeless,71
1,184972043,Deb Christian,Hudson Street,27 Apr,It’s this whole thing of changing the housing ...,2,Homeless,28


In [4]:
comments = pd.read_csv('../data/all_comments.csv')
comments.drop(columns = ['Unnamed: 0','Unnamed: 0.1'],inplace = True)
comments.head(2)

Unnamed: 0,post_id,author,location,date,comment
0,186113992,Gabe Dusenbury,• Hale,3 days ago,"Not a great look, Park Hill!"
1,186113992,Andrew Walvoord,• Hilltop,3 days ago,Why not? Why should the city be putting homele...


In [5]:
comments.to_csv('../data/homeless_comments.csv')

In [6]:
def df_predict_emo(df_col, title):
    # lists for holding emotion scores 
    angry = []
    surprise = []
    sad = []
    fear = []
    happy = []
    # list of tuples to itterate over 
    targets = [(angry, 'Angry'),(surprise,'Surprise'),(sad,'Sad'),(fear, "Fear"),(happy, "Happy")]
    # get score of eac post in DF and append to list for each emotion 
    for p in df_col:
        largest = 0
        scores = te.get_emotion(p)
        for l , emotion in targets:
            l.append(scores[f'{emotion}'])

    # create columns in DF for scores         
    for l, emotion in targets:
        title[f'{emotion}'] = [ li for li in l ]  

    p_scores = []
    for post in df_col:
        p_score = sent.polarity_scores(post)
        p_scores.append(p_score['compound'])  
    title['Polarity'] = [ s for s in p_scores]

In [7]:
def personal_report(df):
    new_dfs = []
    authors = [a for a in list(df['author'].unique())]
    emotions = ['Angry','Surprise','Sad','Fear','Happy','Polarity']
    sum_list = []
    max_list = []
    min_list = []
    mean_list = []
    
    for a in authors:
        for emo in emotions:
            sum_list.append((df[f'{emo}'].sum(), emo))
            max_list.append((df[f'{emo}'].max(), emo))
            min_list.append((df[f'{emo}'].min(), emo))
            mean_list.append((df[f'{emo}'].mean(), emo))

        new = pd.DataFrame()
        # new['Emotion'] = [e for e in emotions ]
        new['Total'] = [s for s in sum_list]
        new['Max'] = [ma for ma in max_list]
        new['Min'] = [mi for mi in min_list]
        new['Mean'] = [me for me in mean_list]
        new['Location'] = df['location'][0]
        new['author'] = [ a for a in authors ]
    new_dfs.append(new)
    return new_dfs

In [8]:
def sentiment_report(df):
    emotions = ['Angry','Surprise','Sad','Fear','Happy','Polarity']
    sum_list = []
    max_list = []
    min_list = []
    mean_list = []
    for emo in emotions:
        sum_list.append(df[f'{emo}'].sum())
        max_list.append(df[f'{emo}'].max())
        min_list.append(df[f'{emo}'].min())
        mean_list.append(df[f'{emo}'].mean())

    new = pd.DataFrame()
    new['Emotion'] = [e for e in emotions ]
    new['Total'] = [s for s in sum_list]
    new['Max'] = [ma for ma in max_list]
    new['Min'] = [mi for mi in min_list]
    new['Mean'] = [me for me in mean_list]
    new['Location'] = df['location'][0]
    return new

In [9]:
# getting rid of comments from irrelavent posts
keepers = [ p_id for p_id in posts['post_id']]
for i, p_id in enumerate(comments['post_id']):
    if p_id not in keepers:
        comments.drop(index = i, inplace = True)

In [10]:
df_predict_emo(posts['post'], posts)
df_predict_emo(comments['comment'], comments)

In [11]:
posts['location'].isna().sum()
posts['location']=posts['location'].fillna('Gov')

In [12]:
locations = list(posts.location.unique())
loc_df_list = []
for i, loc in enumerate(locations):
    loc = posts[posts['location'] == loc]
    loc_df_list.append(loc)

In [14]:
for loc_df in loc_df_list:
    for name in locations:
        if name != np.NaN:
            print(name)
            name = name.split('/')[0]
            loc_df.to_csv(f'../data/results/prelim/{name}_loc.csv')

Congress Park
Hudson Street
Mayfair
Cherry Creek North
Hale
City Park
Crestmoor/Mayfair Park
Hilltop
Gov
South Park Hill
North Park Hill
East Colfax
Aurora Arts District
Lowry
Central Park
Northeast Park Hill
North Aurora/Fitzsimons
Wash Park West
Barnum
Historic Montclair
Sloans/Highlands
Aurora Cultural Arts District
Montbello
Hoffman Heights
RiNo Art District
Parkfield
Westwood
Vasquez Blvd
Fort Logan
Barnum West
Beeler Park
Locust St
92nd and Grove
Morris Heights
Highlands
Berkeley-Regis
Thornton
Arvada Columbine
Aurora
Westy Hills
Bluff Lake Aurora
Allendale
Cheesman Park
Memorial Park
Holly Hills
Skyland
Old Thornton
Whittier
Wandering View
Windsor Gardens East
Wyco Park
Congress Park
Hudson Street
Mayfair
Cherry Creek North
Hale
City Park
Crestmoor/Mayfair Park
Hilltop
Gov
South Park Hill
North Park Hill
East Colfax
Aurora Arts District
Lowry
Central Park
Northeast Park Hill
North Aurora/Fitzsimons
Wash Park West
Barnum
Historic Montclair
Sloans/Highlands
Aurora Cultural Arts Di

In [28]:
authors = posts.drop(columns = ['post_id','Filter_exp_2a']).groupby(['author']).mean()
cm = sns.light_palette("pink", as_cmap=True);
authors = authors.style.background_gradient(cmap=cm);
authors = authors.highlight_max(color = 'red');

In [31]:
import dataframe_image as dfi

In [40]:
authors.export_png('../data/results/Authors.png', max_rows = 50)

In [43]:
locations = posts.drop(columns = ['post_id','Filter_exp_2a']).groupby(['location']).mean()
cm = sns.light_palette("blue", as_cmap=True);
locations = locations.style.background_gradient(cmap=cm);
locations = locations.highlight_max(color = 'green');

In [44]:
locations.export_png('../data/results/Locations.png', max_rows = 50)

In [18]:
# emotions = ['Angry','Surprise','Sad','Fear','Happy','Polarity']

# results = []
# for df in loc_df_list:
#     print('h')
#     for name in locations:
#         print(name)
#         name = name.split('/')[0]
#         print(name)
#         res = sentiment_report(df)
#         results.append(res)
#         res.to_csv(f'../data/results/final/{name}_res.csv')
        
        
# authors = [a for a in list(posts['author'].unique())]

# people = []
# for name in authors:
#         #name = name.split('/')[0]
#         res = personal_report(posts)
#         people.append(res)
#         res.to_csv(f'../data/results/final/Z.{name}_person_res.csv')

h
Congress Park
Congress Park
Hudson Street
Hudson Street
Mayfair
Mayfair
Cherry Creek North
Cherry Creek North
Hale
Hale
City Park
City Park
Crestmoor/Mayfair Park
Crestmoor
Hilltop
Hilltop
Gov
Gov
South Park Hill
South Park Hill
North Park Hill
North Park Hill
East Colfax
East Colfax
Aurora Arts District
Aurora Arts District
Lowry
Lowry
Central Park
Central Park
Northeast Park Hill
Northeast Park Hill
North Aurora/Fitzsimons
North Aurora
Wash Park West
Wash Park West
Barnum
Barnum
Historic Montclair
Historic Montclair
Sloans/Highlands
Sloans
Aurora Cultural Arts District
Aurora Cultural Arts District
Montbello
Montbello
Hoffman Heights
Hoffman Heights
RiNo Art District
RiNo Art District
Parkfield
Parkfield
Westwood
Westwood
Vasquez Blvd
Vasquez Blvd
Fort Logan
Fort Logan
Barnum West
Barnum West
Beeler Park
Beeler Park
Locust St
Locust St
92nd and Grove
92nd and Grove
Morris Heights
Morris Heights
Highlands
Highlands
Berkeley-Regis
Berkeley-Regis
Thornton
Thornton
Arvada Columbine
Arv

KeyError: 0