In [2]:
import pandas as pd 
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
# Download the VADER lexicon used by SentimentIntensityAnalyzer.
nltk.download('vader_lexicon')

def vader_sentiment_analysis(df, sia=None):
    '''
    Score each row's review text with a VADER-style sentiment analyzer.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``reviews_sw`` column (block of review text minus
        stop words). Any other columns, e.g. ``camis``, are ignored.
    sia : object, optional
        Any object exposing ``polarity_scores(text) -> dict``. When omitted,
        a new ``SentimentIntensityAnalyzer`` is created.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``df``, carrying ``df``'s index, with the four
        columns ``neg``, ``neu``, ``pos``, ``compound``. ``camis`` is NOT
        part of the result; join back on the index to recover it.

    Raises
    ------
    KeyError
        If ``df`` has no ``reviews_sw`` column.
    '''
    score_columns = ['neg', 'neu', 'pos', 'compound']
    if sia is None:
        sia = SentimentIntensityAnalyzer()

    # Missing reviews are scored as empty text instead of being handed to
    # the analyzer as NaN/None.
    records = [
        sia.polarity_scores('' if pd.isna(review) else str(review))
        for review in df['reviews_sw']
    ]

    # Build the frame once, positionally aligned with df. This keeps every
    # row even when index labels repeat and returns an empty, correctly
    # shaped frame for empty input.
    return pd.DataFrame(records, index=df.index, columns=score_columns)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/jongarcia/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [11]:
# Load the table stored in ny_sentiment.csv into `ny`.
ny = pd.read_csv('ny_sentiment.csv')

In [30]:
# Preview the first rows of `ny`.
ny.head()

Unnamed: 0,camis,dba,boro,phone,inspection_date,latitude,longitude,cuisine_description,action,score,grade,full_address,violation_code,violation_description,last_inspection_date,concatenated_reviews,reviews_sw
0,40364439,SEVILLA RESTAURANT,Manhattan,2129293189,2021-08-04,40.734908,-74.002973,Latin American,Violations cited,27,B,62 CHARLES STREET 10014,10B 04L 02H 04H 08A,Plumbing not properly installed or maintained;...,2020-08-04,Horrible food and slow service. Seafood in p...,Horrible food slow service. Seafood paella tas...
1,40364439,SEVILLA RESTAURANT,Manhattan,2129293189,2022-05-18,40.734908,-74.002973,Latin American,Violations cited,19,B,62 CHARLES STREET 10014,04L 08C 06C 08A,Evidence of mice or live mice present in facil...,2021-08-04,Superb! I haven’t had good paella since mo...,Superb! I haven’t good paella since moving bac...
2,40364439,SEVILLA RESTAURANT,Manhattan,2129293189,2023-05-17,40.734908,-74.002973,Latin American,Violations cited,12,A,62 CHARLES STREET 10014,10B 04L 08A,Anti-siphonage or back-flow prevention device ...,2022-05-18,Gem in the océano of nyc. Spanish dishes and o...,Gem océano nyc. Spanish dishes old school span...
3,40365904,MEE SUM CAFE,Manhattan,2123495260,2021-10-13,40.714861,-73.9982,Coffee/Tea,Violations cited,67,C,26 PELL STREET 10013,02G 04A 08A 06C 02B 04N 05D 10B 10F 09C 06F,Cold food item held above 41º F (smoked fish a...,2020-10-13,Very authentic place. To the point that I ne...,Very authentic place. To point I needed lots h...
4,40365904,MEE SUM CAFE,Manhattan,2123495260,2021-11-24,40.714861,-73.9982,Coffee/Tea,Violations cited,24,B,26 PELL STREET 10013,10F 02G 02B 06C 10B,Non-food contact surface improperly constructe...,2021-10-13,Really great classic Chinese comfort food. Gre...,Really great classic Chinese comfort food. Gre...


In [14]:
# Count rows whose concatenated_reviews value is missing.
ny['concatenated_reviews'].isna().sum()

0

In [35]:
# Keep the first rows of `ny` (head() default) as a small sample frame.
# NOTE(review): ny_head is not referenced by any other cell shown here — confirm it is still needed.
ny_head = ny.head()

In [12]:
# (rows, columns) of `ny`.
ny.shape

(2594, 21)

In [4]:
# Run VADER over the reviews_sw column of `ny` and preview the scores.
# NOTE(review): `ny` is assigned in more than one cell of this notebook; which
# frame is scored here depends on execution order — confirm on a fresh run.
ny_sentiment = vader_sentiment_analysis(ny)
ny_sentiment.head()

Unnamed: 0,neg,neu,pos,compound
0,0.084,0.673,0.244,0.9975
1,0.041,0.63,0.33,0.9998
2,0.055,0.581,0.365,1.0
3,0.06,0.688,0.252,0.7717
4,0.0,0.537,0.463,0.9468


In [5]:
# Load the processed reviews table and preview it.
# NOTE(review): this rebinds `ny`, which another cell also assigns from
# 'ny_sentiment.csv'; a distinct name would avoid order-dependent state.
ny = pd.read_csv('ny_reviews_processed.csv')
ny.head()

Unnamed: 0,camis,dba,boro,phone,inspection_date,latitude,longitude,cuisine_description,action,score,...,full_address,violation_code,violation_description,last_inspection_date,avg_service,avg_atmosphere,avg_food,avg_price,reviews_sw,reviews
0,40364439,SEVILLA RESTAURANT,Manhattan,2129293189,2021-08-04,40.734908,-74.002973,Latin American,Violations cited,27,...,62 CHARLES STREET 10014,10B 04L 02H 04H 08A,Plumbing not properly installed or maintained;...,2020-08-04,5.0,5.0,5.0,60.612245,Horrible food slow service. Seafood paella tas...,horrible food slow service seafood paella tast...
1,40364439,SEVILLA RESTAURANT,Manhattan,2129293189,2022-05-18,40.734908,-74.002973,Latin American,Violations cited,19,...,62 CHARLES STREET 10014,04L 08C 06C 08A,Evidence of mice or live mice present in facil...,2021-08-04,5.0,5.0,5.0,60.612245,Superb! I haven’t good paella since moving bac...,superb havent good paella since moving back ny...
2,40364439,SEVILLA RESTAURANT,Manhattan,2129293189,2023-05-17,40.734908,-74.002973,Latin American,Violations cited,12,...,62 CHARLES STREET 10014,10B 04L 08A,Anti-siphonage or back-flow prevention device ...,2022-05-18,5.0,5.0,5.0,60.612245,Gem océano nyc. Spanish dishes old school span...,gem ocano nyc spanish dish old school spanish ...
3,40365904,MEE SUM CAFE,Manhattan,2123495260,2021-10-13,40.714861,-73.9982,Coffee/Tea,Violations cited,67,...,26 PELL STREET 10013,02G 04A 08A 06C 02B 04N 05D 10B 10F 09C 06F,Cold food item held above 41º F (smoked fish a...,2020-10-13,4.333333,4.333333,4.5,17.692308,Very authentic place. To point I needed lots h...,authentic place point needed lot help managed ...
4,40365904,MEE SUM CAFE,Manhattan,2123495260,2021-11-24,40.714861,-73.9982,Coffee/Tea,Violations cited,24,...,26 PELL STREET 10013,10F 02G 02B 06C 10B,Non-food contact surface improperly constructe...,2021-10-13,4.333333,4.333333,4.5,17.692308,Really great classic Chinese comfort food. Gre...,really great classic chinese comfort food grea...


In [7]:
# (rows, columns) of the frame currently bound to `ny`.
ny.shape

(2594, 21)

In [8]:
# Attach the VADER score columns to the review table.
# Column-wise concat aligns on index labels, not row position, so a mismatch
# between the two frames would silently yield NaN-padded rows; fail loudly.
assert ny.index.equals(ny_sentiment.index), (
    "ny and ny_sentiment must share the same index before column-wise concat"
)
ny_reviews_sentiment = pd.concat([ny, ny_sentiment], axis=1)
ny_reviews_sentiment.head()

Unnamed: 0,camis,dba,boro,phone,inspection_date,latitude,longitude,cuisine_description,action,score,...,avg_service,avg_atmosphere,avg_food,avg_price,reviews_sw,reviews,neg,neu,pos,compound
0,40364439,SEVILLA RESTAURANT,Manhattan,2129293189,2021-08-04,40.734908,-74.002973,Latin American,Violations cited,27,...,5.0,5.0,5.0,60.612245,Horrible food slow service. Seafood paella tas...,horrible food slow service seafood paella tast...,0.084,0.673,0.244,0.9975
1,40364439,SEVILLA RESTAURANT,Manhattan,2129293189,2022-05-18,40.734908,-74.002973,Latin American,Violations cited,19,...,5.0,5.0,5.0,60.612245,Superb! I haven’t good paella since moving bac...,superb havent good paella since moving back ny...,0.041,0.63,0.33,0.9998
2,40364439,SEVILLA RESTAURANT,Manhattan,2129293189,2023-05-17,40.734908,-74.002973,Latin American,Violations cited,12,...,5.0,5.0,5.0,60.612245,Gem océano nyc. Spanish dishes old school span...,gem ocano nyc spanish dish old school spanish ...,0.055,0.581,0.365,1.0
3,40365904,MEE SUM CAFE,Manhattan,2123495260,2021-10-13,40.714861,-73.9982,Coffee/Tea,Violations cited,67,...,4.333333,4.333333,4.5,17.692308,Very authentic place. To point I needed lots h...,authentic place point needed lot help managed ...,0.06,0.688,0.252,0.7717
4,40365904,MEE SUM CAFE,Manhattan,2123495260,2021-11-24,40.714861,-73.9982,Coffee/Tea,Violations cited,24,...,4.333333,4.333333,4.5,17.692308,Really great classic Chinese comfort food. Gre...,really great classic chinese comfort food grea...,0.0,0.537,0.463,0.9468


In [9]:
# (rows, columns) of the combined reviews + sentiment frame.
ny_reviews_sentiment.shape

(2594, 25)

In [10]:
# Write the combined frame to CSV; index=False leaves the row index out of the file.
ny_reviews_sentiment.to_csv('ny_reviews_sentiment_ratings.csv', index=False)

In [1]:
# Reload a previously saved reviews + sentiment table and preview it.
# NOTE(review): the save cell in this notebook writes
# 'ny_reviews_sentiment_ratings.csv', not the file read here — confirm the
# intended file name. This cell also needs the pandas import cell to have run
# first (its recorded output is a NameError for `pd`).
ny_reviews_sentiment = pd.read_csv('ny_reviews_sentiment.csv')
ny_reviews_sentiment.head()

NameError: name 'pd' is not defined