In [1]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from sklearn.metrics import accuracy_score, classification_report
from textblob import TextBlob

file = "./__datasets/reddit_posts_comments__food.json"
df = pd.read_json(file)

df

Unnamed: 0,id,title,text,comments
0,1jnoobd,[i ate]Japanese Eel Rice,,I've only had eel sushi and turns out I really...
1,1jngk75,[Homemade] Limoncello,"From Backyard to Bottle, homemade Limoncello. ...",nice job. looks great. how's the taste? (and t...
2,1jns37m,[Homemade]dutch oven pot roast,,"The food looks so good, it’s making my mouth w..."
3,1jnuggm,[homemade] Cajun Shrimp Risotto,,Wow. That looks yummy! | I bet that was amazin...
4,1jnrrj6,Chicken Shawarma [homemade],,Look delicious! | What was in the marinade? | ...
...,...,...,...,...
95,1jo0ms8,[I ate] Falafel Kebabs for dinner,,|
96,1jnqyde,[homemade] cod poached in coconut milk (two wa...,,Is there a recipe you pulled this from? I woul...
97,1jnu7f6,[homemade] Pollo Roja Taquitos with Black Refr...,This was painstakingly homemade by me. Wasn’t ...,|
98,1jno97e,[Homemade] Palak Pakodi,"It is an indian snack made from spinach, onion...",Those are dangerously good. |


In [None]:
df = df.astype(str).apply(lambda col: col.str.lower())

df['category'] = df['title'].str.extract(r'\[(.*?)\]')

df['title'] = df['title'].str.replace(r'\[\s*(.*?)\s*\]\s*', '', regex=True)

df.replace(r'^\s*$', "NA", regex=True, inplace = True)

In [2]:
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

def clean_text(text):
    
    text = re.sub(r'[^a-zA-Z\s]', "", text)
    
    text = ' '.join(word for word in text.split() if word not in stop_words)
    
    return text

df["title"] = df["title"].apply(clean_text)
df["text"] = df["text"].apply(clean_text)
df["comments"] = df["comments"].apply(clean_text)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/erickxu/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
df

Unnamed: 0,id,title,text,comments
0,1jnoobd,ateJapanese Eel Rice,,Ive eel sushi turns I really like way prepare ...
1,1jngk75,Homemade Limoncello,From Backyard Bottle homemade Limoncello ml bo...,nice job looks great hows taste proof Great jo...
2,1jns37m,Homemadedutch oven pot roast,,The food looks good making mouth water looking...
3,1jnuggm,homemade Cajun Shrimp Risotto,,Wow That looks yummy I bet amazing Looks delic...
4,1jnrrj6,Chicken Shawarma homemade,,Look delicious What marinade Recipe please Yum...
...,...,...,...,...
95,1jo0ms8,I ate Falafel Kebabs dinner,,
96,1jnqyde,homemade cod poached coconut milk two ways man...,,Is recipe pulled I would love try day I wasnt ...
97,1jnu7f6,homemade Pollo Roja Taquitos Black Refried Bea...,This painstakingly homemade Wasnt following re...,
98,1jno97e,Homemade Palak Pakodi,It indian snack made spinach onion besan deep ...,Those dangerously good


In [4]:
def get_sentiment(text):
    
    blob = TextBlob(text)
    sentiment = blob.sentiment.polarity
    return sentiment

df['comment_sentiment'] = df['comments'].apply(get_sentiment)

df['comment_sentiment'] = df['comment_sentiment'].apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))

df

Unnamed: 0,id,title,text,comments,comment_sentiment
0,1jnoobd,ateJapanese Eel Rice,,Ive eel sushi turns I really like way prepare ...,1
1,1jngk75,Homemade Limoncello,From Backyard Bottle homemade Limoncello ml bo...,nice job looks great hows taste proof Great jo...,1
2,1jns37m,Homemadedutch oven pot roast,,The food looks good making mouth water looking...,1
3,1jnuggm,homemade Cajun Shrimp Risotto,,Wow That looks yummy I bet amazing Looks delic...,1
4,1jnrrj6,Chicken Shawarma homemade,,Look delicious What marinade Recipe please Yum...,1
...,...,...,...,...,...
95,1jo0ms8,I ate Falafel Kebabs dinner,,,0
96,1jnqyde,homemade cod poached coconut milk two ways man...,,Is recipe pulled I would love try day I wasnt ...,1
97,1jnu7f6,homemade Pollo Roja Taquitos Black Refried Bea...,This painstakingly homemade Wasnt following re...,,0
98,1jno97e,Homemade Palak Pakodi,It indian snack made spinach onion besan deep ...,Those dangerously good,1
