In [31]:
from google.colab import drive

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [32]:
import pandas as pd

visionpro_posts = pd.read_csv('gdrive/My Drive/comp631/631project/task1/VisionPro.csv')
iphone_posts = pd.read_csv('gdrive/My Drive/comp631/631project/task1/iphone.csv')
macbook_posts = pd.read_csv('gdrive/My Drive/comp631/631project/task1/macbook.csv')
applewatch_posts = pd.read_csv('gdrive/My Drive/comp631/631project/task1/AppleWatch.csv')
doc_list = [visionpro_posts, iphone_posts, macbook_posts, applewatch_posts]

In [33]:
!pip install nltk



In [34]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Download required NLTK resources if not already downloaded
nltk.download('vader_lexicon')
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

def preprocess(text):
    if pd.isna(text) or not text.strip():
        return ''

    # Remove punctuation
    text = ''.join([char for char in text if char.isalnum() or char.isspace()])

    # Tokenization
    tokens = word_tokenize(text)

    # Stop word removal and case folding
    stop_words = set(stopwords.words('english'))
    tokens = [token.lower() for token in tokens if token.lower() not in stop_words]

    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]

    # Join tokens back to string
    return ' '.join(lemmatized_tokens)

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [35]:
from nltk.sentiment import SentimentIntensityAnalyzer

# Sentiment Analysis function
def analyze_sentiment(text):
    sia = SentimentIntensityAnalyzer()
    score = sia.polarity_scores(text)
    return score['compound'], 'positive' if score['compound'] >= 0.05 else 'negative' if score['compound'] <= -0.05 else 'neutral'

In [36]:
# Preprocess and analyze sentiment for posts
for posts in doc_list:
  posts['Processed Title'] = posts['Title'].apply(preprocess)
  posts['Title Sentiment Score'], posts['Title Sentiment'] = zip(*posts['Processed Title'].apply(analyze_sentiment))

  posts['Processed Post Text'] = posts['Post Text'].apply(preprocess)
  posts['Post Text Sentiment Score'], posts['Post Text Sentiment'] = zip(*posts['Processed Post Text'].apply(analyze_sentiment))

In [37]:
product_names = ["VisionPro", "iphone", "macbook", "AppleWatch"]
# Save the processed data frames to new CSV files
for posts, product_name in zip(doc_list, product_names):
  posts.to_csv("gdrive/My Drive/comp631/631project/task2/" + product_name + "_task2.csv", index=False)

In [38]:
applewatch_posts

Unnamed: 0.1,Unnamed: 0,Title,Post Text,ID,Score,Total Comments,Upvote Ratio,Post URL,Processed Title,Title Sentiment Score,Title Sentiment,Processed Post Text,Post Text Sentiment Score,Post Text Sentiment
0,0,RIP to everyone who on 9/11,,16gdesx,2662,317,0.78,https://reddit.com/16gdesx,rip everyone 911,0.0000,neutral,,0.0000,neutral
1,1,Please Apple. Please. Please give me this watc...,,16pzfgj,1962,242,0.93,https://reddit.com/16pzfgj,please apple please please give watch face don...,0.6886,positive,,0.0000,neutral
2,2,Why is my watch calling me an idiot? Is there ...,It calls me idiot in notifications and on thes...,16l8oph,1781,110,0.93,https://reddit.com/16l8oph,watch calling idiot fix,-0.5106,negative,call idiot notification badge way change real ...,-0.5106,negative
3,3,Removing part off a tattoo to make the Apple W...,Did this for Apple Watch to work completely. ...,169295k,1617,512,0.89,https://reddit.com/169295k,removing part tattoo make apple watch wrist de...,0.0000,neutral,apple watch work completely first 4picture res...,-0.1779,negative
4,4,Charging the Watch with your iPhone got to be ...,,16qzl25,1354,165,0.95,https://reddit.com/16qzl25,charging watch iphone got best utilization usb...,0.6369,positive,,0.0000,neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,No one is missing the custom icon layouts we u...,I really was surprised to see my custom icon l...,16o63ah,184,220,0.78,https://reddit.com/16o63ah,one missing custom icon layout used able surpr...,-0.0772,negative,really surprised see custom icon layout gone r...,0.4690,positive
96,96,"""Midnight"" Apple Watch 9",Is it black or blue? I really thought it was b...,16kmq3d,183,104,0.87,https://reddit.com/16kmq3d,midnight apple watch 9,0.0000,neutral,black blue really thought black ordered today ...,0.0000,neutral
97,98,I get it watch…I know…workin’ on it…,,16x9nyn,182,44,0.93,https://reddit.com/16x9nyn,get watchi knowworkin,0.0000,neutral,,0.0000,neutral
98,99,Anyone having battery issues with watchOS 10?,Since I updated my Apple Watch Ultra to watchO...,16sm0ty,180,185,0.89,https://reddit.com/16sm0ty,anyone battery issue watchos 10,0.0000,neutral,since updated apple watch ultra watchos 10 bat...,0.0000,neutral


In [39]:
no_use='''
  # Preprocess and analyze sentiment for comments
  comments['Processed Comment Content'] = comments['Comment Content'].apply(preprocess)
  comments['Comment Sentiment Score'], comments['Comment Sentiment'] = zip(*comments['Processed Comment Content'].apply(analyze_sentiment))
  '''

# Define the new file names

#processed_posts_file = 'gdrive/My Drive/comp631/631project/task2/VisionPro_post_processed_sentiment.csv'
#processed_comments_file = 'gdrive/My Drive/comp631/631project/task2/VisionPro_comments_processed_sentiment.csv'