In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# 1. Load user reviews
# Read the raw review table from the CSV file (path is relative to the
# notebook's working directory).
reviews = pd.read_csv(filepath_or_buffer="dataset/UserReview.csv")

In [3]:
# 2. Clean up missing values
# Turn literal 'nan' strings into real missing values, then keep only the
# rows that carry a Sentiment label. Written as a single non-mutating chain
# (no inplace=True) so the frame is never modified in place.
reviews = (
    reviews
    .replace('nan', np.nan)
    .dropna(subset=['Sentiment'])
)

In [4]:
# 3. Convert sentiment columns to numbers
# Cast both score columns to float in a single call via a column -> dtype
# mapping; every other column keeps its dtype.
reviews = reviews.astype({
    'Sentiment_Polarity': float,
    'Sentiment_Subjectivity': float,
})

In [5]:
# 4. Aggregate sentiment per app
# One output row per App: the mean of each score column plus the number of
# non-null polarity values. reset_index() turns the App group key back into
# an ordinary column.
summary = (
    reviews
    .groupby('App')
    .agg(
        Avg_Polarity=pd.NamedAgg(column='Sentiment_Polarity', aggfunc='mean'),
        Avg_Subjectivity=pd.NamedAgg(column='Sentiment_Subjectivity', aggfunc='mean'),
        Review_Count=pd.NamedAgg(column='Sentiment_Polarity', aggfunc='count'),
    )
    .reset_index()
)

In [6]:
# 5. Save the summary
# Create the output folder first: DataFrame.to_csv does not create missing
# parent directories, so on a fresh checkout without "processed/" the write
# would otherwise fail.
output_path = Path("processed/UserReview_Aggregated.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
# index=False keeps the row index out of the file.
summary.to_csv(output_path, index=False)

In [7]:
# 6. Show a quick preview
# Print the first five rows of the per-app summary as plain text.
print(summary.head(n=5))

                                App  Avg_Polarity  Avg_Subjectivity  \
0             10 Best Foods for You      0.470733          0.495455   
1  104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室      0.392405          0.545516   
2                              11st      0.181294          0.443957   
3        1800 Contacts - Lens Store      0.318145          0.591098   
4   1LINE – One Line with One Touch      0.196290          0.557315   

   Review_Count  
0           194  
1            40  
2            40  
3            80  
4            38  
