In [18]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.io as pio
import numpy as np
from textblob import TextBlob

In [19]:
# Load the merged app-review export (path is relative to this notebook).
reviews_csv = '../../Warehouse/Reviews/app_reviews_merged.csv'
df = pd.read_csv(reviews_csv)

# Show the first rows as a quick check of the columns that were read.
df.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt
0,c3d58f95-ea33-4c1d-8984-49196427bf47,INSANE MINDED,https://play-lh.googleusercontent.com/a-/ACB-R...,After the latest update I'm not able to pay to...,1,0,,2023-04-25 23:32:36,,
1,0982982c-a52e-4597-a5e8-e6b643d10f5f,Nikhil Goud,https://play-lh.googleusercontent.com/a/AGNmyx...,My fam not working and account too iam facing ...,1,0,3.1.6-famx,2023-04-25 23:27:28,,
2,8b84cdf7-7659-40dc-8128-d49e1d2d97d0,Sai Ch,https://play-lh.googleusercontent.com/a-/ACB-R...,It is now commercial app,1,0,,2023-04-25 23:19:38,,
3,0c853464-de3b-416f-82ef-db2057e19bf9,Sagar Sambhi,https://play-lh.googleusercontent.com/a-/ACB-R...,Famx new update not received my payment please...,1,0,3.1.6-famx,2023-04-25 23:06:56,,
4,d04d2ed5-4272-4bb0-8d58-27435d2749d9,Gagankumar K,https://play-lh.googleusercontent.com/a-/ACB-R...,I want a card,1,0,3.1.6-famx,2023-04-25 22:39:27,,


In [20]:
# Parse the review timestamps in 'at' and make them the index, so the frame
# can be resampled by time in the cells below.
#
# The guard makes this cell safe to re-run: once 'at' has become the index it
# is no longer a column, and an unguarded second run would raise KeyError.
if 'at' in df.columns:
    df = df.assign(at=pd.to_datetime(df['at'])).set_index('at')

In [21]:
# Score cut-offs used to label a review as positive or negative (tune as needed).
# The comparisons below are strict, so with these values only scores above 4
# count as positive and only scores below 2 count as negative.
positive_threshold = 4
negative_threshold = 2


def _count_positive(scores):
    """Return how many scores in the bucket are strictly above positive_threshold."""
    return scores.gt(positive_threshold).sum()


def _count_negative(scores):
    """Return how many scores in the bucket are strictly below negative_threshold."""
    return scores.lt(negative_threshold).sum()


# Bucket the reviews by week and count positive / negative ones per bucket.
# The (name, function) pairs produce the columns ('score', 'positive_reviews')
# and ('score', 'negative_reviews') in the result.
score_aggregations = [
    ('positive_reviews', _count_positive),
    ('negative_reviews', _count_negative),
]
weekly_df = df.resample('W').agg({'score': score_aggregations})

In [22]:
# Line chart of the weekly positive and negative review counts.
weekly_counts = weekly_df['score']

# (column in weekly_counts, legend label, line colour) for each trace.
trace_specs = [
    ('positive_reviews', 'Positive Reviews', '#1f77b4'),
    ('negative_reviews', 'Negative Reviews', '#ff7f0e'),
]

fig = go.Figure()
for column, label, colour in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=weekly_df.index,
            y=weekly_counts[column],
            name=label,
            line={'color': colour},
        )
    )

# Legend placed at x=0, y=1 with a half-transparent white background and a
# fully transparent border.
legend_style = {
    'x': 0,
    'y': 1,
    'bgcolor': 'rgba(255, 255, 255, 0.5)',
    'bordercolor': 'rgba(0, 0, 0, 0)',
}
fig.update_layout(
    title='Trends in Positive and Negative Reviews',
    xaxis_title='Time',
    yaxis_title='Number of Reviews',
    legend=legend_style,
)

# Render inline first, then export the same figure as a standalone HTML page.
fig.show()
pio.write_html(fig, "../../fam-report-site/public/App-Analytics/trends.html")

In [23]:
# Mean review score per calendar day; the result keeps a single 'score' column.
daily_df = df[['score']].resample('D').mean()

In [24]:
# Number of consecutive daily rows averaged together; currently 5
# (use 7 instead for a weekly rolling average).
window_size = 5

# Smooth the daily mean scores with a fixed-size rolling mean.
daily_windows = daily_df.rolling(window_size)
rolling_average = daily_windows.mean()

In [25]:
# Line chart of the smoothed (rolling-mean) daily review score.
rolling_trace = go.Scatter(
    x=rolling_average.index,
    y=rolling_average['score'],
    name='Rolling Average',
    line={'color': '#1f77b4'},
)

fig = go.Figure()
fig.add_trace(rolling_trace)

# Legend placed at x=0, y=1 with a half-transparent white background and a
# fully transparent border.
legend_style = {
    'x': 0,
    'y': 1,
    'bgcolor': 'rgba(255, 255, 255, 0.5)',
    'bordercolor': 'rgba(0, 0, 0, 0)',
}
fig.update_layout(
    title='Rolling Average of Review Scores',
    xaxis_title='Time',
    yaxis_title='Average Score',
    legend=legend_style,
)

# Export the figure as a standalone HTML page, then render it inline.
pio.write_html(fig, "../../fam-report-site/public/App-Analytics/rolling-average.html")
fig.show()

In [26]:
# Headline statistics over all reviews: mean score, mean review-text length,
# and mean TextBlob sentiment polarity.
average_score = df["score"].mean()

# A review with no text is read from the CSV as NaN (a float); calling len()
# or TextBlob() on it raises TypeError. `.str.len()` and
# `map(..., na_action="ignore")` leave such rows as NaN instead, and the means
# skip NaN, so the numbers are unchanged when no review text is missing.
review_text = df["content"]
average_length = review_text.str.len().mean()

print(f'Average Review Score: {average_score:.2f}')
print(f'Average Review Length: {average_length:.2f}')

# Per-review sentiment polarity, stored as a new 'polarity' column on df.
df["polarity"] = review_text.map(
    lambda text: TextBlob(text).sentiment.polarity,
    na_action="ignore",
)

average_polarity = df["polarity"].mean()

print(f'Average Review Polarity: {average_polarity:.2f}')


Average Review Score: 3.31
Average Review Length: 51.73
Average Review Polarity: 0.25
