In [1]:
import os
import pandas as pd
import glob
from textblob import TextBlob
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.graph_objs as go
import plotly.express as px

In [2]:
def load_dataframes(folder_path):
    dfs = []
    for file in glob.glob(folder_path + "/*.csv"):
        df = pd.read_csv(file)
        company_name = os.path.basename(os.path.dirname(file))
        df['company'] = company_name
        dfs.append(df)
    return pd.concat(dfs, ignore_index=True)

In [3]:
fampay_df = load_dataframes("App-Comp/Fampay")

direct_comp_df = load_dataframes("App-Comp/Direct_Comp/*")
mainstream_comp_df = load_dataframes("App-Comp/Mainstream_Comp/*")
potential_comp_df = load_dataframes("App-Comp/Potential_Comp/*")

In [4]:
def sentiment_score(text):
    if not isinstance(text, str):
        return None
    return TextBlob(text).sentiment.polarity # type: ignore

for df in [fampay_df, direct_comp_df, mainstream_comp_df, potential_comp_df]:
    df = df[df['content'].apply(lambda x: isinstance(x, str))]
    df['sentiment'] = df['content'].apply(sentiment_score)  # type: ignore

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sentiment'] = df['content'].apply(sentiment_score)  # type: ignore
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sentiment'] = df['content'].apply(sentiment_score)  # type: ignore


In [5]:
def response_rate(df):
    total_reviews = len(df)
    total_responses = len(df[df['replyContent'].notnull()])
    return total_responses / total_reviews * 100

response_rates = {}
for df, comp_type in [(fampay_df, 'Fampay'), (direct_comp_df, 'Direct_Comp'), (mainstream_comp_df, 'Mainstream_Comp'), (potential_comp_df, 'Potential_Comp')]:
    grouped = df.groupby('company')
    response_rates[comp_type] = grouped.apply(response_rate)

In [6]:
dfs = [fampay_df, direct_comp_df, mainstream_comp_df, potential_comp_df]

for i, df in enumerate(dfs):
    df = df[df['content'].apply(lambda x: isinstance(x, str))]
    df['sentiment'] = df['content'].apply(sentiment_score) # type: ignore
    dfs[i] = df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sentiment'] = df['content'].apply(sentiment_score) # type: ignore
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sentiment'] = df['content'].apply(sentiment_score) # type: ignore


In [7]:
def plot_time_series(df, comp_type):
    if 'sentiment' not in df.columns:
        print(f"Sentiment column not found in DataFrame for {comp_type}")
        return

    df['at'] = pd.to_datetime(df['at'])
    df['day'] = df['at'].dt.date
    daily_sentiment = df.groupby(['company', 'day'])['sentiment'].mean().reset_index()

    fig = px.line(data_frame=daily_sentiment, x='day', y='sentiment', color='company',
                  title=f"Daily Average Sentiment - {comp_type}")
    fig.update_layout(width=1200, height=500)
    fig.update_xaxes(title_text='Day')
    fig.update_yaxes(title_text='Sentiment')

    fig.show()

for df, comp_type in zip(dfs, ['Fampay', 'Direct_Comp', 'Mainstream_Comp', 'Potential_Comp']):
    plot_time_series(df, comp_type)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [8]:
all_df = pd.concat(dfs, ignore_index=True)
all_df['at'] = pd.to_datetime(all_df['at'])
all_df['day'] = all_df['at'].dt.date

daily_sentiment = all_df.groupby(['company', 'day'])['sentiment'].mean().reset_index()

# Create separate traces for each company
traces = []
for comp_type in daily_sentiment['company'].unique():
    traces.append(go.Scatter(x=daily_sentiment[daily_sentiment['company'] == comp_type]['day'],
                             y=daily_sentiment[daily_sentiment['company'] == comp_type]['sentiment'],
                             mode='lines',
                             name=comp_type))

# Add trace for all companies
traces.append(go.Scatter(x=daily_sentiment[daily_sentiment['company'] == 'Fampay']['day'],
                         y=daily_sentiment[daily_sentiment['company'] == 'Fampay']['sentiment'],
                         mode='lines',
                         name='Fampay'))

# Create layout
layout = go.Layout(title='Daily Average Sentiment - All Companies', width=1200, height=500,
                   xaxis=dict(title='Day'), yaxis=dict(title='Sentiment'))

# Create figure
fig = go.Figure(data=traces, layout=layout)

# Show plot
fig.show()

In [9]:
def rolling_average(df, comp_type):
    if 'sentiment' not in df.columns:
        print(f"Sentiment column not found in DataFrame for {comp_type}")
        return

    df['at'] = pd.to_datetime(df['at'])
    df['day'] = df['at'].dt.date
    daily_sentiment = df.groupby(['company', 'day'])['sentiment'].mean().reset_index()
    daily_sentiment['rolling_average'] = daily_sentiment.groupby('company')['sentiment'].rolling(30).mean().reset_index(0, drop=True)

    fig = px.line(data_frame=daily_sentiment, x='day', y='rolling_average', color='company',
                  title=f"30 Day Rolling Average Sentiment - {comp_type}")
    fig.update_layout(width=1200, height=500)
    fig.update_xaxes(title_text='Day')
    fig.update_yaxes(title_text='Rolling Average Sentiment')

    fig.show()

for df, comp_type in zip(dfs, ['Fampay', 'Direct_Comp', 'Mainstream_Comp', 'Potential_Comp']):
    rolling_average(df, comp_type)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [10]:
all_df = pd.concat(dfs, ignore_index=True)
all_df['at'] = pd.to_datetime(all_df['at'])
all_df['day'] = all_df['at'].dt.date

daily_sentiment = all_df.groupby(['company', 'day'])['sentiment'].mean().reset_index()
daily_sentiment['rolling_average'] = daily_sentiment.groupby('company')['sentiment'].rolling(30).mean().reset_index(0, drop=True)

# Create separate traces for each company
traces = []
for comp_type in daily_sentiment['company'].unique():
    traces.append(go.Scatter(x=daily_sentiment[daily_sentiment['company'] == comp_type]['day'],
                             y=daily_sentiment[daily_sentiment['company'] == comp_type]['rolling_average'],
                             mode='lines',
                             name=comp_type))

# Add trace for Fampay
traces.append(go.Scatter(x=daily_sentiment[daily_sentiment['company'] == 'Fampay']['day'],
                         y=daily_sentiment[daily_sentiment['company'] == 'Fampay']['rolling_average'],
                         mode='lines',
                         name='Fampay'))

# Create layout
layout = go.Layout(title='30 Day Rolling Average Sentiment - All Companies', width=1200, height=500,
                   xaxis=dict(title='Day'), yaxis=dict(title='Rolling Average Sentiment'))

# Create figure
fig = go.Figure(data=traces, layout=layout)

# Show plot
fig.show()