In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import calendar
from jinja2 import Environment, FileSystemLoader
from dash import Dash, html, dash_table

In [None]:
# Input CSV, relative to the notebook's working directory.
csv_path = '../data/data/final.csv'
# Weekday names in calendar order (Monday first), used to order per-weekday results.
weekdays = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]

# Read the CSV, parse the textual dates (format like "January 05, 2023") and
# tag every row with the English weekday name of its date.
df = pd.read_csv(csv_path).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%B %d, %Y'),
    DayOfWeek=lambda frame: frame['Date'].dt.day_name(),
)

# Columns whose '+' marks are tallied per weekday.
columns_to_aggregate = ['sports', 'literature reading', 'sobber']

# Number of '+' entries per weekday for every tracked column.  The boolean
# mask is summed per group (vectorised, no per-group Python lambda), and
# reindexing with fill_value=0 puts the days in calendar order while giving a
# weekday that never occurs in the data an explicit 0 instead of NaN.
weekly_distribution_positive = {
    col: df[col].eq('+').groupby(df['DayOfWeek']).sum().reindex(weekdays, fill_value=0)
    for col in columns_to_aggregate
}

# Same tally for the '-' marks of the 'sobber' column.
weekly_distribution_negative_sober = (
    df['sobber'].eq('-').groupby(df['DayOfWeek']).sum().reindex(weekdays, fill_value=0)
)

# Every weekday chart as (series, chart title, output file).  All charts are
# plotly figures; no matplotlib grid is created because nothing draws on one.
weekday_charts = [
    (
        distribution,
        f'Weekly Distribution of {activity} (+)',
        f"./resources/weekdays_{activity}.html",
    )
    for activity, distribution in weekly_distribution_positive.items()
]
weekday_charts.append(
    (
        weekly_distribution_negative_sober,
        'Weekly Distribution of Drunk (-)',
        "./resources/weekdays_drunk.html",
    )
)

# Render each bar chart inline and save it as a standalone HTML file.
for distribution, title, html_path in weekday_charts:
    fig = px.bar(distribution, x=distribution.index, y=distribution.values, title=title)
    fig.update_traces(hovertemplate='Day: %{x}<br>Count: %{y}')
    fig.show()
    fig.write_html(html_path)

In [None]:
# Overall number of rows carrying each mark.
total_days_sport = len(df.loc[df['sports'].eq('+')])
total_days_literature = len(df.loc[df['literature reading'].eq('+')])
total_days_sober = len(df.loc[df['sobber'].eq('+')])
total_days_unsober = len(df.loc[df['sobber'].eq('-')])



# print(total_days_sport)
# print(total_days_literature)
# print(total_days_sober)
# print(total_days_unsober)


In [None]:
# Keep one row per date *before* aggregating, so every monthly figure below is
# computed from the same rows and re-running this cell yields the same numbers.
df = df.drop_duplicates(subset=['Date'])

# Year-month period of every row; the grouping key for all monthly counts.
month_of_entry = df['Date'].dt.to_period('M')

monthly_data = df.groupby(month_of_entry).agg({
    'sports': lambda x: (x == '+').sum(),
    'literature reading': lambda x: (x == '+').sum(),
    'sobber': 'count'
})

# Summing the boolean masks gives integer counts with an explicit 0 for months
# that have no matching row, so no NaN filling (and no float upcast, which
# would render as e.g. "5.0 times") is needed.
monthly_data['Sober'] = df['sobber'].eq('+').groupby(month_of_entry).sum()
monthly_data['Drunk'] = df['sobber'].eq('-').groupby(month_of_entry).sum()

# Each summary is a (month period, count) pair.
most_sober_month = monthly_data['Sober'].idxmax(), monthly_data['Sober'].max()
most_sport_month = monthly_data['sports'].idxmax(), monthly_data['sports'].max()
most_literature_month = monthly_data['literature reading'].idxmax(), monthly_data['literature reading'].max()

# The least sober month is the one with the MOST '-' days; this value is
# rendered as `top_drunk_month` by the templating cell.
least_sober_month = monthly_data['Drunk'].idxmax(), monthly_data['Drunk'].max()
least_sport_month = monthly_data['sports'].idxmin(), monthly_data['sports'].min()
least_literature_month = monthly_data['literature reading'].idxmin(), monthly_data['literature reading'].min()

# NOTE(review): the score adds 'Drunk' days to sports and reading days;
# confirm that this is the intended definition of an "activity".
monthly_data['Total_Activities'] = monthly_data['sports'] + monthly_data['literature reading'] + monthly_data['Drunk']
top_activity_month = monthly_data['Total_Activities'].idxmax(), monthly_data['Total_Activities'].max()

data_tuples = {
    "most_sober_month": most_sober_month,
    "most_sport_month": most_sport_month,
    "most_literature_month": most_literature_month,
    "least_literature_month": least_literature_month,
    "least_sport_month": least_sport_month,
    "least_sober_month": least_sober_month,
    "top_activity_month": top_activity_month
}

# Human-readable "<Month name>, <count> times" text for every summary.
formatted_dict = {
    key: f"{calendar.month_name[period.month]}, {count} times"
    for key, (period, count) in data_tuples.items()
}


In [None]:
# Presentation copy of the monthly table: the raw 'sobber' count is dropped
# and the remaining columns get display-friendly headers.  `drop` and `rename`
# return new frames, so `monthly_data` itself is left untouched.
monthly_data_copy = (
    monthly_data
    .drop(columns=["sobber"])
    .rename(columns={
        'sports': 'Sports',
        'literature reading': "Reading",
        'Total_Activities': 'Total Score',
    })
)
# Label each row with the full month name instead of the year-month period.
monthly_data_copy.index = monthly_data_copy.index.strftime('%B')
monthly_data_copy

In [None]:

# fig = go.Figure(data=[go.Table(
#     header=dict(values=["Date"] + list(monthly_data_copy.columns),
#                 fill_color='paleturquoise',
#                 align='left'),
#     cells=dict(values=[monthly_data_copy.index] + [monthly_data_copy[column].tolist() for column in monthly_data_copy.columns],
#                fill_color='lavender',
#                align='left'))
# ])
# fig.show()

In [None]:
# Turn the month-name index into a regular column so each JSON record carries
# it.  The result is bound to `newdf` without rebinding `monthly_data_copy`,
# which keeps this cell idempotent: running it again does not add an extra
# 'index' column to the exported records.
newdf = monthly_data_copy.reset_index()
# One JSON object per month (list-of-records layout).
json_data = newdf.to_json(orient='records')
newdf

In [None]:

# Stacked bar chart of the monthly counts, one trace per activity.
month_axis = monthly_data.index.astype(str)
stacked_series = (
    ('sports', 'Sports'),
    ('literature reading', 'Literature'),
    ('Sober', 'Sober'),
    ('Drunk', 'Drunk'),
)

fig = go.Figure()
for column, legend_name in stacked_series:
    fig.add_trace(go.Bar(x=month_axis, y=monthly_data[column], name=legend_name))

fig.update_layout(
    barmode='stack',
    title='Monthly Activity Distribution',
    xaxis_title='Month',
    yaxis_title='Count',
)
fig.show()
fig.write_html("./resources/by_months.html")

In [None]:
# Split each date into day-of-month and month number for the pivot.
# `df['Date']` was already parsed to datetime when the CSV was loaded, so it
# is not parsed a second time here.
df['Day'] = df['Date'].dt.day
df['Month'] = df['Date'].dt.month

# Mean sentiment per (day-of-month, month).  The columns are reindexed to all
# twelve months so the grid always matches the twelve axis labels below, even
# when some months have no data (those columns are simply empty/NaN).
heatmap_data = (
    df.pivot_table(index='Day', columns='Month', values='Sentiment', aggfunc='mean')
      .reindex(columns=range(1, 13))
)

month_labels = [calendar.month_abbr[i] for i in range(1, 13)]

fig = px.imshow(heatmap_data, labels=dict(x="Month", y="Day", color="Average Sentiment"),
                x=month_labels, y=heatmap_data.index, aspect="auto")

fig.update_layout(title='Yearly Sentiment Heatmap', xaxis_nticks=12)
fig.show()
fig.write_html("./resources/heatmap.html")

In [None]:
# Average sentiment per calendar month number.
monthly_mean_sentiment = df['Sentiment'].groupby(df["Date"].dt.month).mean()

# Two-column frame for plotting: abbreviated month name and its mean sentiment.
monthly_mean_sentiment_df = pd.DataFrame({
    'Month': [calendar.month_abbr[month] for month in monthly_mean_sentiment.index],
    'Happy Index': monthly_mean_sentiment.values,
})

fig = px.line(
    monthly_mean_sentiment_df,
    x='Month',
    y='Happy Index',
    title='Monthly Happy Index',
)
fig.show()
fig.write_html("./resources/happy_index.html")


In [None]:
# Rows holding the highest and the lowest sentiment score.
most_happy_day = df.loc[df['Sentiment'].idxmax()]
most_depressive_day = df.loc[df['Sentiment'].idxmin()]

# Unpack date, score and entry text of the happiest day ...
most_happy_day_date, most_happy_day_sentiment_value, most_happy_day_sentiment = (
    most_happy_day[field] for field in ('Date', 'Sentiment', 'MD Content')
)

# ... and of the most depressive day.
most_depressive_day_date, most_depressive_day_sentiment_value, most_depressive_day_sentiment = (
    most_depressive_day[field] for field in ('Date', 'Sentiment', 'MD Content')
)

In [None]:
# Show both extreme days (date, score, entry text), separated by a rule.
print(
    f"Most Happy Day: {most_happy_day_date.strftime('%B %d, %Y')} with a sentiment score of {most_happy_day_sentiment_value}",
    most_happy_day_sentiment,
    "__________",
    f"Most Depressive Day: {most_depressive_day_date.strftime('%B %d, %Y')} with a sentiment score of {most_depressive_day_sentiment_value}",
    most_depressive_day_sentiment,
    sep="\n",
)

### Jinja templating

In [None]:
# Load the page template from the current directory.
file_loader = FileSystemLoader('./')
env = Environment(loader=file_loader)
template = env.get_template('index_template.html')

# Fill the template with the totals, the monthly table (as JSON), the
# per-month highlights and the two extreme-sentiment dates.
output = template.render(total_days_sport=total_days_sport,
                         total_days_reading=total_days_literature,
                         total_days_sober=total_days_sober,
                         total_days_drunk=total_days_unsober,
                         json_data=json_data,
                         top_sober_month=formatted_dict["most_sober_month"],
                         top_sport_month=formatted_dict["most_sport_month"],
                         top_reading_month=formatted_dict["most_literature_month"],
                         top_drunk_month=formatted_dict["least_sober_month"],
                         top_lazy_month=formatted_dict["least_sport_month"],
                         top_stupid_month=formatted_dict["least_literature_month"],
                         top_happy_day=most_happy_day_date.strftime('%B %d, %Y'),
                         top_depressive_day=most_depressive_day_date.strftime('%B %d, %Y'))

# Write with an explicit UTF-8 encoding so the result does not depend on the
# platform's default encoding (non-ASCII template text could otherwise fail
# to encode or be written in a legacy code page).
with open('index.html', 'w', encoding='utf-8') as file:
    file.write(output)
