In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
df = pd.read_csv('C:\\ESTERA\\CCT\\TWEETS\\sentiment.csv')
df.head()

# EDA

In [None]:
df.info()

In [None]:
df.head()

In [None]:
# how many tweets share each distinct 'Date' value, ordered by date
tweets_per_day = df['Date'].value_counts().sort_index()

# histogram showing how the per-day tweet counts are distributed (5 bins)
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(tweets_per_day, bins=5, alpha=0.5, color='b', edgecolor='black')
ax.set_xlabel('Number of Tweets Per Day')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Tweets Per Day')
plt.show()

In [None]:
# make sure 'Date' holds real timestamps (a no-op when it already does)
df['Date'] = pd.to_datetime(df['Date'])

# keep the calendar-day part of each timestamp in its own column
df['DateOnly'] = df['Date'].dt.date

# earliest and latest calendar day present in the data
oldest_date, newest_date = df['DateOnly'].min(), df['DateOnly'].max()

print("Oldest Date:", oldest_date)
print("Newest Date:", newest_date)

In [None]:
# calculating the number of days between the oldest and newest dates
days_between = (newest_date - oldest_date).days

print("Number of days between the oldest and newest date:", days_between)

In [None]:
# finding the highest sentiment score
highest_sentiment = df['SentimentScore'].max()

# finding the lowest sentiment score
lowest_sentiment = df['SentimentScore'].min()

print("Highest Sentiment Score:", highest_sentiment)
print("Lowest Sentiment Score:", lowest_sentiment)

In [None]:
#!pip install plotly

In [None]:
import plotly.express as px

# creating an interactive box plot
fig = px.box(df, x=df['Date'].dt.date, y='SentimentScore', labels={'x': 'Date', 'y': 'Sentiment Score'}, title='Distribution of Sentiment Scores for Each Day')
fig.update_xaxes(tickangle=90)

# showing the interactive plot
fig.show()

In [None]:
# setting the figure size for the plot
plt.figure(figsize=(15, 8))

# creating the grouped box plot
sns.set(style="whitegrid")
sns.boxplot(data=df, x=df['Date'].dt.date, y='SentimentScore')

# rotating x-axis labels for better readability
plt.xticks(rotation=90)

# setting labels and title
plt.xlabel('Date')
plt.ylabel('Sentiment Score')
plt.title('Distribution of Sentiment Scores for Each Day')

# showing the plot
plt.show()

In [None]:
# boolean mask of the tweets scoring below -0.7, and how many there are
is_strongly_negative = df['SentimentScore'] < -0.7
count = is_strongly_negative.sum()

# size of the whole data set, used as the denominator
total_rows = len(df)

# share of strongly negative tweets, expressed in percent
percentage = (count / total_rows) * 100

print("Number of rows with sentiment scores lower than -0.7:", count, "which is: {:.2f}%".format(percentage), 'of entire data.')

In [None]:
# setting the date for comparison
# NOTE(review): later cells in this notebook split the data at 2009-06-17, not 2009-06-16 — confirm which cut-off is intended
comparison_date = pd.to_datetime('2009-06-16')

# calculating the average sentiment before and after the comparison date
# ("after" is inclusive: rows dated exactly on comparison_date fall into the second group)
before_average = df[df['Date'] < comparison_date]['SentimentScore'].mean()
after_average = df[df['Date'] >= comparison_date]['SentimentScore'].mean()

# rounding the averages to two decimal places
before_average_rounded = round(before_average, 2)
after_average_rounded = round(after_average, 2)

# printing the results
print(f"The average sentiment before {comparison_date} is: {before_average_rounded}")
print(f"The average sentiment after {comparison_date} is: {after_average_rounded}")

In [None]:
df['Date'] = pd.to_datetime(df['Date'])  
df['DayOfWeek'] = df['Date'].dt.strftime('%A') 

average_sentiment_per_day = df.groupby('DayOfWeek')['SentimentScore'].mean()
average_sentiment_per_day

In [None]:
# calculating the average sentiment per day
sentiment_per_day = df.groupby('DayOfWeek')['SentimentScore'].mean()

# creating a new DataFrame 'df4' with 'DayOfWeek' as index and 'SentimentScore' as the column
df4 = sentiment_per_day.reset_index(name='SentimentScore')
df4

In [None]:
# sorting the DataFrame by 'SentimentScore' in ascending order (ascending=True)
df4 = df4.sort_values(by='SentimentScore', ascending=True)

# creating an interactive horizontal bar chart: one bar per weekday, bar length = average score
fig = px.bar(df4, x='SentimentScore', y='DayOfWeek', orientation='h', title='Average Sentiment Score per Day of the Week')
fig.update_layout(xaxis_title='Average Sentiment Score', yaxis_title='Day of the Week', showlegend=False)

# showing the interactive chart
fig.show()

In [None]:
df

In [None]:
#import plotly.express as px
#import plotly.graph_objects as go

# Extract hour and day of the week
df['Hour'] = pd.to_datetime(df['Time']).dt.hour
df.head()

In [None]:
# groupping by 'DayOfWeek' and 'Hour', then calculate the average sentiment
average_sentiment_per_hour = df.groupby(['DayOfWeek', 'Hour'])['SentimentScore'].mean().reset_index()

print(average_sentiment_per_hour)

In [None]:
# mean sentiment for every (weekday, hour) pair, pivoted so that
# rows are weekdays and columns are hours
hourly_means = df.groupby(['DayOfWeek', 'Hour'])['SentimentScore'].mean()
grouped_data = hourly_means.unstack()

# one line per weekday, hour on the x-axis
fig, ax = plt.subplots(figsize=(12, 6))
for day in grouped_data.index:
    day_profile = grouped_data.loc[day]
    ax.plot(day_profile.index, day_profile.values, label=day)

ax.set_xlabel('Hour')
ax.set_ylabel('Sentiment Score')
ax.set_title('Average Sentiment Scores by Hour for Each Day of the Week')
ax.legend(loc='upper right')
ax.grid(True)
plt.show()

### Checking how many tweets per day there were before sentiment dropped

In [None]:
# defining the split date
split_date = pd.to_datetime('2009-06-17')

# counting the tweets before the split date and from the split date onwards
tweets_before = len(df[(df['Date'] < split_date)])
tweets_after = len(df[(df['Date'] >= split_date)])

# calendar days covered by each period (time-of-day stripped so partial days count as whole days)
first_day = df['Date'].min().normalize()
last_day = df['Date'].max().normalize()

# "before" runs from first_day up to, but not including, split_date
days_before = (split_date - first_day).days
# "after" runs from split_date through last_day inclusive, hence the + 1
days_after = (last_day - split_date).days + 1

# calculating the average number of tweets per day before and after the split date;
# a period with no days yields NaN instead of raising ZeroDivisionError
average_tweets_before = tweets_before / days_before if days_before > 0 else float('nan')
average_tweets_after = tweets_after / days_after if days_after > 0 else float('nan')

# printing the results
print("Average Number of Tweets Per Day Before", split_date, ":", average_tweets_before)
print("Average Number of Tweets Per Day After", split_date, ":", average_tweets_after)

In [None]:
# counting the number of tweets per day and sorting by date
tweets_per_day = df['Date'].dt.date.value_counts().sort_index().reset_index()
tweets_per_day.columns = ['Date', 'Count']

# the 'Date' column holds datetime.date objects, so the cut-off must be a date too
# (comparing a date with a pd.Timestamp raises TypeError on current pandas)
split_day = pd.Timestamp('2009-06-17').date()
before_label = 'Before 2009-06-17'
after_label = 'After 2009-06-17'  # this group includes the split day itself

# adding a color column based on the date
tweets_per_day['Color'] = [before_label if date < split_day else after_label
                           for date in tweets_per_day['Date']]

# creating an interactive bar chart; category_orders must list the exact labels used in 'Color'
fig = px.bar(tweets_per_day, x='Date', y='Count', color='Color',
             title='Number of Tweets Per Day',
             labels={'Count': 'Number of Tweets'},
             category_orders={'Color': [before_label, after_label]})

fig.update_xaxes(title='Date', categoryorder='total ascending')
fig.update_yaxes(title='Number of Tweets')

# showing the interactive chart
fig.show()

In [None]:
# couting the number of tweets per day
#tweets_per_day = df['Date'].dt.date.value_counts().sort_index()

# plotting the number of tweets per day
#plt.figure(figsize=(10, 6))

# defining a color map
#colors = ['skyblue' if date < pd.to_datetime('2009-06-17') else 'lightcoral' for date in tweets_per_day.index]

# plotting the bars with different colors for dates before and after '2009-06-17'
#plt.bar(tweets_per_day.index, tweets_per_day, color=colors)

#plt.xlabel('Date')
#plt.ylabel('Number of Tweets')
#plt.title('Number of Tweets Per Day')
##plt.xticks(rotation=90)
#plt.tight_layout()
#plt.show()

### Average Sentiment

In [None]:
# groupping the data by date and calculate the average sentiment score for each day
daily_average_sentiment = df.groupby(df['Date'].dt.date)['SentimentScore'].mean()

# resetting the index to make the result a DataFrame
daily_average_sentiment = daily_average_sentiment.reset_index()

# renaming the columns
daily_average_sentiment.columns = ['Date', 'AverageSentimentScore']

# printing the result
print(daily_average_sentiment)

In [None]:
import pandas as pd
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import datetime

# Create a Dash app
dynamic_chart_app  = dash.Dash(__name__)


# Convert the Date column to a datetime object
daily_average_sentiment = pd.DataFrame(daily_average_sentiment)
daily_average_sentiment['Date'] = pd.to_datetime(daily_average_sentiment['Date'])

dynamic_chart_app.layout = html.Div([
    html.H1('Average Sentiment Score Over Time'),
    dcc.Graph(id='line-chart'),
])

@dynamic_chart_app.callback(
    Output('line-chart', 'figure'),
    Input('line-chart', 'relayoutData')
)
def update_line_chart(relayoutData):
    """Redraw the daily-average line chart, restricted to the zoomed x-range if any.

    Parameters
    ----------
    relayoutData : dict or None
        Relayout payload of the 'line-chart' graph. The x-range is read either
        from a single 'xaxis.range' entry or from the pair of
        'xaxis.range[0]' / 'xaxis.range[1]' entries.

    Returns
    -------
    A plotly express line figure of 'AverageSentimentScore' over 'Date',
    built from the module-level ``daily_average_sentiment`` frame.
    """
    # work out the requested x-range, if the payload carries one
    bounds = None
    if relayoutData:
        if 'xaxis.range' in relayoutData:
            bounds = relayoutData['xaxis.range']
        elif 'xaxis.range[0]' in relayoutData and 'xaxis.range[1]' in relayoutData:
            bounds = [relayoutData['xaxis.range[0]'], relayoutData['xaxis.range[1]']]

    if bounds is not None:
        # pd.to_datetime accepts the timestamp strings plotly sends, including
        # fractional seconds that datetime.fromisoformat rejects on older Pythons
        start_date = pd.to_datetime(bounds[0])
        end_date = pd.to_datetime(bounds[1])
        in_range = (daily_average_sentiment['Date'] >= start_date) & (daily_average_sentiment['Date'] <= end_date)
        filtered_df = daily_average_sentiment[in_range]
    else:
        # no zoom information: show everything
        filtered_df = daily_average_sentiment

    fig = px.line(filtered_df, x='Date', y='AverageSentimentScore', title='Average Sentiment Score Over Time')
    return fig

if __name__ == '__main__':
    dynamic_chart_app.run_server(debug=True)

In [None]:
# grouping the data by calendar day and calculating the average sentiment score for each day;
# reset_index turns the grouped Series back into a two-column DataFrame
daily_average_sentiment = (
    df.groupby(df['Date'].dt.date)['SentimentScore']
    .mean()
    .reset_index()
)

# renaming the columns
daily_average_sentiment.columns = ['Date', 'AverageSentimentScore']

# initial x-axis window: the full span of the data
# (the previous literal strings 'start_date' / 'end_date' were placeholders, not dates)
first_day = daily_average_sentiment['Date'].min()
last_day = daily_average_sentiment['Date'].max()

# creating an interactive line chart with a range slider
fig = px.line(daily_average_sentiment, x='Date', y='AverageSentimentScore',
              title='Daily Average Sentiment Score',
              labels={'AverageSentimentScore': 'Average Sentiment Score'},
              range_x=[first_day, last_day])

fig.update_xaxes(title='Date', rangeslider_visible=True)
fig.update_yaxes(title='Average Sentiment Score')

# showing the interactive chart
fig.show()

In [None]:
# creating a time series plot to visualize the daily sentiment trends
#plt.figure(figsize=(12, 6))
#plt.plot(daily_average_sentiment['Date'], daily_average_sentiment['AverageSentimentScore'], label='Average Sentiment Score', color='b')
#plt.xlabel('Date')
#plt.ylabel('Average Sentiment Score')
#plt.title('Daily Average Sentiment Score Over Time')
#plt.grid(True)
#plt.legend()
#plt.xticks(rotation=45)
#plt.show()

In [None]:
# calculating the average sentiment
average_sentiment = daily_average_sentiment['AverageSentimentScore'].mean()

# rounding it to two decimal places
average_sentiment_rounded = round(average_sentiment, 2)

# printing the result
print(f"The average sentiment is: {average_sentiment_rounded}")

In [None]:
# make sure the daily frame carries real timestamps
daily_average_sentiment['Date'] = pd.to_datetime(daily_average_sentiment['Date'])

# cut-off day: rows strictly earlier go to "before", the rest to "after"
before_date = pd.to_datetime('2009-06-17')
is_before = daily_average_sentiment['Date'] < before_date
is_on_or_after = daily_average_sentiment['Date'] >= before_date
daily_scores = daily_average_sentiment['AverageSentimentScore']

# mean of the daily averages on each side of the cut-off, rounded to two decimals
before_average_sentiment = round(daily_scores[is_before].mean(), 2)
after_average_sentiment = round(daily_scores[is_on_or_after].mean(), 2)

# printing the results
print(f"Average sentiment score before {before_date}: {before_average_sentiment}")
print(f"Average sentiment score after {before_date}: {after_average_sentiment}")

In [None]:
import matplotlib.pyplot as plt
import numpy as np
# Gauge-style chart built from a ring-shaped pie: the first wedge covers half of the
# circle and uses color 'none', the remaining 16 equal wedges run from red to green.
# Define the data for the pie chart (labels, values, and colors)
labels = ['', 'Negative', '', '', '', '', '', '', '', '', '', '', '', '', '', 'Positive', '']
values = [0.5, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125,
          0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125]
colors = ['none', '#FF0000', '#FF2200', '#FF4400', '#FF6600', '#FF8800', '#FFAA00', '#FFCC00', '#FFFF33',
          '#FFFF66', '#FFFF99', '#FFFFCC', '#CCFFCC', '#99FF99', '#66FF66', '#33FF33', '#00FF00']

# Create the pie chart (wedgeprops width=0.4 turns the pie into a ring)
fig, ax = plt.subplots()
ax.pie(values, labels=labels, colors=colors, startangle=0, counterclock=False, radius=1.5, wedgeprops={'width': 0.4})
ax.axis('equal')
ax.set_title('Tweets average Sentiment')

# Add lines to the chart: a dashed "needle" of length 0.8 drawn from the origin
# NOTE(review): the needle angle (86 degrees) and the '0.14' label below are hard-coded,
# not derived from the average computed earlier in the notebook — confirm they still match the data
x1, y1 = 1, 0.01
angle_rad = np.radians(86)
x = [0, 0.8 * np.cos(angle_rad)]
y = [0, 0.8 * np.sin(angle_rad)]
ax.plot(x, y, color='black', linestyle='--', linewidth=1)

# Add the additional points with custom coordinates
# (value label slightly above the needle tip, plus the '1' and '-1' scale ends)
offset = 0.05
ax.annotate('0.14', xy=(x[1], y[1] + offset), fontsize=12, color='blue', ha='center', va='center')
ax.annotate('1', xy=(x1, y1), fontsize=12, color='black')
ax.annotate('-1', xy=(-1, 0.01), fontsize=12, color='black')
plt.show()

In [None]:
import tkinter as tk
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure


# creating a function to generate the pie chart with lines and additional points
def create_pie_chart(title, angle, x1, y1, value_1, value_minus_1):
    """Build a standalone matplotlib Figure containing a gauge-style ring chart.

    NOTE: a later cell in this notebook defines another ``create_pie_chart``
    with a different signature (it takes an Axes as first argument); whichever
    cell ran last wins, so run this cell again before calling this variant.

    Parameters
    ----------
    title : str
        Title placed above the chart.
    angle : float
        Direction of the dashed needle in degrees, measured as for cos/sin
        (0 points along +x, 90 along +y).
    x1, y1 : float
        Data coordinates at which the '1' scale label is written.
    value_1 : str
        Text written slightly above the needle tip.
    value_minus_1 : str
        Unused; kept so existing callers do not break.

    Returns
    -------
    matplotlib.figure.Figure
        A new 5x5 inch figure holding the chart.
    """
    fig = Figure(figsize=(5, 5))
    ax = fig.add_subplot(111)

    # one wedge covering half the circle (color 'none') followed by 16 equal colour steps
    labels = ['', 'Negative', '', '', '', '', '', '', '', '', '', '', '', '', '', 'Positive', '']
    values = [0.5, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125,
              0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125]
    colors = ['none', '#FF0000', '#FF2200', '#FF4400', '#FF6600', '#FF8800', '#FFAA00', '#FFCC00', '#FFFF33',
              '#FFFF66', '#FFFF99', '#FFFFCC', '#CCFFCC', '#99FF99', '#66FF66', '#33FF33', '#00FF00']

    # ring-shaped pie (wedge width 0.4), drawn clockwise starting at angle 0
    ax.pie(values, labels=labels, colors=colors, startangle=0, counterclock=False, radius=1.5, wedgeprops={'width': 0.4})
    ax.axis('equal')
    ax.set_title(title)

    # scale end labels
    ax.annotate('1', xy=(x1, y1), fontsize=12, color='black')
    ax.annotate('-1', xy=(-1, 0.01), fontsize=12, color='black')

    # dashed needle of length 0.8 from the origin
    angle_rad = np.radians(angle)
    x = [0, 0.8 * np.cos(angle_rad)]
    y = [0, 0.8 * np.sin(angle_rad)]
    ax.plot(x, y, color='black', linestyle='--', linewidth=1)

    # value label, raised slightly above the needle tip
    offset = 0.05
    ax.annotate(value_1, xy=(x[1], y[1] + offset), fontsize=12, color='blue', ha='center', va='center')

    return fig

# creating the main application window
root = tk.Tk()
root.title('Interactive Pie Charts')

# creating three separate pie charts with different titles, angles, and line coordinates
chart1 = create_pie_chart('Total Average', 90, 1, 0.01, '0.14', '-1')
chart2 = create_pie_chart('Before 2009-06-17', 85, 1, 0.01, '0.17', '-1')
chart3 = create_pie_chart('After 2009-06-17', 95, 1, 0.01, '-0.05', '-1')

# creating canvas widgets for the charts
canvas1 = FigureCanvasTkAgg(chart1, master=root)
canvas2 = FigureCanvasTkAgg(chart2, master=root)
canvas3 = FigureCanvasTkAgg(chart3, master=root)

# packing the canvas widgets to the window
canvas1.get_tk_widget().pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
canvas2.get_tk_widget().pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
canvas3.get_tk_widget().pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

# startig the Tkinter
root.mainloop()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# function to create a pie chart
def create_pie_chart(ax, title, angle, value_1, value_minus_1):
    """Draw a gauge-style ring chart with a dashed needle onto an existing Axes.

    ax            -- object the chart is drawn on (pie/plot/annotate are called on it)
    title         -- chart title
    angle         -- needle direction in degrees (0 = +x axis, 90 = +y axis)
    value_1       -- text written just above the needle tip
    value_minus_1 -- accepted but not used by this function
    """
    # a half-circle wedge with color 'none' followed by 16 equal red-to-green steps
    wedge_labels = ['', 'Negative', '', '', '', '', '', '', '', '', '', '', '', '', '', 'Positive', '']
    wedge_sizes = [0.5, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125,
                   0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125, 0.03125]
    wedge_colors = ['none', '#FF0000', '#FF2200', '#FF4400', '#FF6600', '#FF8800', '#FFAA00', '#FFCC00', '#FFFF33',
                    '#FFFF66', '#FFFF99', '#FFFFCC', '#CCFFCC', '#99FF99', '#66FF66', '#33FF33', '#00FF00']

    # ring-shaped pie, clockwise from angle 0
    ax.pie(wedge_sizes, labels=wedge_labels, colors=wedge_colors, startangle=0, counterclock=False,
           radius=1.5, wedgeprops={'width': 0.4})
    ax.axis('equal')
    ax.set_title(title)

    # needle: dashed segment of length 0.8 starting at the origin
    theta = np.radians(angle)
    tip_x = 0.8 * np.cos(theta)
    tip_y = 0.8 * np.sin(theta)
    ax.plot([0, tip_x], [0, tip_y], color='black', linestyle='--', linewidth=1)

    # value label slightly above the tip, then the two scale-end labels
    label_offset = 0.05
    ax.annotate(value_1, xy=(tip_x, tip_y + label_offset), fontsize=12, color='blue', ha='center', va='center')
    for scale_text, scale_position in (('1', (1, 0.01)), ('-1', (-1, 0.01))):
        ax.annotate(scale_text, xy=scale_position, fontsize=12, color='black')

# creating a figure with subplots for the pie charts
# creating a figure with one subplot per gauge chart
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# drawing the three gauges (overall, before and after 2009-06-17) onto the subplots
# NOTE(review): needle angles and value labels are hard-coded here — confirm they match the averages computed above
create_pie_chart(axes[0], 'Total Average', 90, '0.14', '-1')
create_pie_chart(axes[1], 'Before 2009-06-17', 85, '0.17', '-1')
create_pie_chart(axes[2], 'After 2009-06-17', 95, '-0.05', '-1')


# saving the figure; the path is written as a normal escaped string (as the other paths in this
# notebook are) — the previous raw string with doubled backslashes produced doubled separators
fig.savefig('C:\\ESTERA\\CCT\\TWEETS\\pie_charts.png', bbox_inches='tight')
plt.show()

# Checking tweets content for context of the data

In [None]:
import pandas as pd
# the raw tweet file is read with these six column names supplied explicitly
column_names = ['Id','Tweet_id','Date','Flag','User','Tweet']
csv_file_path = "C:\\ESTERA\\CCT\\TWEETS\\ProjectTweets.csv"

# names= already labels the columns, so no separate column assignment is needed
data_2 = pd.read_csv(csv_file_path, names=column_names)

In [None]:
data_2.head()

In [None]:
# rows that repeat an earlier row in every column
is_repeat = data_2.duplicated()
duplicates = data_2[is_repeat]

# report either the offending rows or the all-clear
if not duplicates.empty:
    print("Duplicates found in the DataFrame:")
    print(duplicates)
else:
    print("No duplicates found in the DataFrame.")

In [None]:
data_2.info()

In [None]:
# strip the ' PDT' marker and parse what is left (e.g. 'Mon Apr 06 22:19:45 2009')
# into timestamps, held in a temporary Series
date_text = data_2['Date'].str.replace(' PDT', '')
parsed_dates = pd.to_datetime(date_text, format='%a %b %d %H:%M:%S %Y')

# clock time as 'HH:MM:SS' text in a new 'Time' column
data_2['Time'] = parsed_dates.dt.strftime('%H:%M:%S')

# 'Date' is replaced by the calendar day as 'YYYY-MM-DD' text
data_2['Date'] = parsed_dates.dt.strftime('%Y-%m-%d')


In [None]:
data_2.head()

### preprocessing data before pulling context

In [None]:
import nltk
from nltk.corpus import stopwords

# nltk.word_tokenize needs the Punkt tokenizer data in addition to the stop-word list;
# 'punkt_tab' is the name used by recent NLTK releases (presumably ignored with a
# message on older ones — verify against the installed version)
for resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(resource)
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Normalise one tweet for topic modelling.

    The text is tokenised, tokens that are not purely alphabetic are dropped,
    the rest is lower-cased, English stop words are removed and the remaining
    tokens are joined with single spaces.

    Parameters
    ----------
    text : str
        Raw tweet text. Any non-string value (e.g. NaN from a missing cell)
        yields an empty string.

    Returns
    -------
    str
        The cleaned, space-separated tokens.
    """
    if not isinstance(text, str):
        return ""
    tokens = [word.lower() for word in nltk.word_tokenize(text) if word.isalpha()]
    return " ".join(word for word in tokens if word not in stop_words)

In [None]:
# appling the preprocessing function to the 'Tweet' column
data_2['Tweet'] = data_2['Tweet'].apply(preprocess_text)
data_2.head()

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# creating a Document-Term Matrix (DTM)
vectorizer = CountVectorizer(max_features=1000, stop_words='english')
dtm = vectorizer.fit_transform(data_2['Tweet'])

# appling LDA
num_topics = 5 
lda = LatentDirichletAllocation(n_components=num_topics, random_state=42)
lda.fit(dtm)

#### Inspecting topics

In [None]:
# vocabulary of the document-term matrix, in column order
feature_names = vectorizer.get_feature_names_out()
num_top_words = 10

# for every topic, list the words with the largest weights (heaviest first)
for topic_idx, topic in enumerate(lda.components_):
    top_words_idx = np.argsort(topic)[::-1][:num_top_words]
    top_words = [feature_names[position] for position in top_words_idx]
    topic_label = topic_idx + 1
    print(f"Topic {topic_label}: {', '.join(top_words)}")

###
Topic 1: Positive Emotions <br>
Topic 2: Daily Life and Activities<br>
Topic 3: Online Interaction and Communication <br>
Topic 4: Daily Routine and Well-being <br>
Topic 5: Expressing Thoughts and Opinions

In [None]:
# assigning topics to documents
topic_distribution = lda.transform(dtm)

# adding 1 to topic labels to make them start from 1
data_2['Topic'] = (topic_distribution.argmax(axis=1) + 1)

In [None]:
data_2.head(10)

In [None]:
# grouping by 'Date' and 'Topic', and counting the number of tweets in each pair
topic_counts = data_2.groupby(['Date', 'Topic']).size().unstack(fill_value=0)

# readable legend entries: columns 1..5 become 'Topic 1'..'Topic 5' (plotting copy only)
topic_counts_labelled = topic_counts.rename(columns=lambda topic: f'Topic {topic}')

# creating a stacked area plot; the Axes is created first and handed to pandas,
# otherwise DataFrame.plot opens its own figure and the requested figsize is lost
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(12, 6))
topic_counts_labelled.plot(kind='area', stacked=True, colormap='viridis', ax=ax)
ax.set_title("Topic Distribution Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Number of Tweets")
ax.legend(title='Topic', loc='upper right')
plt.show()

# Sentiment Forecasting

### 1. Analyzing missing data


In [None]:
print(daily_average_sentiment)

In [None]:
# converting the 'Date' column to datetime
daily_average_sentiment['Date'] = pd.to_datetime(daily_average_sentiment['Date'])

# finding the minimum and maximum dates in the DataFrame
min_date = daily_average_sentiment['Date'].min()
max_date = daily_average_sentiment['Date'].max()

# creating a daily date range covering the entire period
date_range = pd.date_range(start=min_date, end=max_date)

# creating a new DataFrame with one row per calendar day
date_range_df = pd.DataFrame({'Date': date_range})

# left-merging onto the full calendar: days without tweets get NaN in
# 'AverageSentimentScore' automatically, so no explicit fill is needed;
# the result is sorted by date and given a fresh 0..n-1 index
merged_data = (
    date_range_df
    .merge(daily_average_sentiment, on='Date', how='left')
    .sort_values('Date')
    .reset_index(drop=True)
)
merged_data.head()

In [None]:
merged_data.head(40)

In [None]:
# date-indexed copy of merged_data (merged_data itself keeps its 'Date' column)
missing_data = merged_data.set_index('Date')

# date to check before and after
date_to_check = '2009-05-09'

# counting NaN values strictly before the date and from the date onwards,
# so that the boundary day is counted exactly once (in the "after" group)
is_missing = missing_data['AverageSentimentScore'].isna()
on_or_after = missing_data.index >= pd.Timestamp(date_to_check)
nan_before = is_missing[~on_or_after].sum()
nan_after = is_missing[on_or_after].sum()

print("Number of NaN values before", date_to_check, ":", nan_before)
print("Number of NaN values after", date_to_check, ":", nan_after)

### 2. Handling missing data
The data will be temporarily truncated, as the majority of the missing data points are located at the very beginning of the data set.

#### a) filling missing values

In [None]:
# selecting the rows from '2009-05-09' onwards (the label slice includes that day);
# .copy() makes 'rows' independent, so filling it later cannot alter missing_data
rows = missing_data.loc['2009-05-09':].copy()
print(rows)

In [None]:
# filling missing values using linear interpolation; the result is assigned back
# because an inplace call on a selected column is not guaranteed to update 'rows'
rows['AverageSentimentScore'] = rows['AverageSentimentScore'].interpolate(method='linear')

In [None]:
print(rows)

#### b) checking trend, seasonality and randomness

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
from dateutil.parser import parse
%matplotlib inline

# Additive Decomposition
result_add = seasonal_decompose(rows['AverageSentimentScore'], model='additive', extrapolate_trend='freq')

# Plot
plt.rcParams.update({'figure.figsize': (10,10)})
result_add.plot().suptitle('Additive Decompose', fontsize=22)
plt.show()

In [None]:
# Extract the Components 
df_reconstructed = pd.concat([result_add.seasonal, result_add.trend, result_add.resid, result_add.observed], axis=1)
df_reconstructed.columns = ['seas', 'trend', 'resid', 'actual_values']
df_reconstructed.head()

In [None]:
print(df_reconstructed)

In [None]:
# one line per decomposition component; the observed series is drawn dashed
plt.figure(figsize=(12, 6))
component_styles = [
    ('seas', 'Seasonal', {}),
    ('trend', 'Trend', {}),
    ('resid', 'Residual', {}),
    ('actual_values', 'Actual Values', {'linestyle': '--'}),
]
for column, legend_label, extra_style in component_styles:
    plt.plot(df_reconstructed.index, df_reconstructed[column], label=legend_label, marker='o', **extra_style)

# titles, axis labels, legend and grid
plt.title('Time Series Decomposition')
plt.xlabel('Date')
plt.ylabel('Values')
plt.legend()
plt.grid(True)

# showing the plot
plt.show()

In [None]:
# creating an interactive multiline chart
fig = px.line(df_reconstructed, labels={'value': 'Values'}, title='Time Series Decomposition')
fig.show()

#### c) checking if data is stationary or not

In [None]:
from statsmodels.tsa.stattools import adfuller

# series under test: the gap-filled daily averages
time_series = rows['AverageSentimentScore']

# Augmented Dickey-Fuller test; the result tuple is unpacked into its six parts
result = adfuller(time_series)
adf_statistic, p_value, used_lag, nobs, critical_values, icbest = result

# reporting every part of the result
report_lines = (
    ("ADF Statistic:", adf_statistic),
    ("p-value:", p_value),
    ("Used Lag:", used_lag),
    ("Number of Observations:", nobs),
    ("Critical Values:", critical_values),
    ("IC Best:", icbest),
)
for caption, value in report_lines:
    print(caption, value)

# a p-value of at most 0.05 is read as evidence of stationarity
is_stationary = p_value <= 0.05
print("Reject the null hypothesis: The data is stationary." if is_stationary
      else "Fail to reject the null hypothesis: The data is not stationary.")

#### d) Reverse filling missing data & applying known information to the entire data set

In [None]:
merged_data.head(20)

In [None]:
# creating a copy of the 'merged_data' DataFrame
reversed_df = merged_data.copy()

# reversing the order of the DataFrame
reversed_df = reversed_df.iloc[::-1].reset_index(drop=True)
reversed_df.head()

In [None]:
reversed_df.tail(20)

In [None]:
end_date = pd.to_datetime("2009-06-08")

# slicing the df_reconstructed DataFrame to include only rows until the specified end_date
composition = df_reconstructed.loc[:end_date].copy()


# reversing the order of the DataFrame
composition = composition.iloc[::-1].reset_index(drop=False)
composition.head()

In [None]:
print(composition)

In [None]:
score_col = 'AverageSentimentScore'

# finding the position where missing values start in reversed_df
# (len(reversed_df) when nothing is missing, which makes the loop below a no-op)
missing_positions = np.flatnonzero(reversed_df[score_col].isna().to_numpy())
missing_start_idx = int(missing_positions[0]) if missing_positions.size else len(reversed_df)

# linear interpolation for available values in reversed_df
reversed_df[score_col] = reversed_df[score_col].interpolate(method='linear')

# iterating over the remaining missing values and backcasting them based on composition
# NOTE(review): interpolate() above may already have filled every gap, in which case
# nothing is left for this loop to do — confirm that this is the intended backcast
for idx in range(missing_start_idx, len(reversed_df)):

    row_label = reversed_df.index[idx]

    # only values that are still missing are estimated; observed or interpolated values stay
    if not pd.isna(reversed_df.at[row_label, score_col]):
        continue

    # the date belonging to this row comes from the 'Date' column
    # (the index of reversed_df is a plain row number, not a date)
    date = reversed_df.at[row_label, 'Date']

    # finding the corresponding row in the composition DataFrame
    composition_row = composition[composition['Date'] == date]

    # checking if a corresponding row was found
    if not composition_row.empty:

        # using the trend and resid values to estimate the missing value
        estimated_value = composition_row['trend'].values[0] + composition_row['resid'].values[0]

        # filling in the missing value in reversed_df
        reversed_df.at[row_label, score_col] = estimated_value

In [None]:
reversed_df.tail(20)

In [None]:
full_df = reversed_df.iloc[::-1]
full_df.head()

In [None]:
full_df.reset_index(drop=True, inplace=True)
full_df.head()

In [None]:
# converting the 'Date' column to datetime and making it the index in one step that builds
# a new frame (full_df was sliced from reversed_df, so assigning into it would warn);
# the guard lets the cell be re-run after 'Date' has already become the index
if 'Date' in full_df.columns:
    full_df = full_df.assign(Date=pd.to_datetime(full_df['Date'])).set_index('Date')

# verifying the changes
print(full_df.head())

### Stationarity check

In [None]:
# linear interpolation of the missing 'AverageSentimentScore' values in merged_data
# (this overwrites the column, so merged_data no longer shows which days were originally missing)
merged_data['AverageSentimentScore'] = merged_data['AverageSentimentScore'].interpolate(method='linear')

In [None]:
merged_data.head()

In [None]:
from statsmodels.tsa.stattools import adfuller
stationarity = adfuller(merged_data['AverageSentimentScore'])

#stationarity
print('Dickey Fuller p-value: %F' % stationarity[1])

In [None]:
# calculating the first-order differenced series (day-over-day change);
# diff() leaves NaN in the first row, which is replaced with zero.
# The result is assigned in one step because an inplace fillna on a selected
# column is not guaranteed to update merged_data
merged_data['Differenced_Sentiment'] = merged_data['AverageSentimentScore'].diff().fillna(0)
print(merged_data)

In [None]:
stationarity = adfuller(merged_data['Differenced_Sentiment'])

print('Dickey Fuller p-value: %F' % stationarity[1])

# 7 day forecast

In [None]:
import statsmodels.api as sm

merged_data['Date'] = pd.to_datetime(merged_data['Date'])
merged_data.set_index('Date', inplace=True)

In [None]:
# splitting the data chronologically: first 80% of the rows for training, the rest for testing
train_size = int(0.8 * len(merged_data))
train_data = merged_data.iloc[:train_size]
test_data = merged_data.iloc[train_size:]

# fitting the ETS model (additive trend and seasonality) to the training data
# NOTE(review): seasonal_periods=20 here, while the forecasting cells below use 7, 10 and 30 — confirm
ets_model = sm.tsa.ExponentialSmoothing(train_data['Differenced_Sentiment'], trend='add', seasonal='add', seasonal_periods=20)
ets_result = ets_model.fit()

# generating one forecast per test row and aligning it with the test rows by position,
# so the error calculation does not depend on the index labels the model attaches
forecast_periods = len(test_data)
forecast_values = pd.Series(np.asarray(ets_result.forecast(steps=forecast_periods)), index=test_data.index)

# calculating forecast errors
actual_values = test_data['Differenced_Sentiment']
forecast_errors = actual_values - forecast_values

# calculating evaluation metrics
mae_ets = forecast_errors.abs().mean()
mse_ets = (forecast_errors ** 2).mean()
rmse_ets = np.sqrt(mse_ets)

# MAPE is undefined where the actual value is 0 (which a differenced series can contain),
# so those rows are left out; the result is NaN when every actual value is 0
nonzero_actuals = actual_values != 0
mape_ets = (forecast_errors[nonzero_actuals] / actual_values[nonzero_actuals]).abs().mean() * 100

# displaying the results
print("Forecast Evaluation Results:")
print("ETS_Mean Absolute Error (MAE):", mae_ets)
print("ETS_Mean Squared Error (MSE):", mse_ets)
print("ETS_Root Mean Squared Error (RMSE):", rmse_ets)
print("ETS_Mean Absolute Percentage Error (MAPE):", mape_ets)


# creating a DataFrame to store error results for the ETS model
error_ets = pd.DataFrame({
    'Model': ['ExponentialSmoothing'],
    'MAE': [mae_ets],
    'MSE': [mse_ets],
    'RMSE': [rmse_ets],
    'MAPE': [mape_ets]
})

In [None]:
# creating a chart to visualize the results
plt.figure(figsize=(12, 6))
plt.plot(train_data.index, train_data['Differenced_Sentiment'], label='Training Data', marker='o')
plt.plot(test_data.index, test_data['Differenced_Sentiment'], label='Test Data', marker='o')
plt.plot(test_data.index, forecast_values, label='Forecast', linestyle='--', marker='o')

plt.legend()
plt.title('Time Series Forecast with ETS')
plt.xlabel('Date')
plt.ylabel('Value')
plt.grid(True)
plt.show()

In [None]:
# fitting the ETS model (additive trend + additive seasonality) to the whole differenced series
# NOTE(review): seasonal_periods=7 here, whereas the other ETS cells in this notebook use 20, 10 and 30 — confirm the intended cycle
ets_model = sm.tsa.ExponentialSmoothing(merged_data['Differenced_Sentiment'], trend='add', seasonal='add', seasonal_periods=7)
ets_result = ets_model.fit()

# generating forecasts for the next 7 days
# (the model is fitted on 'Differenced_Sentiment', so these are forecast day-over-day changes, not sentiment levels)
forecast_periods = 7
forecast_values = ets_result.forecast(steps=forecast_periods)

In [None]:
# plotting the original data and the forecast
plt.figure(figsize=(12, 6))
plt.plot(merged_data['Differenced_Sentiment'], label='Original Data', marker='o')
plt.plot(forecast_values, label='Forecast', linestyle='--', marker='o')
plt.legend()
plt.title('7 Days Forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.show()

### 30 Days forecast

In [None]:
# fitting the ETS model (additive trend + additive seasonality) to the whole differenced series
# NOTE(review): seasonal_periods=10 here, whereas the other ETS cells in this notebook use 20, 7 and 30 — confirm the intended cycle
ets_model = sm.tsa.ExponentialSmoothing(merged_data['Differenced_Sentiment'], trend='add', seasonal='add', seasonal_periods=10)
ets_result = ets_model.fit()

# generating forecasts for the next 30 days
# (the model is fitted on 'Differenced_Sentiment', so these are forecast day-over-day changes, not sentiment levels)
forecast_periods = 30
forecast_values = ets_result.forecast(steps=forecast_periods)

In [None]:
# plottinh the original data and the forecast
plt.figure(figsize=(12, 6))
plt.plot(merged_data['Differenced_Sentiment'], label='Original Data', marker='o')
plt.plot(forecast_values, label='Forecast', linestyle='--', marker='o')
plt.legend()
plt.title('30 Days Forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.show()

## 90 days forecast

In [None]:
# fitting the ETS model (additive trend + additive seasonality) to the whole differenced series
# NOTE(review): seasonal_periods=30 here, whereas the other ETS cells in this notebook use 20, 7 and 10 — confirm the intended cycle
ets_model = sm.tsa.ExponentialSmoothing(merged_data['Differenced_Sentiment'], trend='add', seasonal='add', seasonal_periods=30)
ets_result = ets_model.fit()

# generating forecasts for the next 90 days
# (the model is fitted on 'Differenced_Sentiment', so these are forecast day-over-day changes, not sentiment levels)
forecast_periods = 90
forecast_values = ets_result.forecast(steps=forecast_periods)

In [None]:
# Plot the original data and the forecast
plt.figure(figsize=(12, 6))
plt.plot(merged_data['Differenced_Sentiment'], label='Original Data', marker='o')
plt.plot(forecast_values, label='Forecast', linestyle='--', marker='o')
plt.legend()
plt.title('90 Days Forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.show()

# ARIMA

### Converting data into stationary form

In [None]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the raw daily average sentiment
stationarity = adfuller(full_df['AverageSentimentScore'])

# element 1 of the result tuple is the p-value
adf_p_value = stationarity[1]
print('Dickey Fuller p-value: %F' % adf_p_value)

In [None]:
# series under test: the daily average sentiment score
time_series = full_df['AverageSentimentScore']

# performing the ADF test
result = adfuller(time_series)

# unpacking the six components of the result tuple
adf_statistic, p_value, used_lag, nobs, critical_values, icbest = result

# printing every component with its label
adf_report = (
    ("ADF Statistic:", adf_statistic),
    ("p-value:", p_value),
    ("Used Lag:", used_lag),
    ("Number of Observations:", nobs),
    ("Critical Values:", critical_values),
    ("IC Best:", icbest),
)
for report_label, report_value in adf_report:
    print(report_label, report_value)

# interpreting the p-value against the 0.05 threshold
verdict = (
    "Reject the null hypothesis: The data is stationary."
    if p_value <= 0.05
    else "Fail to reject the null hypothesis: The data is not stationary."
)
print(verdict)

In [None]:
# First difference of the daily average sentiment. The first row has no
# predecessor, so diff() leaves it NaN; it is replaced with 0 here.
# Assigning the result back (instead of calling fillna(..., inplace=True) on
# the selected column) guarantees the frame is actually updated: the chained
# in-place form is deprecated and has no effect under pandas copy-on-write.
full_df['Differenced_Sentiment'] = full_df['AverageSentimentScore'].diff().fillna(0)
print(full_df)

In [None]:
# repeating the ADF test, this time on the differenced series
stationarity = adfuller(full_df['Differenced_Sentiment'])

# element 1 of the result tuple is the p-value
diff_p_value = stationarity[1]
print('Dickey Fuller p-value: %F' % diff_p_value)

### Predicting next 7 days

In [None]:
# holding out the final 7 observations as the test window
sentiment_series = full_df['AverageSentimentScore']
train = sentiment_series.iloc[:-7]
test = sentiment_series.iloc[-7:]

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# plotting ACF and PACF of the differenced series for the first 30 lags;
# each plt.title call labels the figure that was just created
acf_plot = plot_acf(full_df['Differenced_Sentiment'], lags=30)
plt.title('Autocorrelation Function (ACF)')

pacf_plot = plot_pacf(full_df['Differenced_Sentiment'], lags=30)
# title previously lacked its closing parenthesis
plt.title('Partial Autocorrelation Function (PACF)')

plt.show()

In [None]:
import itertools
import statsmodels.api as sm

# defining the range of values for p, d, and q
p_values = range(0, 3)
d_values = range(0, 2)
q_values = range(0, 3)

best_aic = float("inf")
best_params = (0, 0, 0)
# orders whose fit raised, kept so failures are visible instead of silently dropped
failed_orders = []

# grid search: keep the (p, d, q) order with the lowest AIC
# NOTE(review): the search is fitted on the full series, including the last 7
# points that are held out as `test` elsewhere in the notebook - confirm this
# is intended, since it lets the test window influence model selection.
for p, d, q in itertools.product(p_values, d_values, q_values):
    try:
        model = sm.tsa.ARIMA(full_df['AverageSentimentScore'], order=(p, d, q))
        results = model.fit()
        aic = results.aic
    except Exception as exc:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still works
        failed_orders.append(((p, d, q), repr(exc)))
        continue

    if aic < best_aic:
        best_aic = aic
        best_params = (p, d, q)

if failed_orders:
    print("Skipped orders (fit failed):", failed_orders)

print("Best AIC:", best_aic)
print("Best Parameters (p, d, q):", best_params)

In [None]:
import statsmodels.api as sm

# ARIMA(0, 1, 1) fitted on the training window only
model = sm.tsa.ARIMA(endog=train, order=(0, 1, 1))
results = model.fit()

In [None]:
# out-of-sample forecast covering the 7 steps after the training window
arima_horizon = 7
forecast_values = results.forecast(steps=arima_horizon)

# showing the forecast
print("Forecasted Values:", forecast_values)

# Rolling method

In [None]:
# converting the 'Date' column to datetime
daily_average_sentiment['Date'] = pd.to_datetime(daily_average_sentiment['Date'])

# finding the minimum and maximum dates in the DataFrame
min_date = daily_average_sentiment['Date'].min()
max_date = daily_average_sentiment['Date'].max()

# one row per calendar day between the first and last observed dates
date_range = pd.date_range(start=min_date, end=max_date)
date_range_df = pd.DataFrame({'Date': date_range})

# Left-merging onto the full calendar leaves 'AverageSentimentScore' as NaN on
# days without data, so no explicit fill is needed (the former
# fillna(float('nan'), inplace=True) replaced NaN with NaN and was a no-op).
rolling = (
    date_range_df
    .merge(daily_average_sentiment, on='Date', how='left')
    .sort_values('Date')
    .reset_index(drop=True)
)
rolling.head()

In [None]:
# filling the gaps in the daily average by linear interpolation between known values
rolling = rolling.assign(
    AverageSentimentScore=rolling['AverageSentimentScore'].interpolate(method='linear')
)

In [None]:
# First difference of the interpolated daily average. The first row has no
# predecessor, so diff() leaves it NaN; it is replaced with 0 here.
# Assigning the result back (instead of calling fillna(..., inplace=True) on
# the selected column) guarantees the frame is actually updated: the chained
# in-place form is deprecated and has no effect under pandas copy-on-write.
rolling['Differenced_Sentiment'] = rolling['AverageSentimentScore'].diff().fillna(0)
print(rolling)

In [None]:
# index the frame by date, expose the differenced series as the target column 'y',
# and keep the rows in chronological order
rolling = (
    rolling
    .assign(Date=pd.to_datetime(rolling['Date'], format='%Y/%m/%d'))
    .set_index('Date')
    .rename(columns={'Differenced_Sentiment': 'y'})
    .sort_index()
)
rolling.head()

In [None]:
# summary of the date-indexed frame (columns, dtypes, non-null counts)
rolling.info()

In [None]:
# the index is complete when it equals the generated range between its own endpoints
expected_index = pd.date_range(
    start=rolling.index.min(),
    end=rolling.index.max(),
    freq=rolling.index.freq,
)
(rolling.index == expected_index).all()

In [None]:
# the last `steps` rows form the test window; everything before is training data
steps = 7
rolling_train, rolling_test = rolling.iloc[:-steps], rolling.iloc[-steps:]

print(f"Train dates : {rolling_train.index.min()} --- {rolling_train.index.max()}  (n={len(rolling_train)})")
print(f"Test dates  : {rolling_test.index.min()} --- {rolling_test.index.max()}  (n={len(rolling_test)})")

# both windows of the target on a shared axis
fig, ax = plt.subplots(figsize=(9, 4))
for window_label, window in (('train', rolling_train), ('test', rolling_test)):
    window['y'].plot(ax=ax, label=window_label)
ax.legend();

In [None]:
# creating and train forecaster
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from sklearn.ensemble import RandomForestRegressor

# autoregressive forecaster: a seeded random forest using 7 lags of the target
rf_regressor = RandomForestRegressor(random_state=123)
forecaster = ForecasterAutoreg(regressor=rf_regressor, lags=7)

# training on the train window only
forecaster.fit(y=rolling_train['y'])
forecaster

In [None]:
# predicting the test window and labelling the predictions with the test dates
steps = 7
predictions = forecaster.predict(steps=steps)
predictions = predictions.set_axis(rolling_test.index)
predictions.head(7)

In [None]:
# plotting train, test and predicted values on one axis
fig, ax = plt.subplots(figsize=(9, 4))
rolling_train['y'].plot(ax=ax, label='train')
rolling_test['y'].plot(ax=ax, label='test')
predictions.plot(ax=ax, label='predictions')
ax.legend();


# calculating forecast errors (actual minus predicted)
errors = rolling_test['y'] - predictions

# calculating evaluation metrics
mae_R = mean_absolute_error(rolling_test['y'], predictions)
mse_R = mean_squared_error(rolling_test['y'], predictions)
# RMSE must come from this model's own MSE; the previous np.sqrt(mse) picked up
# a value left over from a different model
rmse_R = np.sqrt(mse_R)
# MAPE is the mean of |error / actual|; taking the absolute value of the whole
# ratio stops negative actuals from cancelling positive terms.
# NOTE(review): 'y' is a differenced series and may contain zeros, in which
# case this metric is inf/NaN - confirm whether MAPE is meaningful here.
mape_R = np.abs(errors / rolling_test['y']).mean() * 100

# printing the error metrics
print("Roll_Mean Absolute Error (MAE):", mae_R)
print("Roll_Mean Squared Error (MSE):", mse_R)
print("Roll_Root Mean Squared Error (RMSE):", rmse_R)
print("Roll_Mean Absolute Percentage Error (MAPE):", mape_R)

In [None]:
# one-row table holding the error metrics of the random-forest forecaster
rf_error_row = {
    'Model': 'RandomForestRegressor',
    'MAE': mae_R,
    'MSE': mse_R,
    'RMSE': rmse_R,
    'MAPE': mape_R,
}
error_R = pd.DataFrame([rf_error_row])

In [None]:
# stacking the ETS and random-forest error tables into one comparison table
error_df = pd.concat([error_ets, error_R], ignore_index=True)
error_df

In [None]:
forecast_data = rolling.copy()

# setting the frequency of the DatetimeIndex
forecast_data.index.freq = 'D'

# setting the number of periods to forecast
forecast_periods = 7

# creating a RandomForestRegressor-based forecaster
forecaster = ForecasterAutoreg(
    regressor=RandomForestRegressor(random_state=123),
    lags=7
)

# fitting the forecaster to the full series (no hold-out: this is the real forecast)
forecaster.fit(y=forecast_data['y'])

# generating forecasts for the next 7 days
forecasts = forecaster.predict(steps=forecast_periods)

# getting the last date in the existing data
last_date = forecast_data.index[-1]

# The forecast horizon starts the day after the last observation and has
# exactly `forecast_periods` dates. The previous code used
# date_range(..., closed='right') (removed in pandas 2.0) and then sliced
# [1:] as well, which left one date fewer than there are forecasts.
forecast_dates = pd.date_range(
    start=last_date + pd.Timedelta(days=1),
    periods=forecast_periods,
    freq='D',
)

# np.asarray pairs forecasts with dates by position, whatever index the
# forecaster returned
forecast_df = pd.DataFrame({'Forecast': np.asarray(forecasts)}, index=forecast_dates)

# printing the forecast
print(forecast_df)


# plotting the forecast
plt.figure(figsize=(10, 5))
plt.plot(forecast_data.index, forecast_data['y'], label='Actual Data', marker='o')
plt.plot(forecast_df.index, forecast_df['Forecast'], label='Forecast', linestyle='--', marker='o')
plt.title('Forecast for the Next 7 Days')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()

## Dashboard

In [None]:
# displaying the frame that feeds the ETS forecast charts of the dashboard
merged_data

In [None]:
import ipywidgets as widgets
from IPython.display import display
import ipywidgets as widgets
import plotly.express as px
import statsmodels.api as sm
import plotly.graph_objects as go
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import dash_table
import dash_bootstrap_components as dbc
import plotly.offline as pyo
from dash.dependencies import Output, Input
import dash_core_components as dcc
import dash_html_components as html

In [None]:
# The day-of-week bar chart (fig_bar) is built further down in this cell from
# df_table; the earlier copy built here from df4 was overwritten before being
# used, so it has been removed.

# Creating an interactive box plot: one box of per-tweet sentiment scores per calendar day
fig_box = px.box(
    df,
    x=df['Date'].dt.date,
    y='SentimentScore',
    labels={'x': 'Date', 'y': 'Sentiment Score'},
    title='Distribution of Sentiment Scores for Each Day',
)
fig_box.update_xaxes(tickangle=90)




def _build_ets_forecast_figure(data, seasonal_periods, horizon):
    """Fit an additive ETS model and chart the history plus its forecast.

    Parameters
    ----------
    data : DataFrame
        Must provide the 'Date' and 'Differenced_Sentiment' columns.
    seasonal_periods : int
        Length of the seasonal cycle passed to ExponentialSmoothing.
    horizon : int
        Number of steps to forecast; also used in the chart title and
        trace name ("<horizon> Days Forecast").

    Returns
    -------
    tuple
        (plotly figure, fitted ETS result, forecast values).
    """
    series = data['Differenced_Sentiment']
    result = sm.tsa.ExponentialSmoothing(
        series, trend='add', seasonal='add', seasonal_periods=seasonal_periods
    ).fit()
    forecast = result.forecast(steps=horizon)

    # The forecast trace starts at the last observed point so the dashed line
    # connects visually to the history.
    last_date = data['Date'].iloc[-1]
    future_dates = [
        (pd.to_datetime(last_date) + pd.DateOffset(days=i)).date()
        for i in range(1, horizon + 1)
    ]
    label = f'{horizon} Days Forecast'

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data['Date'], y=series,
                             mode='lines+markers', name='Original Data'))
    fig.add_trace(go.Scatter(x=[last_date] + future_dates,
                             y=[series.iloc[-1]] + forecast.tolist(),
                             mode='lines+markers', name=label, line=dict(dash='dash')))
    fig.update_layout(title=label, xaxis_title='Date', yaxis_title='Value')
    return fig, result, forecast


# 7-day forecast, weekly seasonal cycle
forecast_periods1 = 7
fig_ets1, ets_result1, forecast_values1 = _build_ets_forecast_figure(
    merged_data, seasonal_periods=7, horizon=forecast_periods1)

# 30-day forecast, 10-step seasonal cycle
forecast_periods2 = 30
fig_ets2, ets_result2, forecast_values2 = _build_ets_forecast_figure(
    merged_data, seasonal_periods=10, horizon=forecast_periods2)

# 90-day forecast, 30-step seasonal cycle
forecast_periods3 = 90
fig_ets3, ets_result3, forecast_values3 = _build_ets_forecast_figure(
    merged_data, seasonal_periods=30, horizon=forecast_periods3)


# average sentiment score per day of the week, shown in both the table and the bar chart
data = dict(
    DayOfWeek=['Friday', 'Monday', 'Saturday', 'Sunday', 'Thursday', 'Tuesday', 'Wednesday'],
    SentimentScore=[0.140448, 0.160366, 0.152299, 0.177131, 0.039275, 0.115775, 0.055170],
)
df_table = pd.DataFrame(data)


# Dash table listing the same values, centred at half the page width
table_columns = [{'name': col, 'id': col} for col in df_table.columns]
table_rows = df_table.to_dict('records')
table = dash_table.DataTable(
    id='sentiment-table',
    columns=table_columns,
    data=table_rows,
    style_table={'width': '50%', 'margin': 'auto'},
)

# horizontal bar chart of the same values
fig_bar = px.bar(
    df_table,
    x='SentimentScore',
    y='DayOfWeek',
    orientation='h',
    title='Average Sentiment Score per Day of the Week',
)
fig_bar.update_layout(
    xaxis_title='Average Sentiment Score',
    yaxis_title='Day of the Week',
    showlegend=False,
    plot_bgcolor='lightgray',  # background of the plotting area
    paper_bgcolor='black',  # background of the surrounding figure
)

# Creating the dashboard app
# The import cell only pulls names out of dash sub-modules, so the bare `dash`
# name used below has to be imported explicitly.
import dash

app = dash.Dash(__name__)

# Define a consistent style for graphs
graph_style = {
    'border': '1px solid lightgray',
    'margin': '10px',
    'box-shadow': '2px 2px 5px #888888'
}

# Layout: title, the five charts, the table, and the two components the save
# callback is wired to. A callback's Input/Output ids must exist in the layout;
# previously the button and div were created as stand-alone expressions after
# the layout and discarded, so the callback referenced ids that did not exist.
app.layout = html.Div([
    html.H1('Twitter Sentiment Analysis and Forecast', style={'textAlign': 'center'}),
    dcc.Graph(figure=fig_box, style=graph_style),
    dcc.Graph(figure=fig_bar, style=graph_style),
    dcc.Graph(figure=fig_ets1, style=graph_style),
    dcc.Graph(figure=fig_ets2, style=graph_style),
    dcc.Graph(figure=fig_ets3, style=graph_style),
    table,
    # NOTE(review): the button is hidden (display: none), so it cannot be
    # clicked from the page as written - confirm how saving is meant to be triggered.
    html.Button(id='save-button', n_clicks=0, style={'display': 'none'}),
    html.Div(id='dummy-div'),
], style={'padding': '20px'})


@app.callback(Output('dummy-div', 'children'), Input('save-button', 'n_clicks'))
def save_dashboard(n_clicks):
    """Write the app's index page to disk once the save button has been clicked.

    Parameters
    ----------
    n_clicks : int
        Click count of 'save-button'; nothing is written while it is 0/None.

    Returns
    -------
    None
        Used as the (empty) children of 'dummy-div'.
    """
    if n_clicks:
        with open('C:\\ESTERA\\CCT\\TWEETS\\dash_app.html', 'w') as f:
            f.write(app.index())
    return None


if __name__ == '__main__':
    app.run_server(debug=False)