# Importing libraries

In [3]:
import pandas as pd
import os
import csv
import datetime
import time
from pytz import timezone
import tweepy
import json
import statsmodels.api as sm
import plotly.graph_objects as go

# Loading the credentials

In [2]:
# Load the Twitter API credentials from the config file.
# config.json is expected to hold the four OAuth 1.0a values read below.
with open('config.json', 'r') as f:
    config = json.load(f)

consumer_key = config['consumer_key']
consumer_secret = config['consumer_secret']
access_token = config['access_token']
access_token_secret = config['access_token_secret']

# Verify the Twitter API credentials
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
try:
    api = tweepy.API(auth, wait_on_rate_limit=True)
    user = api.verify_credentials()
    print("Twitter API connection successful.")
# tweepy v4 (required by api.search_tweets used later in this notebook) removed
# the tweepy.error module; TweepyException is the base class of its errors.
except tweepy.TweepyException as e:
    print("Error: Failed to verify Twitter API credentials.")
    print(e)

Twitter API connection successful.


# Disabling warnings

In [18]:
import warnings
warnings.filterwarnings("ignore")

# Downloading Twitter data

In [4]:
# Define the topic and date range.
# pytz zones must be attached with localize(); passing them as tzinfo= to the
# datetime constructor silently picks the zone's historical LMT offset.
topic = 'Ireland'
london_tz = timezone('Europe/London')
end_date = london_tz.localize(datetime.datetime(2023, 5, 21))
start_date = london_tz.localize(datetime.datetime(2022, 5, 21))
# NOTE(review): start_date/end_date are not used by the search below — confirm
# whether the query was meant to be restricted to this range.

# Fetch tweets on the specified topic
tweets = []
try:
    for tweet in tweepy.Cursor(api.search_tweets, q=topic, lang='en', tweet_mode='extended').items(2000):
        tweets.append({
            'Date': tweet.created_at.date(),
            'Tweet': tweet.full_text
        })
    print("Tweets downloaded successfully.")
except tweepy.TweepyException as e:
    # Only HTTP errors carry API error codes and a response object, so both
    # are read defensively to keep this handler from raising itself.
    if 88 in getattr(e, 'api_codes', ()):
        response = getattr(e, 'response', None)
        retry_after = response.headers.get('Retry-After') if response is not None else None
        if retry_after is not None and str(retry_after).isdigit():
            # Rate limit reached, wait for the specified duration
            wait_time = int(retry_after)
            print("Rate limit reached. Sleeping for:", wait_time, "seconds.")
            time.sleep(wait_time)
        else:
            print("Rate limit reached.")
    print("Error: Failed to download tweets.")
    print(e)

Tweets downloaded successfully.


# Saving Twitter data

In [5]:
# Build a DataFrame from the tweets collected in this session
df_new = pd.DataFrame(tweets)

if not os.path.isfile('tweets.csv'):
    # First run: no file yet, so write the new tweets as-is
    df_new.to_csv('tweets.csv', index=False)
    print("New CSV file created with the downloaded tweets.")
else:
    # Later runs: load what is already stored, add the new rows after it,
    # and rewrite the whole file
    df_existing = pd.read_csv('tweets.csv')
    df_combined = pd.concat([df_existing, df_new], ignore_index=True)
    df_combined.to_csv('tweets.csv', index=False)
    print("Tweets appended to the existing CSV file.")

New CSV file created with the downloaded tweets.


# Sentiment analysis

In [6]:
from textblob import TextBlob
import pandas as pd


def _polarity(text):
    """Return the TextBlob polarity score of a piece of text."""
    return TextBlob(text).sentiment.polarity


def _sentiment_label(score):
    """Map a polarity score to 'Positive', 'Negative' or 'Neutral' by its sign."""
    if score > 0:
        return 'Positive'
    if score < 0:
        return 'Negative'
    return 'Neutral'


# Load the tweets from the CSV file
df = pd.read_csv('tweets.csv')

# read_csv turns empty or "NA"-like cells into NaN (a float), which TextBlob
# rejects with a TypeError; score such rows as empty text instead.
tweet_text = df['Tweet'].fillna('').astype(str)

# Perform sentiment analysis using TextBlob
df['sentiment'] = tweet_text.apply(_polarity)

# Classify sentiment as positive, negative, or neutral
df['sentiment_label'] = df['sentiment'].apply(_sentiment_label)

# Save the updated DataFrame to CSV
df.to_csv('tweets_sentiment.csv', index=False)

# Time series forecast of the sentiment

In [20]:
# Load the tweet sentiment data, index it by date and put it in chronological
# order. A stable sort keeps the stored order of rows that share a date.
df = (
    pd.read_csv('tweets_sentiment.csv', parse_dates=['Date'])
    .set_index('Date')
    .sort_index(kind='mergesort')
)


def _future_dates(last_date, forecast):
    """Return one daily timestamp per forecast value, starting the day after last_date."""
    # date_range includes last_date itself, so ask for one extra period and drop it.
    return pd.date_range(start=last_date, periods=len(forecast) + 1)[1:]


try:
    # Fit an ARIMA model to the sentiment data
    model = sm.tsa.ARIMA(df['sentiment'], order=(1, 0, 1), trend='c').fit()

    # Generate out-of-sample predictions: 7, 31 and 91 steps past the last row.
    # NOTE(review): steps are counted in rows of df, while the plot below lays
    # them out one per calendar day — confirm this is intended, or aggregate
    # the sentiment to one value per day before fitting.
    n_obs = len(df)
    forecast_1w = model.predict(start=n_obs, end=n_obs + 6, dynamic=False)
    forecast_1m = model.predict(start=n_obs, end=n_obs + 30, dynamic=False)
    forecast_3m = model.predict(start=n_obs, end=n_obs + 90, dynamic=False)

    # Create Plotly figure
    fig = go.Figure()

    # Add actual sentiment data
    fig.add_trace(go.Scatter(x=df.index, y=df['sentiment'], name='Actual'))

    # Add forecasted sentiment data. Each date axis is sized from its forecast
    # so x and y always have the same length, and starts after the latest date.
    last_date = df.index.max()
    forecast_dates_1w = _future_dates(last_date, forecast_1w)
    forecast_dates_1m = _future_dates(last_date, forecast_1m)
    forecast_dates_3m = _future_dates(last_date, forecast_3m)
    fig.add_trace(go.Scatter(x=forecast_dates_1w, y=forecast_1w, name='1 Week Forecast'))
    fig.add_trace(go.Scatter(x=forecast_dates_1m, y=forecast_1m, name='1 Month Forecast'))
    fig.add_trace(go.Scatter(x=forecast_dates_3m, y=forecast_3m, name='3 Months Forecast'))

    # Update layout
    fig.update_layout(
        title='Time Series Forecast of Sentiment',
        xaxis_title='Date',
        yaxis_title='Sentiment',
        legend_title='Forecast',
        hovermode='x unified'
    )

    # Show the interactive Plotly graph
    fig.show()

except ValueError as e:
    print("Error: Failed to make time series forecast.")
    print(e)

In [22]:
# Render each forecast Series as plain text (index and value, no header)
forecast_1w_str, forecast_1m_str, forecast_3m_str = [
    series.to_string(header=False)
    for series in (forecast_1w, forecast_1m, forecast_3m)
]

# Print every horizon under its own heading
for heading, rendered in (
    ("1 Week Forecast:", forecast_1w_str),
    ("1 Month Forecast:", forecast_1m_str),
    ("3 Months Forecast:", forecast_3m_str),
):
    print(heading)
    print(rendered)

1 Week Forecast:
2000    0.052659
2001    0.052391
2002    0.052393
2003    0.052393
2004    0.052393
2005    0.052393
2006    0.052393
1 Month Forecast:
2000    0.052659
2001    0.052391
2002    0.052393
2003    0.052393
2004    0.052393
2005    0.052393
2006    0.052393
2007    0.052393
2008    0.052393
2009    0.052393
2010    0.052393
2011    0.052393
2012    0.052393
2013    0.052393
2014    0.052393
2015    0.052393
2016    0.052393
2017    0.052393
2018    0.052393
2019    0.052393
2020    0.052393
2021    0.052393
2022    0.052393
2023    0.052393
2024    0.052393
2025    0.052393
2026    0.052393
2027    0.052393
2028    0.052393
2029    0.052393
2030    0.052393
3 Months Forecast:
2000    0.052659
2001    0.052391
2002    0.052393
2003    0.052393
2004    0.052393
2005    0.052393
2006    0.052393
2007    0.052393
2008    0.052393
2009    0.052393
2010    0.052393
2011    0.052393
2012    0.052393
2013    0.052393
2014    0.052393
2015    0.052393
2016    0.052393
2017    0.0