# Install libraries

In [1]:
!pip install pytz
!pip install tweepy
!pip install statsmodels
!pip install plotly

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


# Importing libraries

In [2]:
import pandas as pd
import os
import csv
import datetime
import time
import sys
from pytz import timezone
import tweepy
import json
import statsmodels.api as sm
import plotly.graph_objects as go
from datetime import datetime, timedelta
from pandas.errors import EmptyDataError
import logging

# Loading the credentials

In [3]:
# Load the Twitter API credentials from the config file.
# config.json must contain the four keys read below.
with open('config.json', 'r') as f:
    config = json.load(f)

consumer_key = config['consumer_key']
consumer_secret = config['consumer_secret']
access_token = config['access_token']
access_token_secret = config['access_token_secret']

# Verify the Twitter API credentials
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
try:
    api = tweepy.API(auth, wait_on_rate_limit=True)
    user = api.verify_credentials()
    print("Twitter API connection successful.")
# tweepy 4 removed the tweepy.error module; TweepyException is the base class
# for its errors and matches what the download cell catches.
except tweepy.TweepyException as e:
    # Best-effort: report the failure and carry on, as the cell always did.
    print("Error: Failed to verify Twitter API credentials.")
    print(e)

Twitter API connection successful.


# Disabling warnings

In [4]:
import warnings

# Install a process-wide filter whose action is "ignore"; with no category or
# message given it matches every warning raised from here on.
warnings.filterwarnings(action="ignore")

# Downloading and saving Twitter data
The free tier of the Twitter API has the following limitations:

- **7-day tweet history limit**
- **1500-tweet request limit**

In [None]:
# Download tweets about Apple one day at a time and append each non-empty
# batch to tweets.csv. Everything printed while the download runs is written
# to a dated log file instead of the notebook output.

TWEETS_CSV = 'tweets.csv'
RATE_LIMIT_API_CODE = 88  # Twitter API error code handled as "rate limit reached"
# Fallback sleep when the error response carries no reset hint.
# NOTE(review): 15 minutes assumes Twitter's usual rate-limit window - confirm.
DEFAULT_RATE_LIMIT_WAIT = 15 * 60


def rate_limit_wait_seconds(error):
    """Return the number of seconds to sleep after a rate-limit error.

    Returns None when `error` is not a rate-limit error. tweepy 4 exposes the
    Twitter error codes as `api_codes` (a list) on HTTP errors only, so the
    attribute is read defensively rather than assumed to exist.
    """
    api_codes = getattr(error, 'api_codes', None) or []
    is_rate_limit = (
        RATE_LIMIT_API_CODE in api_codes
        or isinstance(error, tweepy.TooManyRequests)
    )
    if not is_rate_limit:
        return None

    response = getattr(error, 'response', None)
    headers = getattr(response, 'headers', None) or {}
    if 'Retry-After' in headers:
        return int(headers['Retry-After'])
    if 'x-rate-limit-reset' in headers:
        # The header holds the epoch second at which the window resets.
        return max(0, int(headers['x-rate-limit-reset']) - int(time.time())) + 1
    return DEFAULT_RATE_LIMIT_WAIT


def append_tweets_to_csv(new_tweets, csv_path=TWEETS_CSV):
    """Append `new_tweets` to `csv_path` and report whether the file already held data.

    The header row is written only when the file is missing or empty, so the
    existing rows never have to be read back and rewritten.
    """
    file_has_data = os.path.isfile(csv_path) and os.path.getsize(csv_path) > 0
    new_tweets.to_csv(csv_path, mode='a', header=not file_has_data, index=False)
    return file_has_data


# Get today's date
today = datetime.now().date()

# Create a log file name with today's date
log_file = f"TwitterAPI_{today}.log"

# Define the topic and initial date range
topic = "(ios OR apple OR AAPL OR iphone OR ipad)"
start_date = today - timedelta(days=91)

# Mode "a" creates the file when it is missing and appends otherwise.
# stdout is restored in `finally` so later cells can still print even if the
# download fails part-way through.
notebook_stdout = sys.stdout
with open(log_file, "a", encoding="utf-8") as log_handle:
    sys.stdout = log_handle
    try:
        # Create a loop to run for 91 days
        for _ in range(91):
            # Calculate the end date for the current iteration
            end_date = start_date + timedelta(days=1)

            # Format the dates as strings
            start_date_str = start_date.strftime("%Y-%m-%d")
            end_date_str = end_date.strftime("%Y-%m-%d")

            # Define the search query with the current date range
            query = f"{topic} until:{end_date_str} since:{start_date_str}"

            # Fetch tweets on the specified topic
            try:
                tweets = [
                    {'Date': tweet.created_at.date(), 'Tweet': tweet.full_text}
                    for tweet in tweepy.Cursor(
                        api.search_tweets, q=query, lang='en', tweet_mode='extended'
                    ).items(1500)
                ]

                if tweets:
                    print("Tweets downloaded successfully for the date range:", start_date_str, "to", end_date_str)

                    # Convert the tweets list into a DataFrame and persist it
                    df_new = pd.DataFrame(tweets)
                    if append_tweets_to_csv(df_new):
                        print("Tweets appended to the existing CSV file.")
                    else:
                        print("New CSV file created with the downloaded tweets.")
                else:
                    print("No tweets found for the date range:", start_date_str, "to", end_date_str)

            except tweepy.TweepyException as e:
                wait_time = rate_limit_wait_seconds(e)
                if wait_time is not None:
                    # Rate limit reached, wait for the specified duration.
                    # The failed day is not retried; the loop moves on.
                    print("Rate limit reached. Sleeping for:", wait_time, "seconds.")
                    time.sleep(wait_time)
                print("Error: Failed to download tweets.")
                print(e)

            # Update the start date for the next iteration
            start_date = end_date
    finally:
        sys.stdout = notebook_stdout

# Log data

In [None]:
# Read the log file into a DataFrame.
# `log_file` is the path variable set by the download cell, not a literal name.
# Each line is split on its first ":" only, so messages that contain further
# colons (URLs, exception text) stay on one row.
# NOTE(review): the column names are kept as they were, but the messages this
# notebook prints carry no timestamp - the first column holds the text before
# the first colon.
log_records = []
with open(log_file, encoding="utf-8", errors="replace") as log_handle:
    for raw_line in log_handle:
        line = raw_line.rstrip("\n")
        if not line.strip():
            continue  # skip blank lines
        prefix, separator, remainder = line.partition(":")
        log_records.append((prefix, remainder if separator else None))

log_df = pd.DataFrame(log_records, columns=["Timestamp", "Log Message"])
# Display the log DataFrame
log_df

# Sentiment analysis

In [None]:
from textblob import TextBlob
import pandas as pd


def label_sentiment(polarity):
    """Map a polarity score to 'Positive' (> 0), 'Negative' (< 0) or 'Neutral' (otherwise)."""
    if polarity > 0:
        return 'Positive'
    if polarity < 0:
        return 'Negative'
    return 'Neutral'


# Load the tweets from the CSV file
df = pd.read_csv('tweets.csv')

# An empty tweet is read back from CSV as NaN (a float), which TextBlob does
# not accept as text. Score those rows as empty strings instead of crashing.
# The 'Tweet' column itself is left untouched.
tweet_text = df['Tweet'].fillna('').astype(str)

# Perform sentiment analysis using TextBlob
df['sentiment'] = tweet_text.apply(lambda text: TextBlob(text).sentiment.polarity)

# Classify sentiment as positive, negative, or neutral
df['sentiment_label'] = df['sentiment'].apply(label_sentiment)

# Save the updated DataFrame to CSV
df.to_csv('tweets_sentiment.csv', index=False)

# Time series forecast of the sentiment

In [None]:
# Forecast the daily mean tweet sentiment 1 week, 1 month and 3 months ahead.
# Defines forecast_1w, forecast_1m and forecast_3m, which the next cell prints.
# NOTE(review): the AR(1) model below is a simple baseline chosen for this
# cell, not a tuned model - confirm the order suits the data.

FORECAST_1W_DAYS = 7
FORECAST_1M_DAYS = 30
FORECAST_3M_DAYS = 90
# Sanity floor only, not statistical guidance.
MIN_DAILY_OBSERVATIONS = 3

# Load the scored tweets written by the sentiment-analysis cell
sentiment_scored = pd.read_csv('tweets_sentiment.csv', parse_dates=['Date'])

# Collapse to one mean score per calendar day. asfreq('D') inserts any missing
# days as NaN and interpolate() fills them, giving the regular daily index the
# forecast dates are derived from.
daily_sentiment = (
    sentiment_scored
    .groupby('Date')['sentiment']
    .mean()
    .asfreq('D')
    .interpolate()
)

if len(daily_sentiment) < MIN_DAILY_OBSERVATIONS:
    raise ValueError(
        f"Need at least {MIN_DAILY_OBSERVATIONS} days of sentiment data to "
        f"forecast, found {len(daily_sentiment)}."
    )

# Fit the model once and forecast the longest horizon
sentiment_model = sm.tsa.ARIMA(daily_sentiment, order=(1, 0, 0))
sentiment_fit = sentiment_model.fit()
forecast_3m = sentiment_fit.forecast(steps=FORECAST_3M_DAYS)

# The shorter horizons are the leading part of the same forecast path
forecast_1m = forecast_3m.iloc[:FORECAST_1M_DAYS]
forecast_1w = forecast_3m.iloc[:FORECAST_1W_DAYS]

In [None]:
# Render each forecast as plain text without a header row
forecast_1w_str, forecast_1m_str, forecast_3m_str = [
    horizon.to_string(header=False)
    for horizon in (forecast_1w, forecast_1m, forecast_3m)
]

# Print every forecast under its own heading, shortest horizon first
for heading, rendered in (
    ("1 Week Forecast:", forecast_1w_str),
    ("1 Month Forecast:", forecast_1m_str),
    ("3 Months Forecast:", forecast_3m_str),
):
    print(heading)
    print(rendered)