In [2]:
# Import Dependencies
import pandas as pd
import numpy as np
import re
import spacy
import sqlite3
from spacy.attrs import ORTH, LEMMA, NORM, TAG
from datetime import datetime, timedelta
from spacy.attrs import ORTH, LEMMA, NORM, TAG
from clean import replace_emoticons, clean_text, clean_tweet
from joblib import load

In [3]:
# Load machine learning prediction model
# The model is later called as model.predict(...) on an array of cleaned tweet
# strings, so it is presumably a fitted pipeline that does its own text
# vectorisation — TODO confirm against the training notebook.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load a twitter_model.joblib that comes from a trusted source.
model = load("twitter_model.joblib")

In [9]:
# Connect to database and create twitter table
# sqlite3.connect opens politics_db.db relative to the working directory and
# creates the file if it does not exist yet.
conn = sqlite3.connect("politics_db.db")
# Drop any previous copy of the table so that re-running this cell starts from
# an empty "twitter" table (all previously loaded rows are discarded).
conn.execute("DROP TABLE IF EXISTS twitter;")
# "id" is declared UNIQUE: the loading loop builds it as username + twitter_id,
# so inserting the same (username, tweet) pair twice raises an IntegrityError.
# This call is the cell's last expression, so the notebook echoes the returned
# sqlite3.Cursor object as the cell output.
conn.execute("""CREATE TABLE "twitter" (
            "id" TEXT UNIQUE,
            "twitter_id" INTEGER,
            "tweet" TEXT, 
            "tweet_date" TEXT,
            "retweet" INTEGER,
            "favorite" INTEGER,
            "username" TEXT,
            "sentiment" INTEGER     
            )""")

<sqlite3.Cursor at 0x1a2a357ce0>

In [10]:
# TODO(Gretel): read these usernames from the database instead of hard-coding them.

# Twitter handles to process. Each handle is used verbatim (including the "@")
# to build the CSV path data/<username>.csv and as the prefix of the row id.
usernames = ["@JoeBiden", "@BernieSanders", "@ewarren", "@KamalaHarris"]

In [11]:
# Populate the "twitter" table one candidate at a time: read the candidate's
# CSV, shift timestamps, keep the first week of tweets, clean and score the
# text, then append the rows to SQLite.
for username in usernames:

    # Import candidate's tweets into a dataframe, skipping malformed CSV rows.
    # NOTE(review): error_bad_lines was deprecated in pandas 1.3 and removed in
    # pandas 2.0; on those versions pass on_bad_lines="skip" instead.
    twitter_df = pd.read_csv(f"data/{username}.csv", error_bad_lines=False)

    # Convert date from UTC to EDT using a fixed 4-hour shift (no DST handling).
    # The whole column is parsed in one vectorised call instead of calling
    # datetime.strptime once per row; the strict format string is unchanged.
    twitter_df["tweet_date"] = (
        pd.to_datetime(twitter_df["tweet_date"], format="%Y-%m-%d %H:%M:%S")
        - timedelta(hours=4)
    )

    # Keep only tweets posted within 7 days of the candidate's earliest tweet.
    # .copy() detaches the filtered rows from the original frame so the column
    # assignments below do not raise pandas' SettingWithCopyWarning.
    max_date = twitter_df["tweet_date"].min() + timedelta(days=7)
    twitter_df = twitter_df[twitter_df["tweet_date"] < max_date].copy()

    # Clean text of tweets using the imported clean_tweet function.
    # NOTE(review): `nlp` is not defined in any visible cell — presumably a
    # spaCy pipeline created in a cell that is not shown; confirm it exists
    # after Restart Kernel -> Run All.
    twitter_df["tweet"] = twitter_df["tweet"].map(lambda text: clean_tweet(text, nlp))

    # Apply model to tweet column to get sentiment prediction; the cleaned
    # tweets are passed to the model as a NumPy array of unicode strings.
    twitter_df["sentiment"] = model.predict(twitter_df["tweet"].values.astype("U"))

    # Create username column with candidate's username
    twitter_df["username"] = username

    # Create id column to work as unique key, as a single tweet can apply to
    # multiple candidates: the key is the username followed by the tweet id.
    twitter_df["id"] = username + twitter_df["twitter_id"].astype(str)

    # Set index to be id column so to_sql writes it as the table's "id" column
    twitter_df = twitter_df.set_index("id")

    # Add candidate's tweets to twitter table
    twitter_df.to_sql("twitter", conn, if_exists="append")