In [6]:
# Import Dependencies
import pandas as pd
import numpy as np
import re
import spacy
import sqlite3
from spacy.attrs import ORTH, LEMMA, NORM, TAG
from datetime import datetime, timedelta
from spacy.attrs import ORTH, LEMMA, NORM, TAG
from clean import replace_emoticons, clean_text, clean_tweet
from joblib import load

In [7]:
# Load the pre-trained sentiment prediction model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
model = load("twitter_model.joblib")

# Load the small English spaCy pipeline with the dependency parser and the
# named-entity recognizer switched off.
# The previous `parser=False, entity=False` keywords are spaCy 1.x syntax:
# spaCy 2.x silently ignores them (both components still load) and spaCy 3.x
# rejects them with a TypeError. `disable=[...]` is the supported spelling.
# NOTE(review): assumes clean_tweet does not rely on parser/NER output
# (e.g. doc.sents or doc.ents) - confirm against clean.py.
nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"])

In [8]:
# Open the SQLite database file (sqlite3 creates it if it does not exist yet)
conn = sqlite3.connect("politics_db.sqlite")

# Statement that removes any twitter table left over from an earlier run
drop_twitter_sql = "DROP TABLE IF EXISTS twitter;"

# Statement that defines a fresh, empty twitter table keyed on "id"
create_twitter_sql = """CREATE TABLE "twitter" (
            "id" TEXT UNIQUE,
            "twitter_id" INTEGER,
            "tweet" TEXT, 
            "tweet_date" TEXT,
            "retweet" INTEGER,
            "favorite" INTEGER,
            "username" TEXT,
            "sentiment" INTEGER,
            PRIMARY KEY (id)
            )"""

# Rebuild the table from scratch: drop first, then create
conn.execute(drop_twitter_sql)
conn.execute(create_twitter_sql)

<sqlite3.Cursor at 0x1a1815f180>

In [None]:
# TODO(Smita): write code to extract the usernames from the database, so that you end up with a list that looks like this:
# usernames = ["@JoeBiden", "@BernieSanders", "@ewarren", "@KamalaHarris"]

In [9]:
# Candidate accounts to process and the date window to keep

# Twitter handles of the candidates (the leading "@" is part of each value)
usernames = [
    "@JoeBiden",
    "@BernieSanders",
    "@ewarren",
    "@KamalaHarris",
]

# Earliest and latest tweet timestamps to keep.
# NOTE(review): max_date is midnight at the start of Aug 7, so a "<= max_date"
# comparison excludes tweets posted later on Aug 7 - confirm this is intended.
min_date = datetime(year=2019, month=8, day=1)
max_date = datetime(year=2019, month=8, day=7)

In [10]:
def _read_tweets_csv(path):
    """Read one candidate's tweet CSV, warning about and skipping malformed rows.

    `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0 in
    favour of `on_bad_lines`. The current keyword is tried first, with a
    fallback to the legacy one so the notebook also runs on older pandas.

    Parameters
    ----------
    path : str
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The parsed rows, without any line pandas could not parse.
    """
    try:
        return pd.read_csv(path, on_bad_lines="warn")
    except TypeError:
        # pandas < 1.3 does not accept `on_bad_lines`
        return pd.read_csv(path, error_bad_lines=False)


# Fixed offset subtracted from the stored timestamps to go from UTC to EDT
UTC_TO_EDT_OFFSET = timedelta(hours=4)

for username in usernames:

    # Import candidate's tweets into a dataframe
    twitter_df = _read_tweets_csv(f"data/{username}.csv")

    # Convert date from UTC to EDT (vectorised; missing dates become NaT
    # and are removed by the range filter below instead of raising)
    twitter_df["tweet_date"] = (
        pd.to_datetime(twitter_df["tweet_date"], format="%Y-%m-%d %H:%M:%S")
        - UTC_TO_EDT_OFFSET
    )

    # Delete tweets out of date range (both bounds inclusive) and remove time.
    # .copy() gives an independent frame, so the column assignments below do
    # not write into a slice of the original (SettingWithCopyWarning).
    in_range = twitter_df["tweet_date"].between(min_date, max_date)
    twitter_df = twitter_df.loc[in_range].copy()
    twitter_df["tweet_date"] = twitter_df["tweet_date"].dt.strftime("%m-%d")

    # Clean text of tweets using the imported clean_tweet function
    twitter_df["tweet"] = twitter_df["tweet"].map(lambda x: clean_tweet(x, nlp))

    # Remove tweets that are missing or blank after cleaning
    # (dropna alone only removed NaN/None, not empty strings)
    has_text = (
        twitter_df["tweet"].notna()
        & twitter_df["tweet"].astype(str).str.strip().ne("")
    )
    twitter_df = twitter_df.loc[has_text].copy()

    # Nothing left for this candidate: skip prediction and the database
    # write rather than hand the model a zero-row input
    if twitter_df.empty:
        print(f"No tweets to load for {username} in the selected date range")
        continue

    # Apply model to tweet column to get sentiment prediction
    twitter_df["sentiment"] = model.predict(twitter_df["tweet"].values.astype("U"))

    # Create username column with candidate's username
    twitter_df["username"] = username

    # Create id column to work as unique primary key, as a single tweet can apply to multiple candidates
    twitter_df["id"] = username + twitter_df["twitter_id"].astype(str)

    # Set index to be id column (to_sql writes the index as the "id" column)
    twitter_df = twitter_df.set_index("id")

    # Add candidate's tweets to twitter table
    twitter_df.to_sql("twitter", conn, if_exists="append")