## Understanding Seasonal Trends through NLP
- Abigail Bojorquez
- Zachary Romero

### Project Proposal: 
We analyze Twitter data to visualize trends in mentions of common cold/flu symptoms versus pneumonia/bronchitis symptoms, with the goal of informing health professionals.


In [1]:
## Standard Imports
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import spacy 
import csv
import tweepy as tweepy
from spacy.lang.en.stop_words import STOP_WORDS
from geopy.geocoders import Nominatim
from textblob import TextBlob
## Importing the OS and JSON Modules
import os
import json

## Shared clients used throughout the notebook
# spaCy pipeline loaded from the 'en_core_web_sm' package.
nlp = spacy.load('en_core_web_sm')
# Nominatim geocoder; the user agent string identifies this application.
geolocator = Nominatim(user_agent="UCI_Tweet_Illness_App")

## Importing API Credentials

In [2]:
# Location of the Twitter API credentials file (JSON with the keys used by the
# authentication cell). Defaults to ~/.secret/twitter_api.json in the current
# user's home directory and can be overridden with the
# TWITTER_API_CREDENTIALS environment variable, so the notebook is not tied
# to one machine's absolute path.
credentials_path = os.environ.get(
    'TWITTER_API_CREDENTIALS',
    os.path.expanduser('~/.secret/twitter_api.json'),
)

with open(credentials_path) as f:
    login = json.load(f)

# Display only the key names (not the secret values) to confirm the file loaded.
# This must be the cell's last top-level expression to be shown by the notebook.
login.keys()

In [3]:
# Build the OAuth handler from the app-level (consumer) credentials,
# then attach the user-level access token pair.
consumer_key, consumer_secret = login['consumer_key'], login['consumer_secret']
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)

access_token, access_secret = login['access_token'], login['access_secret']
auth.set_access_token(access_token, access_secret)

# API client used for the search and rate-limit calls in the following cells.
twitter_api = tweepy.API(auth)

In [4]:
# Quick test query: print a handful of matching tweets to confirm that the
# credentials and the search endpoint work.
search_words = 'cold OR flu OR pneumonia OR bronchitis'

# The date restriction is expressed with the `since:` search operator inside
# the query string. `since_id` is NOT used for this: it expects a numeric
# tweet ID, so passing a date string there does not filter by date.
results = tweepy.Cursor(twitter_api.search_tweets,
                        q=f'{search_words} since:2023-02-20',
                        lang='en').items(5)

# Each item yielded by the cursor is a single tweet.
for tweet in results:
    print(tweet.text)

RT @NYCountybirders: Birds are currently suffering from their own pandemic. Please do not touch, handle birds (like hand feeding) unless ab…
RT @ShyJoanie: Atiku’s lawyer: My lord, no be say we nor rig ooo, but the one wey APC do ehn, cold catch me.
Walking down Rainier Ave and the air smells like weed and grease and cold trees and I love it all so much.
@FBananapants @FourWinns298 So, explain the flu, bananapants. 😄
@HdtSling @NYSCounties @CEJRyanMcMahon @OnondagaCounty @GovKathyHochul @forcedanarchy @hannon44 @evilgermangirl The… https://t.co/VypPH257UF


In [5]:
# Report how many search calls remain in the current rate-limit window
# and the moment at which that window resets.
limits = twitter_api.rate_limit_status()

print('Calls Left: ')
search_quota = limits['resources']['search']['/search/tweets']
print(search_quota['remaining'])

# 'reset' is a POSIX timestamp; convert it to a local datetime for display.
reset_time = search_quota['reset']
dt = datetime.fromtimestamp(reset_time)
print('\nTime Until Reset: ')
print(dt)

Calls Left: 
179

Time Until Reset: 
2023-03-02 14:50:39


In [6]:
# Main collection query: English-language tweets, excluding retweets, that
# mention any of the illness terms, restricted to a circle around the
# geographical center of the US.
# NOTE(review): `since:2022-01-01` may reach further back than the search
# endpoint actually serves -- confirm the date range of the returned tweets.
search_words = 'cold OR flu OR pneumonia OR bronchitis since:2022-01-01 -filter:retweets'
location = "39.8,-95.583068847656,2500km"  # Geographical center of the US with 2500km radius
language = "en"
result_type = "recent"

# `result_type` was previously defined but never forwarded to the search call;
# it is now passed so the request actually asks for recent tweets.
# The resulting cursor (up to 1000 items) is iterated by the filtering cell below.
tweets = tweepy.Cursor(twitter_api.search_tweets,
                       q=search_words,
                       geocode=location,
                       lang=language,
                       result_type=result_type).items(1000)

### Filter Tweets and Import into a DataFrame

In [7]:
# Keep only tweets that carry a tagged place, reducing each one to the three
# fields saved to disk: its text, the place's full name, and the creation date.
filtered_tweets = [
    {
        'text': tweet.text,
        'location': tweet.place.full_name,
        'date': tweet.created_at.date(),
    }
    for tweet in tweets
    if tweet.place is not None
]

# Explicit columns guarantee the expected header even when no tweet has a
# place; otherwise an empty list would produce a column-less frame and an
# empty CSV file that cannot be read back.
tweets_with_loc = pd.DataFrame(filtered_tweets, columns=['text', 'location', 'date'])
tweets_with_loc.to_csv('test_tweets.csv', index=False)

## Creating a DataFrame from test_tweets.csv

In [8]:
# Reload the tweets saved to 'test_tweets.csv' and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='test_tweets.csv')
df.head(n=5)

Unnamed: 0,text,location,date
0,"Introducing, Terminally Chill. 🧊 Cold IPAs hav...",Knee Deep Brewing Company,2023-03-02
1,@LoriJulia Would you rather: it be raining eve...,"Savage, MN",2023-03-02
2,It may be cold in #LA but the #networking is h...,Blankspaces,2023-03-02
3,Poetry gone cold. https://t.co/ikudLjCAbX,"Brooklyn, Portland",2023-03-02
4,Blueskies still cold out there,"Tucson, AZ",2023-03-02
