Skip to content

Commit

Permalink
Added NHL data gathering to Eternal Process.
Browse files Browse the repository at this point in the history
  • Loading branch information
FredLoh committed Mar 18, 2016
1 parent e189f57 commit a039fd2
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 16 deletions.
52 changes: 46 additions & 6 deletions Twitter_Utils/EternalProcess.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def start_process(self):
It has to check if a game is starting and if that is the case, fork the process,
And in that new process check for game data during the time period assigned to it.
"""
self.wait_till_five_seconds_into_minute()
# self.wait_till_five_seconds_into_minute()
start_time = time.time()
print(50 * '*' + '\n' + 10 * '*' + ' STARTING SCANNING PROCESS ' + 10 * '*' + '\n' + 50 * '*')
while True:
Expand All @@ -61,31 +61,40 @@ def start_process(self):
self.check_if_stream_should_end()

if self.is_time_to_get_game_data_for_day():
self.write_days_games_data()
self.march_madness.write_days_games_data()
self.write_days_games_data_for_nba()
self.write_days_games_data_for_nhl()
# self.march_madness.write_days_games_data()

# Read in file to see if it is time to analyze twitter
read_path = self.get_write_path_for_days_games()

db = self.get_aws_mongo_db()
data_nba = []
data_nhl = []
for post in db.nba_logs.find():
if post['date'] == datetime.datetime.now().strftime('%Y-%m-%d'):
data_nba.append(post)

for post in db.nhl_logs.find():
if post['date'] == datetime.datetime.now().strftime('%Y-%m-%d'):
data_nhl.append(post)

data_mm = self.march_madness.return_games_for_the_day()

try:
with open(read_path):
current_time = self.get_time_as_hour_minute()
self.logger.info('Current Time: ' + current_time)
self.iterate_through_nba_games_and_start_stream(data_nba=data_nba, current_time=current_time)
self.iterate_through_nhl_games_and_start_stream(data_nhl=data_nhl, current_time=current_time)
self.iterate_through_march_madness_games_and_start_stream(data_mm=data_mm,
current_time=current_time)

except IOError:
self.logger.error('File not found at ' + read_path)
self.write_days_games_data()
self.write_days_games_data_for_nba()
self.write_days_games_data_for_nhl()
# self.march_madness.write_days_games_data()
continue

self.sleep_for(self.tick_time_in_seconds, start_time)
Expand Down Expand Up @@ -123,6 +132,18 @@ def iterate_through_nba_games_and_start_stream(self, data_nba, current_time):
keyword_string = self.create_keyword_string_for_game(game)
self.start_stream_with_keywords(keyword_string, game)

def iterate_through_nhl_games_and_start_stream(self, data_nhl, current_time):
    """
    Starts a twitter stream for every NHL game whose game time equals the current time.

    :param data_nhl: iterable of game records; each record is read through the
                     'being_streamed' and 'title' keys.
    :param current_time: value compared for equality against the result of
                         self.generate_game_time(game).
    """
    for game in data_nhl:
        game_time = self.generate_game_time(game)
        self.logger.info('NHL Game Time: ' + game_time)

        # Skip games that are not starting on this tick or are already being
        # streamed. 'being_streamed' is only looked up when the times match.
        if game_time != current_time or game['being_streamed']:
            continue

        # Called before the stream is started; presumably persists the
        # 'being_streamed' flag so the game is not picked up twice - confirm.
        self.update_is_streamed_json(game)
        self.logger.info('Acquiring twitter data for ' + str(game["title"]))

        keyword_string = self.create_keyword_string_for_game(game)
        self.start_stream_with_keywords(keyword_string, game)

def start_stream_with_keywords(self, keyword_string, game):
game_name = self.create_game_name_from_title(game)
self.game_name_list.append(game_name)
Expand Down Expand Up @@ -335,9 +356,9 @@ def get_game_name_base_file_path(self, index):
return path + '/Twitter_Utils/data/tweets/' + game_name + '/' + game_name + '.txt'

# TODO - Figure out how to test this
def write_days_games_data(self):
def write_days_games_data_for_nba(self):
"""
Writes API response containing info for days games
Writes API response containing info for days games in the NBA
"""
db = self.get_aws_mongo_db()
write_path = self.get_write_path_for_days_games()
Expand All @@ -354,6 +375,25 @@ def write_days_games_data(self):
self.logger.error('Unable to write at ' + write_path)
raise IOError

def write_days_games_data_for_nhl(self):
    """
    Writes API response containing info for days games in the NHL

    The string returned by self.sports_data.get_nhl_games_for_today() is written
    to the path given by self.get_write_path_for_days_games(); the same payload,
    parsed as JSON, is then inserted into the nhl_logs collection.

    :raises IOError: when the file cannot be written. The failure is logged and
                     the original exception is re-raised unchanged.
    """
    db = self.get_aws_mongo_db()
    # NOTE(review): the NBA writer calls this same path getter, so both writers
    # appear to target one file per day - confirm that is intended.
    write_path = self.get_write_path_for_days_games()
    data_to_write = self.sports_data.get_nhl_games_for_today()
    try:
        # The context manager closes the file; no explicit close() is needed.
        with open(write_path, 'w+') as f:
            f.write(data_to_write)
    except IOError:
        # logger.exception records the active traceback along with the message.
        self.logger.exception('Unable to write at ' + write_path)
        # Bare raise keeps the original errno/filename/traceback for callers.
        raise

    # Parse the payload that was just written instead of re-reading the file.
    db.nhl_logs.insert(json.loads(data_to_write))

def remove_first_line_from_file(self, path):
"""
Removes first line from file
Expand Down
21 changes: 18 additions & 3 deletions Twitter_Utils/SportsData.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,28 @@ def __init__(self):
self.CLIENT_ID = CommonUtils.get_environ_variable('STAT_CLIENT_ID')
self.STAT_CLIENT_SECRET = CommonUtils.get_environ_variable('STAT_CLIENT_SECRET')
self.STAT_ACCESS_TOKEN = CommonUtils.get_environ_variable('STAT_ACCESS_TOKEN')
self.base_url = 'https://www.stattleship.com/basketball/'
self.base_url_basketball = 'https://www.stattleship.com/basketball/'
self.base_url_hockey = 'https://www.stattleship.com/hockey/'
self.logger = logging.getLogger(__name__)

def get_nba_games_for_today(self):
    """Gets all games for today

    Requests today's NBA games and returns whatever
    self.create_game_log_object builds from the 'games' entry of the
    JSON response.
    """
    self.logger.info('Getting games for today.')
    # NOTE(review): base_url_basketball ends with '/', so the resulting URL
    # contains '//' before 'nba' - left unchanged here; confirm the API accepts it.
    url = self.base_url_basketball + '/nba/games?on=today'
    headers = {
        'Authorization': str(self.STAT_ACCESS_TOKEN),
        'Accept': 'application/vnd.stattleship.com; version=1',
        'Content-Type': 'application/json'
    }

    res = requests.get(url, headers=headers)
    content = json.loads(res.content)
    return self.create_game_log_object(content['games'])

def get_nhl_games_for_today(self):
"""Gets all games for today"""
self.logger.info('Getting games for today.')
url = self.base_url_hockey + '/nhl/games?on=today'
headers = {
'Authorization': str(self.STAT_ACCESS_TOKEN),
'Accept': 'application/vnd.stattleship.com; version=1',
Expand Down Expand Up @@ -64,7 +79,7 @@ def get_nba_players_for_today(self):
""" Gets results for games played already for the day, if no games
have been played then no results appear"""
# TODO - Change back to today rather than date with games
url = self.base_url + '/nba/game_logs?on=March-5'
url = self.base_url_basketball + '/nba/game_logs?on=March-5'
headers = {
'Authorization': str(self.STAT_ACCESS_TOKEN),
'Accept': 'application/vnd.stattleship.com; version=1',
Expand Down
10 changes: 4 additions & 6 deletions Twitter_Utils/TweetProcessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
"""

class TweetProcessor:

def __init__(self):
pass

Expand Down Expand Up @@ -60,6 +59,8 @@ def standardize_tweet(self, tweet):

tweet = self.remove_appended_url_or_user(tweet)

tweet = self.lemmatize_tweet(tweet)

return tweet

@staticmethod
Expand Down Expand Up @@ -116,7 +117,7 @@ def lemmatize_tweet(self, tweet):
for word in tweet:
stemmed_word = wordnet_lemmatizer.lemmatize(word)
new_tweet = new_tweet + stemmed_word
print new_tweet
return new_tweet


class Filter:
Expand All @@ -131,7 +132,4 @@ def check_words_in_tweet(self, tweet):

def add_key_words_to_tweet(self):
    """
    Builds the keyword string from the fixed keyword list.

    :return: the keywords joined by single spaces: 'nba basketball ball'.
    """
    word_list = ['nba', 'basketball', 'ball']
    # Join the whole words; indexing each string with [0] kept only its
    # first letter and produced 'n b b'.
    return ' '.join(word_list)

f = TweetProcessor()
f.lemmatize_tweet('what does this do I am wondering if future parts of speech make a difference in this at all')
return ' '.join(word[0] for word in word_list)
2 changes: 1 addition & 1 deletion Twitter_Utils/tests/test_EternalProcess.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def test_get_write_path_for_days_games(self):
path = wd
base = path + '/Twitter_Utils/data/daily-logs/'
end = datetime.datetime.now().strftime('%Y-%m-%d') + '.json'
self.assertEqual(base + end, eternal_process.get_write_path_for_days_games())
self.assertEqual(base + end, eternal_process.get_write_path_for_days_games_nba())

def test_is_time_to_get_game_data_for_day(self):
eternal_process = EternalProcess()
Expand Down

0 comments on commit a039fd2

Please sign in to comment.