# Exploring Hacker News Posts

## In this project we are interested in posts from this csv whose titles begin with either "Ask HN" or "Show HN". Users can submit "Ask HN posts to ask the Hacker community a specific question.

In [58]:
# Read in the CSV 
from csv import reader
import datetime as dt
opened_file = open('hacker_news.csv')
read_file = reader(opened_file)
hn = list(read_file)

# Show the first 5 rows of data
print(hn[:5])

[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'], ['12579008', 'You have two days to comment if you want stem cells to be classified as your own', 'http://www.regulations.gov/document?D=FDA-2015-D-3719-0018', '1', '0', 'altstar', '9/26/2016 3:26'], ['12579005', 'SQLAR  the SQLite Archiver', 'https://www.sqlite.org/sqlar/doc/trunk/README.md', '1', '0', 'blacksqr', '9/26/2016 3:24'], ['12578997', 'What if we just printed a flatscreen television on the side of our boxes?', 'https://medium.com/vanmoof/our-secrets-out-f21c1f03fdc8#.ietxmez43', '1', '0', 'pavel_lishin', '9/26/2016 3:19'], ['12578989', 'algorithmic music', 'http://cacm.acm.org/magazines/2011/7/109891-algorithmic-composition/fulltext', '1', '0', 'poindontcare', '9/26/2016 3:16']]


In [59]:
# Show only column names
headers = hn[0]
print(headers)

['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']


In [60]:
# Show the rest of the data
print(hn[:5])

[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'], ['12579008', 'You have two days to comment if you want stem cells to be classified as your own', 'http://www.regulations.gov/document?D=FDA-2015-D-3719-0018', '1', '0', 'altstar', '9/26/2016 3:26'], ['12579005', 'SQLAR  the SQLite Archiver', 'https://www.sqlite.org/sqlar/doc/trunk/README.md', '1', '0', 'blacksqr', '9/26/2016 3:24'], ['12578997', 'What if we just printed a flatscreen television on the side of our boxes?', 'https://medium.com/vanmoof/our-secrets-out-f21c1f03fdc8#.ietxmez43', '1', '0', 'pavel_lishin', '9/26/2016 3:19'], ['12578989', 'algorithmic music', 'http://cacm.acm.org/magazines/2011/7/109891-algorithmic-composition/fulltext', '1', '0', 'poindontcare', '9/26/2016 3:16']]


In [61]:
# Create lists to filter data
ask_posts = []
show_posts = []
other_posts = []

# Loop through the data to add to lists
for row in hn:
    title = row[1]
    title = title.lower()
    if title.startswith('ask hn'):
        ask_posts.append(row)
    elif title.startswith('show hn'):
        show_posts.append(row)
    else:
        other_posts.append(row)
        
# Check the number of posts in lists
print(len(ask_posts))
print(len(show_posts))
print(len(other_posts))

9139
10158
273823


In [62]:
# Check ask posts list
print(ask_posts[:5])

[['12578908', 'Ask HN: What TLD do you use for local development?', '', '4', '7', 'Sevrene', '9/26/2016 2:53'], ['12578522', 'Ask HN: How do you pass on your work when you die?', '', '6', '3', 'PascLeRasc', '9/26/2016 1:17'], ['12577908', 'Ask HN: How a DNS problem can be limited to a geographic region?', '', '1', '0', 'kuon', '9/25/2016 22:57'], ['12577870', 'Ask HN: Why join a fund when you can be an angel?', '', '1', '3', 'anthony_james', '9/25/2016 22:48'], ['12577647', 'Ask HN: Someone uses stock trading as passive income?', '', '5', '2', '00taffe', '9/25/2016 21:50']]


In [63]:
# Check show posts list
print(show_posts[:5])

[['12578335', 'Show HN: Finding puns computationally', 'http://puns.samueltaylor.org/', '2', '0', 'saamm', '9/26/2016 0:36'], ['12578182', 'Show HN: A simple library for complicated animations', 'https://christinecha.github.io/choreographer-js/', '1', '0', 'christinecha', '9/26/2016 0:01'], ['12578098', 'Show HN: WebGL visualization of DNA sequences', 'http://grondilu.github.io/dna.html', '1', '0', 'grondilu', '9/25/2016 23:44'], ['12577991', 'Show HN: Pomodoro-centric, heirarchical project management with ES6 modules', 'https://github.com/jakebian/zeal', '2', '0', 'dbranes', '9/25/2016 23:17'], ['12577142', 'Show HN: Jumble  Essays on the go #PaulInYourPocket', 'https://itunes.apple.com/us/app/jumble-find-startup-essay/id1150939197?ls=1&mt=8', '1', '1', 'ryderj', '9/25/2016 20:06']]


In [64]:
# Count the number of comments in ask posts
total_ask_comments = 0

for row in ask_posts:
    comment = row[4]
    if comment in row:
        total_ask_comments += 1

avg_ask_comments = total_ask_comments / len(ask_posts)
        
# Count the number of comments in show posts
total_show_comments = 0

for row in show_posts:
    comment = row[4]
    if comment in row:
        total_show_comments += 1
        
avg_show_comments = total_show_comments / len(show_posts)
        
# Show how avg comments per list        
print(avg_ask_comments)
print(avg_show_comments)

1.0
1.0


### Both lists had comments for each row in the dataset so have the same average number of comments

In [119]:
# Calc the number of posts per hour
result_list = []

for row in ask_posts:
    created = row[6]
    comment = int(row[4])
    result_list.append([created, comment])
    
print(result_list[:5])
    
# Count the number of hours in list
counts_by_hour = {}
comments_by_hour = {}

for row in result_list:
    time = row[0]
    comment = row[1]
    time_dt = dt.datetime.strptime(time, "%m/%d/%Y %H:%M")
    hour = time_dt.strftime("%I")
    if hour not in counts_by_hour:
        counts_by_hour[hour] = 1
        comments_by_hour[comment] = comment
    else:
        counts_by_hour[hour] += 1
        comments_by_hour[comment] = comment    

[['9/26/2016 2:53', 7], ['9/26/2016 1:17', 3], ['9/25/2016 22:57', 0], ['9/25/2016 22:48', 3], ['9/25/2016 21:50', 2]]


In [159]:
# Calc avg comments per hour
avg_comments_hour = []

for comment in comments_by_hour:
    for count in counts_by_hour:
        if count == comment:
            avg_comments_hour.append([comment, round(comments_by_hour[comment]/counts_by_hour[count],2)])
        else:
            continue
        
print(avg_comments_hour)

[['02', 0.01], ['01', 0.01], ['10', 0.01], ['09', 0.03], ['07', 0.0], ['05', 0.0], ['03', 0.0], ['11', 0.0], ['08', 0.0], ['04', 0.02], ['12', 0.0], ['06', 0.02]]
