In [70]:
import pandas as pd
import sqlite3
import time
from datetime import datetime

In [2]:
#Creating connection with sqlite database
def create_connection(db_file):
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        return conn
    except Error as e:
        print(e)
    return conn

In [3]:
#Setting up the data for the analysis

#Create the connection
conn = create_connection('R_NYU.db')

#Getting the data into pandas dataframe
posts = pd.read_sql("SELECT * FROM post", conn)
users = pd.read_sql("SELECT * FROM user", conn)
comments = pd.read_sql("SELECT * FROM comment", conn)
subreddits = pd.read_sql("SELECT * FROM subreddit", conn)

#Closing connection
conn.close()

In [4]:
#Removing bots
bots = subreddits[subreddits['name'].str.endswith('bot')]
bot_names = set(bots['name'].tolist())

for name in bot_names:
    users = users[users['name'] != name]
    posts = posts[posts['name'] != name]
    subreddits = subreddits[subreddits['name'] != name]
    comments = comments[comments['name'] != name]

In [None]:
###### QUESTIONS TO INVESTIGATE ######

'''
1, First post
2, Post with most upvotes/downvotes
3, Comments with most upvotes/downvotes
4, Does NYU subreddit fills with application questions?
5, How often does users posts/comments in NYU subreddit
6, where else do users posts -> most popular subreddits
7, Type of students in NYU subreddit
8, money related posts questions
9, Most common word (using TFDIF), try to remove filler
10, Does NYU subreddit represent NYU in general
11, post activity over the year
12, Frequency of posts/comments in the subreddit
'''

In [85]:
first_post = posts.sort_values(['created_utc'], ascending = True).head(1)
form = '%Y-%m-%d'
fpost_date = time.strftime('%Y-%m-%d', time.localtime(first_post['created_utc']))
fpost_date = datetime.strptime(fpost_date, form)
created_date = datetime(2009,11,4)
diff = fpost_date - created_date
diff = diff.days
year = 0
while(diff > 0):
    curr = diff - 365
    if curr >= 0:
        year += 1
    else:
        diff = curr
print('First post created :' + str(curr) + ' years and ' + str(diff) + ' days after the subreddit was created')

KeyboardInterrupt: 

In [44]:
#First 10 post on NYU subreddit
first_posts = posts.sort_values(['created_utc'], ascending = [True]).head(10)
standing = 1
for index, row in first_posts.iterrows():
    print('Number: ' + str(standing))
    print('Post Title: \"' + row['title'] + '\"')
    print('Author: ' + row['name'])
    print('Score: ' + str(row['score']))
    date_posted = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(row['created_utc']))
    print('Posted on: \"' + date_posted + '\"')
    print('Link to post: ' + row['permalink'])
    print('---------------------------------------------')
    standing += 1
    
#It is interesting to note that while r/NYU itself was created on November 4th, 2009, 
#it appears that the first post was not made until November 28th, 2010.
# which is

Number: 1
Post Title: "No NYU love?"
Author: no_pun_intendid
Score: 5
Posted on: "2010-11-28 12:23:51"
Link to post: /r/nyu/comments/ecwp1/no_nyu_love/
---------------------------------------------
Number: 2
Post Title: "Are most of you rich?"
Author: YouBestBeTrollin
Score: 1
Posted on: "2010-11-30 19:36:54"
Link to post: /r/nyu/comments/ee7am/are_most_of_you_rich/
---------------------------------------------
Number: 3
Post Title: "So, I go to Stevens in Hoboken..."
Author: TODizzle91
Score: 4
Posted on: "2010-12-04 20:20:34"
Link to post: /r/nyu/comments/egcla/so_i_go_to_stevens_in_hoboken/
---------------------------------------------
Number: 4
Post Title: "Procrastination from Finals!"
Author: sallyNYU
Score: 4
Posted on: "2010-12-09 18:53:37"
Link to post: /r/nyu/comments/ej8a1/procrastination_from_finals/
---------------------------------------------
Number: 5
Post Title: "Are you a Flash programmer or do you know a Flash programmer who would like to team up with an artist and m

In [22]:
first_posts

Unnamed: 0,id,name,url,title,content,score,created_utc,permalink,link_flair_text
4823,ecwp1,no_pun_intendid,https://www.reddit.com/r/nyu/comments/ecwp1/no...,No NYU love?,Where are all my Violets?,5,1290965000.0,/r/nyu/comments/ecwp1/no_nyu_love/,
4822,ee7am,YouBestBeTrollin,https://www.reddit.com/r/nyu/comments/ee7am/ar...,Are most of you rich?,"NYU is a rather expensive school, are most of ...",1,1291164000.0,/r/nyu/comments/ee7am/are_most_of_you_rich/,
4821,egcla,TODizzle91,https://www.reddit.com/r/nyu/comments/egcla/so...,"So, I go to Stevens in Hoboken...",I stopped by early last year and talked to som...,4,1291512000.0,/r/nyu/comments/egcla/so_i_go_to_stevens_in_ho...,
4820,ej8a1,sallyNYU,https://www.reddit.com/r/nyu/comments/ej8a1/pr...,Procrastination from Finals!,I am procrastinating from completing my finals...,4,1291939000.0,/r/nyu/comments/ej8a1/procrastination_from_fin...,
4819,eojaf,Hellapeno,https://www.reddit.com/r/nyu/comments/eojaf/ar...,Are you a Flash programmer or do you know a Fl...,\n\nI've really been focusing on improving my ...,2,1292809000.0,/r/nyu/comments/eojaf/are_you_a_flash_programm...,
4818,f394n,SirFrags,https://www.reddit.com/r/nyu/comments/f394n/so...,"So I have looked around The Princeton Review, ...",I am looking to apply in about 6 months (High ...,5,1295183000.0,/r/nyu/comments/f394n/so_i_have_looked_around_...,
4817,fj4zd,hannybar,https://www.reddit.com/r/nyu/comments/fj4zd/he...,Hey NYU journalism students: what's it going t...,Hey NYU journalism students: what's it going t...,1,1297386000.0,/r/nyu/comments/fj4zd/hey_nyu_journalism_stude...,
4816,fjthl,howie549,https://www.reddit.com/r/nyu/comments/fjthl/an...,Anyone know what happened to Yummy House?,Tried ordering the other day and no answer the...,1,1297473000.0,/r/nyu/comments/fjthl/anyone_know_what_happene...,
4815,fu1s7,CatboyMac,https://www.reddit.com/r/nyu/comments/fu1s7/ny...,"""NYU, what's up!"" - James Franco at tonight's ...",,0,1298863000.0,/r/nyu/comments/fu1s7/nyu_whats_up_james_franc...,
4814,fuliq,Drantis,http://www.nbcnewyork.com/news/NYU-Alums-Senio...,"""NYU, WHAT UP."" -James Franco (who else fist-p...",,1,1298931000.0,/r/nyu/comments/fuliq/nyu_what_up_james_franco...,
