In [330]:
!pip install nltk
!pip install plotly
!pip install ipywidgets
!pip install vaderSentiment
import pandas as pd
import sqlite3
import time
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)         # initiate notebook for offline plot
!pip install sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer







In [264]:
#Creating connection with sqlite database
def create_connection(db_file):
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        return conn
    except Error as e:
        print(e)
    return conn

In [302]:
#Setting up the data for the analysis

#Create the connection
conn = create_connection('R_NYU.db')

#Getting the data into pandas dataframe
posts = pd.read_sql("SELECT * FROM post", conn)
users = pd.read_sql("SELECT * FROM user", conn)
comments = pd.read_sql("SELECT * FROM comment", conn)
subreddits = pd.read_sql("SELECT * FROM subreddit", conn)

#Closing connection
conn.close()

In [266]:
#Removing bots
bots = subreddits[subreddits['name'].str.endswith('bot')]
bot_names = set(bots['name'].tolist())

for name in bot_names:
    users = users[users['name'] != name]
    posts = posts[posts['name'] != name]
    subreddits = subreddits[subreddits['name'] != name]
    comments = comments[comments['name'] != name]

In [None]:
###### QUESTIONS TO INVESTIGATE ######

'''
1, First post
2, Post with most upvotes/downvotes
3, Comments with most upvotes/downvotes
4, Does NYU subreddit fills with application questions?
5, How often does users posts/comments in NYU subreddit
6, where else do users posts -> most popular subreddits
7, Type of students in NYU subreddit
8, money related posts questions
9, Most common word (using TFDIF), try to remove filler
11, post activity over the year
12, Frequency of posts/comments in the subreddit
'''

In [329]:
#list of defaults subreddits that users are subscribed to and the number of posts
defaults = ['AskReddit','funny','pics','todayilearned','gaming','videos','IAmA','worldnews','news','aww','gifs','movies',
'mildlyinteresting','Showerthoughts','Music','science','explainlikeimfive','LifeProTips','personalfinance']

subreds = subreddits['subreddit'].value_counts().index.tolist()[:30]
other_subred = subreddits['subreddit'].value_counts().tolist()
other_user = subreddits['subreddit'].value_counts().index.tolist()

c_def = 0
c_oth = 0
fig = go.Figure()
for i, sub in enumerate(subreds) :
    # add a new bar to the graph
    fig.add_trace(go.Bar(
        x=[sub],
        y=[other_subred[i]],
        name='Default Subreddits' if sub in defaults else 'Other',
        marker_color='pink' if sub in defaults else 'blue',
        showlegend=True if c_def == 0 and sub in defaults or c_oth == 0 and sub not in defaults else False,
        legendgroup='pink' if sub in defaults else 'blue'
    ))
    c_def += 1 if sub in defaults else 0
    c_oth += 1 if sub not in defaults else 0
    
# plot the graph
fig.update_layout(yaxis_title="Number of posts", xaxis_title="Subreddit", title="Most Popular Subreddits from Users in r/NYU")
fig.show()

In [171]:
#Investigate if NYU subreddit is filled with application posts
#Approach 1: Naive Approach - Checking every words in the title
common_words = ['application', 'essay', 'essays','ED1','EDII','admissions',
                'acceptance', 'acceptance rate','Internal Transfer','Transfer',
               'apply', 'applying', 'app','Application', 'applicant', 
                'applicants', 'apply to','ED', 'Chance me', 'Chances', 
                'ED 1','ED II','EDI']

count = 0 
titles = posts['title'].tolist()
contents = posts['content'].tolist()

#Creating a list with id to prevent duplicates values
applications = list()

#Check both title and content of post
for index, row in posts.iterrows():
    if any(word in row['title'] for word in common_words):
        applications.append(row['id'])
        count+=1
        
for index, row in posts.iterrows():
    if row['id'] not in applications:
        if any(word in row['content'] for word in common_words):
            count+=1
            
#To Do: Plot this value against the number of posts in NYU subreddit        
print('Post about application', str(count))



Post about application 1483


In [169]:
#How many time is Stern mentions in both posts and comments
words = ['Stern','stern','sternie']
post_count = 0
comment_count = 0
posts_stern = posts['content'].tolist()
for content in contents:
    if any(word in content for word in words):
        post_count += 1
comments_stern = comments['content'].tolist()
for comment in comments_stern:
    if any(word in comment for word in words):
        comment_count += 1
        
print(str(post_count), 'Post about Stern and ', str(comment_count), 'Comments about Stern')

Unnamed: 0,id,name,url,title,content,score,created_utc,permalink,link_flair_text
0,ehaes3,somedude297,https://www.reddit.com/r/nyu/comments/ehaes3/i...,Internal Transfer to Stern,What is the word count for the personal statem...,1,1.577649e+09,/r/nyu/comments/ehaes3/internal_transfer_to_st...,
1,ehacsf,skt_fekar,https://www.reddit.com/r/nyu/comments/ehacsf/a...,Are electives generally harder than the requir...,I just took my last required course (Basic Alg...,1,1.577648e+09,/r/nyu/comments/ehacsf/are_electives_generally...,
2,eha7dc,Funkphlex,https://v.redd.it/uqbufxdpgm741,BYOB-friendly New Years Eve Dance Party & Vari...,,6,1.577648e+09,/r/nyu/comments/eha7dc/byobfriendly_new_years_...,
3,eh7v8h,sarampagnepapi,https://www.reddit.com/r/nyu/comments/eh7v8h/l...,last min photographers in manhattan?,im here until jan 1st and im looking for a sup...,1,1.577637e+09,/r/nyu/comments/eh7v8h/last_min_photographers_...,
4,eh7hnm,jerseyskies,https://www.reddit.com/r/nyu/comments/eh7hnm/i...,is it okay if i tuen in my film portfolio today?,i know they said at least 2 weeks prior to the...,1,1.577635e+09,/r/nyu/comments/eh7hnm/is_it_okay_if_i_tuen_in...,
...,...,...,...,...,...,...,...,...,...
4819,eojaf,Hellapeno,https://www.reddit.com/r/nyu/comments/eojaf/ar...,Are you a Flash programmer or do you know a Fl...,\n\nI've really been focusing on improving my ...,2,1.292809e+09,/r/nyu/comments/eojaf/are_you_a_flash_programm...,
4820,ej8a1,sallyNYU,https://www.reddit.com/r/nyu/comments/ej8a1/pr...,Procrastination from Finals!,I am procrastinating from completing my finals...,4,1.291939e+09,/r/nyu/comments/ej8a1/procrastination_from_fin...,
4821,egcla,TODizzle91,https://www.reddit.com/r/nyu/comments/egcla/so...,"So, I go to Stevens in Hoboken...",I stopped by early last year and talked to som...,4,1.291512e+09,/r/nyu/comments/egcla/so_i_go_to_stevens_in_ho...,
4822,ee7am,YouBestBeTrollin,https://www.reddit.com/r/nyu/comments/ee7am/ar...,Are most of you rich?,"NYU is a rather expensive school, are most of ...",1,1.291164e+09,/r/nyu/comments/ee7am/are_most_of_you_rich/,


In [318]:
#Approach 2: Using NLTK library
stopword = set(stopwords.words('english'))

def clear_filler(content):
    word_token = word_tokenize(content)
    sentence = [w for w in word_token if not w in stopword] 
    return ' '.join(sentence)

In [184]:
#Cleaning up the title and content and added as a new column in the dataframe
title_clean = posts['title'].apply(clear_filler)
content_clean = posts['content'].apply(clear_filler)

posts['doc'] = title_clean.map(str) + ' ' + content_clean

In [189]:
titles = posts['title'].tolist()
docs = posts['doc'].tolist()

vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(docs)
posts_tfidf = pd.DataFrame(X.T.todense(), index=vectorizer.get_feature_names(), columns = titles)

posts

Unnamed: 0,id,name,url,title,content,score,created_utc,permalink,link_flair_text,doc
0,ehaes3,somedude297,https://www.reddit.com/r/nyu/comments/ehaes3/i...,Internal Transfer to Stern,What is the word count for the personal statem...,1,1.577649e+09,/r/nyu/comments/ehaes3/internal_transfer_to_st...,,Internal Transfer Stern What word count person...
1,ehacsf,skt_fekar,https://www.reddit.com/r/nyu/comments/ehacsf/a...,Are electives generally harder than the requir...,I just took my last required course (Basic Alg...,1,1.577648e+09,/r/nyu/comments/ehacsf/are_electives_generally...,,Are electives generally harder required course...
2,eha7dc,Funkphlex,https://v.redd.it/uqbufxdpgm741,BYOB-friendly New Years Eve Dance Party & Vari...,,6,1.577648e+09,/r/nyu/comments/eha7dc/byobfriendly_new_years_...,,BYOB-friendly New Years Eve Dance Party & Vari...
3,eh7v8h,sarampagnepapi,https://www.reddit.com/r/nyu/comments/eh7v8h/l...,last min photographers in manhattan?,im here until jan 1st and im looking for a sup...,1,1.577637e+09,/r/nyu/comments/eh7v8h/last_min_photographers_...,,last min photographers manhattan ? im jan 1st ...
4,eh7hnm,jerseyskies,https://www.reddit.com/r/nyu/comments/eh7hnm/i...,is it okay if i tuen in my film portfolio today?,i know they said at least 2 weeks prior to the...,1,1.577635e+09,/r/nyu/comments/eh7hnm/is_it_okay_if_i_tuen_in...,,okay tuen film portfolio today ? know said lea...
...,...,...,...,...,...,...,...,...,...,...
4819,eojaf,Hellapeno,https://www.reddit.com/r/nyu/comments/eojaf/ar...,Are you a Flash programmer or do you know a Fl...,\n\nI've really been focusing on improving my ...,2,1.292809e+09,/r/nyu/comments/eojaf/are_you_a_flash_programm...,,Are Flash programmer know Flash programmer wou...
4820,ej8a1,sallyNYU,https://www.reddit.com/r/nyu/comments/ej8a1/pr...,Procrastination from Finals!,I am procrastinating from completing my finals...,4,1.291939e+09,/r/nyu/comments/ej8a1/procrastination_from_fin...,,Procrastination Finals ! I procrastinating com...
4821,egcla,TODizzle91,https://www.reddit.com/r/nyu/comments/egcla/so...,"So, I go to Stevens in Hoboken...",I stopped by early last year and talked to som...,4,1.291512e+09,/r/nyu/comments/egcla/so_i_go_to_stevens_in_ho...,,"So , I go Stevens Hoboken ... I stopped early ..."
4822,ee7am,YouBestBeTrollin,https://www.reddit.com/r/nyu/comments/ee7am/ar...,Are most of you rich?,"NYU is a rather expensive school, are most of ...",1,1.291164e+09,/r/nyu/comments/ee7am/are_most_of_you_rich/,,"Are rich ? NYU rather expensive school , folks..."


In [204]:
# using KMeans, cluster the data into a set number of categories
true_k = 15
r = 728 # KMeans is non-deterministic unless we specify the random seed
model = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1, random_state = r)

# fit the model
model.fit(X)
print("Top terms per cluster:")
order_centroids = model.cluster_centers_.argsort()[:, ::-1]
terms = vectorizer.get_feature_names()
for i in range(true_k):
    print("Cluster %d:" % i),
    for ind in order_centroids[i, :10]:
        print(' %s' % terms[ind])
    
# # free up extra space
# del df_post_tfidf

Top terms per cluster:
Cluster 0:
 game
 library
 bobst
 design
 nyu
 center
 access
 play
 games
 hey
Cluster 1:
 nyu
 know
 does
 student
 wondering
 thanks
 like
 year
 school
 going
Cluster 2:
 survey
 com
 https
 www
 help
 forms
 nyu
 qualtrics
 jfe
 link
Cluster 3:
 campus
 housing
 room
 live
 dorm
 living
 looking
 nyu
 freshman
 bed
Cluster 4:
 school
 nyu
 program
 transfer
 tisch
 film
 like
 major
 know
 want
Cluster 5:
 stern
 transfer
 nyu
 business
 cas
 major
 applying
 econ
 apply
 ed
Cluster 6:
 students
 nyu
 student
 current
 program
 know
 college
 free
 wondering
 new
Cluster 7:
 know
 nyu
 new
 like
 looking
 people
 thanks
 does
 need
 interested
Cluster 8:
 summer
 housing
 nyu
 sublet
 looking
 apartment
 staying
 hall
 nyc
 internship
Cluster 9:
 sat
 gpa
 school
 ap
 high
 scores
 nyu
 score
 year
 ib
Cluster 10:
 aid
 financial
 nyu
 package
 got
 scholarships
 international
 scholarship
 question
 decision
Cluster 11:
 removed
 nyu
 free
 students
 lookin

In [217]:
cluster_names = {0: 'game', 1: 'NYU ', 2: 'random', 3: 'Housing', 
                 4: 'School/Transfer', 5: 'Stern/Transfer', 6: 'NYU program/students', 7: 'New Experience/People',
                 8: 'Summer Housing/Internship', 9:'Application (high school student)', 10: 'Financial Aid',
                11:'Study Abroad',12:'CS CAS/Tandon', 13:'Course Information'}

In [218]:
# Unwraps the prediction from the model and looks up the category string in the dictionary, as well as grouping
# classifications with similar characteristics.
def classify(post) :
    Y = vectorizer.transform([post])
    prediction = model.predict(Y)[0]
    if prediction == 13 :
        prediction = 12
    if prediction > 13:
        prediction = 13
    return prediction

In [219]:
#Note to self: A bit dirty data
#create random sample of dataframe
sample = posts.sample(n=40)
header_str = '~~~~~~~~~~'
#sample = df
pred = []
#add column for the prediction to the dataframe
for row in sample.iterrows() : 
    pred.append(classify(row[1]['doc']))
sample['pred'] = pred
#display sample posts by subject
for i in range(0,9) :
        print()
        print(header_str,cluster_names[i],header_str)
        sub = sample[sample['pred'] == i]
        for row in sub.iterrows() :
            print(row[1]['title'])


~~~~~~~~~~ game ~~~~~~~~~~

~~~~~~~~~~ NYU  ~~~~~~~~~~
NYU alumni, how is the alumni network for getting jobs?
Prospective student here with some questions!
Different between NYU and NYU School of Professional studies?

~~~~~~~~~~ random ~~~~~~~~~~
Entrepreneurship Idea - Quick Survey (Looking for NYU Students)
CAPSTONE PROJECT HELP NEEDED
"Better Than We Are" - A Dark Comedy by Metaphysics Exam Productions

~~~~~~~~~~ Housing ~~~~~~~~~~
Choosing a freshman dorm
Honestly: Is the female to straight-guy ratio really as bad as everyone makes it out to be?
Sophomore Transfer: Dorm Recommendations?

~~~~~~~~~~ School/Transfer ~~~~~~~~~~
Actor looking for NYU film students
Dramatic Writing Masters Advice
Tisch Internal Transfer

~~~~~~~~~~ Stern/Transfer ~~~~~~~~~~

~~~~~~~~~~ NYU program/students ~~~~~~~~~~
NYU Nightlife
NYC ID
Portal changes

~~~~~~~~~~ New Experience/People ~~~~~~~~~~
Anyone down to see Star Slinger and Bondax on Dec 5th?
NYC to add solar technology on pay phones
To the 

In [222]:
#Classify each content and title with the right label
classifications = []

# add classification for every row
for index, row in posts.iterrows() :
    doc = row['doc']
    classifications.append(cluster_names[classify(doc)])
posts['class'] = classifications
posts = posts.drop(['doc'], axis = 1) # don't need this anymore
posts.head(5)

Unnamed: 0,id,name,url,title,content,score,created_utc,permalink,link_flair_text,class
0,ehaes3,somedude297,https://www.reddit.com/r/nyu/comments/ehaes3/i...,Internal Transfer to Stern,What is the word count for the personal statem...,1,1577649000.0,/r/nyu/comments/ehaes3/internal_transfer_to_st...,,Stern/Transfer
1,ehacsf,skt_fekar,https://www.reddit.com/r/nyu/comments/ehacsf/a...,Are electives generally harder than the requir...,I just took my last required course (Basic Alg...,1,1577648000.0,/r/nyu/comments/ehacsf/are_electives_generally...,,Course Information
2,eha7dc,Funkphlex,https://v.redd.it/uqbufxdpgm741,BYOB-friendly New Years Eve Dance Party & Vari...,,6,1577648000.0,/r/nyu/comments/eha7dc/byobfriendly_new_years_...,,New Experience/People
3,eh7v8h,sarampagnepapi,https://www.reddit.com/r/nyu/comments/eh7v8h/l...,last min photographers in manhattan?,im here until jan 1st and im looking for a sup...,1,1577637000.0,/r/nyu/comments/eh7v8h/last_min_photographers_...,,New Experience/People
4,eh7hnm,jerseyskies,https://www.reddit.com/r/nyu/comments/eh7hnm/i...,is it okay if i tuen in my film portfolio today?,i know they said at least 2 weeks prior to the...,1,1577635000.0,/r/nyu/comments/eh7hnm/is_it_okay_if_i_tuen_in...,,New Experience/People


In [226]:
terms = {}
for key, value in cluster_names.items():
    terms[value] = 0

# tally up each category
for index, row in posts.iterrows() :
    terms[row['class']] += 1

In [247]:
# Create temporary dataframe for use of Plotly
df_temp = pd.DataFrame()
df_temp['classification'] = terms.keys()
df_temp['count'] = terms.values()
trace = go.Pie(
    labels = df_temp['classification'],
    values = df_temp['count'],
)
fig = go.Figure(trace, title = 'Classification of r/NYU Posts by Percent')
fig.show()

In [383]:
analyzer = SentimentIntensityAnalyzer()
def get_sentiment(content):
    score = analyzer.polarity_scores(content)
    if score['pos'] >= 0.05:
        return 'positive'
    if score['neg'] <= 0.05:
        return 'negative'
    else:
        return 'neutral'
    

In [384]:
sentiments = []
for index, row in posts.iterrows():
    sentiment = get_sentiment(row['title'] + ' ' + row['content'])
    sentiments.append(sentiment)

posts['sentiment'] = sentiments
posts.head(10)

Unnamed: 0,id,name,url,title,content,score,created_utc,permalink,link_flair_text,sentiment
0,ehaes3,somedude297,https://www.reddit.com/r/nyu/comments/ehaes3/i...,Internal Transfer to Stern,What is the word count for the personal statem...,1,1577649000.0,/r/nyu/comments/ehaes3/internal_transfer_to_st...,,neutral
1,ehacsf,skt_fekar,https://www.reddit.com/r/nyu/comments/ehacsf/a...,Are electives generally harder than the requir...,I just took my last required course (Basic Alg...,1,1577648000.0,/r/nyu/comments/ehacsf/are_electives_generally...,,neutral
2,eha7dc,Funkphlex,https://v.redd.it/uqbufxdpgm741,BYOB-friendly New Years Eve Dance Party & Vari...,,6,1577648000.0,/r/nyu/comments/eha7dc/byobfriendly_new_years_...,,positive
3,eh7v8h,sarampagnepapi,https://www.reddit.com/r/nyu/comments/eh7v8h/l...,last min photographers in manhattan?,im here until jan 1st and im looking for a sup...,1,1577637000.0,/r/nyu/comments/eh7v8h/last_min_photographers_...,,positive
4,eh7hnm,jerseyskies,https://www.reddit.com/r/nyu/comments/eh7hnm/i...,is it okay if i tuen in my film portfolio today?,i know they said at least 2 weeks prior to the...,1,1577635000.0,/r/nyu/comments/eh7hnm/is_it_okay_if_i_tuen_in...,,positive
5,eh6zs6,bluethotiana,https://www.reddit.com/r/nyu/comments/eh6zs6/c...,Current High school Junior with a few questions,1. I heard that NYU has started a Data Science...,1,1577633000.0,/r/nyu/comments/eh6zs6/current_high_school_jun...,,positive
6,eh4d8b,ContrabandParagon,https://www.reddit.com/r/nyu/comments/eh4d8b/w...,Why did you choose NYU?,,11,1577614000.0,/r/nyu/comments/eh4d8b/why_did_you_choose_nyu/,,negative
7,eh3ao8,herefortheshibas,https://www.reddit.com/r/nyu/comments/eh3ao8/s...,Should I join Stern's AKPsi Business Frat?,I will be a freshman at Stern in the fall of 2...,1,1577605000.0,/r/nyu/comments/eh3ao8/should_i_join_sterns_ak...,,positive
8,eh2vn9,hellasadgemini,https://www.reddit.com/r/nyu/comments/eh2vn9/t...,Theatre for non-majors?,"Hi, I'm currently finishing my EDII applicatio...",3,1577602000.0,/r/nyu/comments/eh2vn9/theatre_for_nonmajors/,,negative
9,eh2sws,Ilovehashmaps,https://www.reddit.com/r/nyu/comments/eh2sws/h...,Has anyone successfully waived their Texts and...,I was checking [https://cas.nyu.edu/content/n...,4,1577602000.0,/r/nyu/comments/eh2sws/has_anyone_successfully...,,positive


In [None]:
cmt_sentiments = []
for index, row in comments.iterrows():
    sentiment = get_sentiment(row['content'])
    cmt_sentiments.append(sentiment)

comments['sentiment'] = cmt_sentiments
comments.head(10)

In [None]:
#Get the total sentiments of posts and comments total
post_dict = posts['sentiment'].value_counts().to_dict()
cmt_dict = comments['sentiment'].value_counts().to_dict()

In [382]:
#Pie plot of the sentiment percentage
trace = go.Pie(
    labels = list(post_dict.keys()),
    values = list(post_dict.values()),
)
fig = go.Figure(trace)
fig.update_layout(title = 'Sentiments of r/NYU Percentage')
fig.show()

#Pie plot of the sentiment percentage
trace = go.Pie(
    labels = list(cmt_di.keys()),
    values = list(post_dict.values()),
)
fig = go.Figure(trace)
fig.update_layout(title = 'Sentiments of r/NYU Percentage')
fig.show()

In [313]:
#Finding the users with the most reddit karma
users_ls = users['name'].tolist()
scores = {user : 0 for user in users_ls}

#Get the score from posts dataframe
for index, row in posts.iterrows():
    user = row['name']
    if user in scores:
        scores[user] += row['score']

#Get the score from comments dataframe        
for index, row in comments.iterrows():
    user = row['name']
    if user in scores:
        scores[user] += row['score']

def get_value(k):
    return scores[k]

In [317]:
top_users = sorted(scores, key = get_value, reverse = True)[:10]
top_scores = list()
for user in top_users:
    top_scores.append(scores[user])

top_posters = pd.DataFrame({'User': top_users, 'Karma': top_scores})
top_posters

Unnamed: 0,User,Karma
0,,4680
1,Impune,3030
2,Conpen,1453
3,sokpuppet1,787
4,doneanddead,529
5,KenyaAirwaysAnon,448
6,lolmonger,429
7,Wherearemylegs,424
8,hardwaregeek,420
9,Iambikecurious,415


In [326]:
top_users_50 = sorted(scores, key = get_value, reverse = True)[:50]
fig = go.Figure()
for i, user in enumerate(top_users_50) :
    # add a new bar to the graph
    fig.add_trace(go.Bar(
        x=[user],
        y=[scores[user]],
        name= user,
        marker_color='darkviolet'
    ))

    
# plot the graph
fig.update_layout(yaxis_title="Total Karmas", xaxis_title="Users", title="Most Popular Posters/Commenters on r/NYU")
fig.show()

In [98]:
#Top 10 post with the highest reddit karma
top_posts = posts.sort_values(['score'], ascending = False).head(10)
standing = 1
for index, row in top_posts.iterrows():
    print('Number: ' + str(standing))
    print('Post Title: \"' + row['title'] + '\"')
    print('Author: ' + row['name'])
    print('Score: ' + str(row['score']))
    date_posted = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(row['created_utc']))
    print('Posted on: ' + date_posted )
    print('URL: https://www.reddit.com' + row['permalink'])
    print('---------------------------------------------')
    standing += 1

Number: 1
Post Title: "To the NYU students on the overnight from Nairobi to JFK: you were a fucking embarrassment and disgrace."
Author: KenyaAirwaysAnon
Score: 448
Posted on: 2019-03-24 09:15:59
URL: https://www.reddit.com/r/nyu/comments/b4w826/to_the_nyu_students_on_the_overnight_from_nairobi/
---------------------------------------------
Number: 2
Post Title: "Congrats class of 2019!!!"
Author: cxavier
Score: 219
Posted on: 2019-05-22 15:10:36
URL: https://www.reddit.com/r/nyu/comments/brst4i/congrats_class_of_2019/
---------------------------------------------
Number: 3
Post Title: "Got banned for posting this earlier"
Author: imsh9381
Score: 207
Posted on: 2019-10-16 11:27:02
URL: https://www.reddit.com/r/nyu/comments/diqv3g/got_banned_for_posting_this_earlier/
---------------------------------------------
Number: 4
Post Title: "Congrats NYU 2018! Here's a picture I took of the Empire State Building lit up in violet last night."
Author: skier
Score: 196
Posted on: 2018-05-16 22:09

In [118]:
#Finding the parent posts
def find_parent(comment_id):
    comment_id = comment_id.split('_')[1]
    parent_url = 'Not Found'
    while comment_id in comments['id']:
        for index, row in comments.iterrows():
            if(item == row['id']):
                comment_id = row['id']
                
    for index, row in posts.iterrows():
        if row['id'] == comment_id:
            parent_url = row['url']
    return parent_url

In [123]:
#Function to find the top 10 best/worst comments in NYU subreddit
def find_mojo(best):
    order = None
    if best is True:
        order = False
    else:
        order = True
    
    bottom_comments = comments.sort_values(['score'], ascending = order).head(10)
    standing = 1
    for index, row in bottom_comments.iterrows():
        print('Number: ', str(standing))
        print('Comment: \"', row['content'], '\"')
        print('Posted By: ', row['name'])
        print('Score: ', str(row['score']))
        date_posted = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(row['created_utc']))
        print('Posted on: ' + date_posted)
        print('Parent post: ', find_parent(row['comment_id']))
        print('------------------------------------------')
        standing += 1

In [124]:
find_mojo(True)

Number:  1
Comment: " Fuckin Stern kids  "
Posted By:  tricksta616
Score:  136
Posted on: 2019-03-24 09:23:00
Parent post:  https://www.reddit.com/r/nyu/comments/b4w826/to_the_nyu_students_on_the_overnight_from_nairobi/
------------------------------------------
Number:  2
Comment: " Gonna gently suggest that it’s saying things and acting like this that make it hard to make friends:

https://www.reddit.com/r/nyu/comments/e1uhdq/does_anyone_here_dislike_some_international/f8tr573/

Your [other posts](https://www.reddit.com/user/ilscilsc) calling people morons, retarded, bastards, trash, etc don’t endear you either. 

Look, it’s clear you got some issues, so I’m somewhat sympathetic, but maybe being nicer to people and not being so quick to insult folks would help you make better connections. "
Posted By:  sokpuppet1
Score:  109
Posted on: 2019-11-27 07:56:46
Parent post:  https://www.reddit.com/r/nyu/comments/e2de47/dont_want_to_spend_another_thanksgiving_alone_rant/
-------------------

In [126]:
find_mojo(False)            

Number:  1
Comment: " happy 😃 9/11 😽 you sexy 😼 sluts 😍 today 📆 is the day 🤙 the twins 👯 got aborted 🤰 share 👐 with 10 🔟 patriots 🎩 🇺🇸 for another ➿ abortion ✈ 🏙 and be 🐝 a true 💯 american 🦅 do NOT 🚫 share 👐 and you 👈 WILL 😮 become 😩 an infidel 👳 "
Posted By:  Boy_314
Score:  -51
Posted on: 2018-09-11 11:15:41
Parent post:  https://www.reddit.com/r/nyu/comments/9ey8pq/weirdbad_roommate_stories/
------------------------------------------
Number:  2
Comment: " Lmao he's gay and she's a lesbian. I can't believe any of this. An older gay person will know if they like the other sex or not, she's like at least 60. He's a dumbass. If she wasn't a proud and out lesbian for decades I might have believed him "
Posted By:  zinko101
Score:  -34
Posted on: 2018-08-15 12:32:57
Parent post:  http://gothamist.com/2018/08/14/female_nyu_professor_suspended.php
------------------------------------------
Number:  3
Comment: " im tryna transfer into wharton or sloan or harvard or yale or stanford or columb

In [102]:
#First 10 post on NYU subreddit
first_posts = posts.sort_values(['created_utc'], ascending = [True]).head(10)
standing = 1
for index, row in first_posts.iterrows():
    print('Number: ' + str(standing))
    print('Post Title: \"' + row['title'] + '\"')
    print('Author: ' + row['name'])
    print('Score: ' + str(row['score']))
    date_posted = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(row['created_utc']))
    print('Posted on: ' + date_posted)
    print('URL: https://www.reddit.com' + row['permalink'])
    print('---------------------------------------------')
    standing += 1

    
first_post = posts.sort_values(['created_utc'], ascending = True).head(1)
form = '%Y-%m-%d'
fpost_date = time.strftime('%Y-%m-%d', time.localtime(first_post['created_utc']))
fpost_date = datetime.strptime(fpost_date, form)
created_date = datetime(2009,11,4)
diff = fpost_date - created_date
diff = diff.days
curr = diff
year = 0
while(curr > 0):
    curr = diff - 365
    if curr >= 0:
        year += 1
        diff = curr
print('\nFirst post created: ' + str(year) + ' year and ' + str(diff) + ' days after the subreddit was created')    
#It is interesting to note that while r/NYU itself was created on November 4th, 2009, 
#it appears that the first post was not made until November 28th, 2010.
# which is

Number: 1
Post Title: "No NYU love?"
Author: no_pun_intendid
Score: 5
Posted on: 2010-11-28 12:23:51
URL: https://www.reddit.com/r/nyu/comments/ecwp1/no_nyu_love/
---------------------------------------------
Number: 2
Post Title: "Are most of you rich?"
Author: YouBestBeTrollin
Score: 1
Posted on: 2010-11-30 19:36:54
URL: https://www.reddit.com/r/nyu/comments/ee7am/are_most_of_you_rich/
---------------------------------------------
Number: 3
Post Title: "So, I go to Stevens in Hoboken..."
Author: TODizzle91
Score: 4
Posted on: 2010-12-04 20:20:34
URL: https://www.reddit.com/r/nyu/comments/egcla/so_i_go_to_stevens_in_hoboken/
---------------------------------------------
Number: 4
Post Title: "Procrastination from Finals!"
Author: sallyNYU
Score: 4
Posted on: 2010-12-09 18:53:37
URL: https://www.reddit.com/r/nyu/comments/ej8a1/procrastination_from_finals/
---------------------------------------------
Number: 5
Post Title: "Are you a Flash programmer or do you know a Flash programmer wh