In [None]:
import argparse
import asyncio
import datetime
import math
import os
import pickle
import time

import numpy as np
import pymongo

#from SentiCR.SentiCR import SentiCR
from SentiSW.code.classification.classifier import Classifier
from SentiSW.code.entity.training_set_generation import get_entity
from git_crawler import GitCrawler

# Credentials read from auth.txt, one `user:token` pair per line.
# The two lists are index-aligned: tokens[i] belongs to users[i].
users = []
tokens = []
# After loading, these hold the last pair read from auth.txt.
user = ''
token = ''

# MongoDB handles used by init() and by the classification cell. They were
# previously wrapped in a string literal, which left both collection names
# undefined at the point of use.
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
database = mongo_client["reposadb"]
pull_requests_collection = database["pull_requests"]
repositories_collection = database["repositories"]

with open('auth.txt', 'r') as file:
    for line in file:
        # strip() drops the newline as well as a stray '\r' from CRLF files.
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing empty line).
            continue
        # Split on the first ':' only so the token part is kept intact.
        user, token = line.split(':', 1)
        users.append(user)
        tokens.append(token)

# Repository identifiers ("owner/name"); filled in by init().
repositories = []

def init():
    """Register every repository listed in repositories.txt in MongoDB.

    Reads one ``owner/name`` entry per line from ``repositories.txt`` and
    stores the entries in the module-level ``repositories`` list. For each
    entry that has no document in ``repositories_collection`` yet, a document
    with empty ``open_pull_requests`` and ``closed_pull_requests`` lists is
    inserted; entries that already have a document are left untouched.

    Raises:
        ValueError: if a non-blank line does not have exactly one ``/``.
    """
    # Without this declaration the assignment below would bind a local name
    # and the module-level list would stay empty.
    global repositories

    print('Initialising')

    with open('repositories.txt', 'r') as file:
        # Blank lines would break the owner/name unpacking below, so drop them.
        repositories = [line.strip() for line in file if line.strip()]

    for repository in repositories:
        owner, name = repository.split("/")
        repository_id = "{}/{}".format(owner, name)

        # Only existence matters here, so a single lookup is enough.
        if repositories_collection.find_one({"_id": repository_id}) is None:
            repositories_collection.insert_one({
                "_id": repository_id,
                "owner": owner,
                "name": name,
                'open_pull_requests': [],
                'closed_pull_requests': []
            })

def get_tuples(texts):
    """Classify a batch of texts and extract an entity for each non-neutral one.

    Args:
        texts: sequence of strings to classify.

    Returns:
        A list with one dict per text, in input order. Each dict has the keys
        ``'sentiment'`` (first element of the classifier's prediction for that
        text) and ``'entity'`` (result of ``get_entity(text)``, or ``None``
        when the sentiment is ``'Neutral'``).
    """
    # Nothing to classify: avoid constructing the classifier at all.
    if len(texts) == 0:
        return []

    sentiment_analyzer = Classifier(read=True, vector_method='tfidf')
    sentiments = sentiment_analyzer.get_sentiment_polarity_collection(texts)

    tuples = []
    for text, sentiment in zip(texts, sentiments):
        # Each prediction is indexable; its first element is the label. The
        # neutral check must use that label (as get_tuple does), not the
        # whole prediction object.
        label = sentiment[0]
        entity = get_entity(text) if label != 'Neutral' else None
        tuples.append({'sentiment': label, 'entity': entity})
    return tuples

def get_tuple(text):
    """Classify a single text and, unless it is neutral, extract its entity.

    Args:
        text: the string to classify.

    Returns:
        A dict with the keys ``'sentiment'`` (first element of the
        classifier's prediction) and ``'entity'`` (result of
        ``get_entity(text)``, or ``None`` for a ``'Neutral'`` sentiment).
    """
    classifier = Classifier(read=True, vector_method='tfidf')
    polarity = classifier.get_sentiment_polarity(text)[0]

    # Neutral texts carry no entity, so the extraction step is skipped.
    if polarity != 'Neutral':
        return {'sentiment': polarity, 'entity': get_entity(text)}
    return {'sentiment': polarity, 'entity': None}

def classify(sentences):
    """Print the SentiCR polarity score of every sentence.

    A pickled analyzer is loaded from
    ``classifier_models/SentiCR_model.sav`` when that file exists; otherwise
    a new analyzer is created and pickled to that path.

    Args:
        sentences: iterable of strings to score.

    Returns:
        None. Scores are printed, one block per sentence.
    """
    saved_SentiCR_model = 'classifier_models/SentiCR_model.sav'

    if os.path.exists(saved_SentiCR_model):
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file; only load model files that this project wrote itself.
        with open(saved_SentiCR_model, 'rb') as model_file:
            sentiment_analyzer = pickle.load(model_file)
        print('Loaded SentiCR model')
    else:
        # NOTE(review): the SentiCR import at the top of the file is commented
        # out, so this branch raises NameError until it is restored -- confirm
        # the intended import before relying on this path.
        sentiment_analyzer = SentiCR.SentiCR()
        # The dump would fail if the target directory did not exist yet.
        os.makedirs(os.path.dirname(saved_SentiCR_model), exist_ok=True)
        with open(saved_SentiCR_model, 'wb') as model_file:
            pickle.dump(sentiment_analyzer, model_file)
        print('Saved model to file')

    for sent in sentences:
        score = sentiment_analyzer.get_sentiment_polarity(sent)
        print(sent+"\n Score: "+str(score))

# Update

In [None]:
start_time = time.time()

init()

name = 'rails/rails'

# A dedicated event loop is created (instead of borrowing the current one) so
# that closing it afterwards cannot affect a loop owned by the host
# environment, and so that `loop` is always bound when `finally` runs.
loop = asyncio.new_event_loop()
try:
    crawler = GitCrawler(name)
    loop.run_until_complete(crawler.update_repository())
except Exception as error:
    # Best-effort: a failed crawl does not abort the run, but it is reported
    # instead of being silently discarded.
    print('Failed to update {}: {!r}'.format(name, error))
finally:
    loop.close()

elapsed_time = time.time() - start_time
formatted_elapsed_time = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))

print(formatted_elapsed_time)

# Classify

In [None]:
start_time = time.time()

MAX_CLASSIFY = 100

# Repository whose pull requests get classified. This cell used to read
# `args.repo`, but nothing in the notebook ever builds `args` (argparse is
# imported, never used), so the cell stopped with a NameError.
# NOTE(review): 'rails/rails' mirrors the repository crawled in the Update
# cell -- confirm that this is the intended target.
REPOSITORY_ID = 'rails/rails'


def _collect_comments(comments):
    """Print each comment body with a 1-based index; return (ids, bodies)."""
    comment_ids = []
    bodies = []
    for position, comment in enumerate(comments, start=1):
        comment_ids.append(comment['_id'])
        bodies.append(comment['body'])
        print("{} - {}".format(position, comment['body']))
    return comment_ids, bodies


def _store_sentiments(pr_number, array_field, comment_ids, bodies):
    """Classify bodies, write each result onto its comment, return (pos, neu, neg)."""
    positive = neutral = negative = 0
    for comment_id, result in zip(comment_ids, get_tuples(bodies)):
        label = result['sentiment']
        if label == 'Positive':
            positive += 1
        elif label == 'Neutral':
            neutral += 1
        elif label == 'Negative':
            negative += 1

        # The positional `$` refers to the array element matched by the
        # `<array_field>._id` condition in the filter.
        pull_requests_collection.update_one(
            {"repository_id": REPOSITORY_ID, 'number': pr_number, array_field + '._id': comment_id},
            {"$set": {array_field + ".$.sentiment": label,
                      array_field + ".$.entity": result['entity']}})
    return positive, neutral, negative


init()

# Pull requests without a `positive_reviews_count` field have not been
# classified yet; only those are selected.
repository_query = {"repository_id": REPOSITORY_ID, "positive_reviews_count": {"$exists": False}}

prs = list(pull_requests_collection.find(repository_query).limit(MAX_CLASSIFY))

for pr in prs:
    print('Classifying comments from Pull Request #{}'.format(pr['number']))

    review_comments_ids, review_comments = _collect_comments(pr['review_comments'])
    issue_comments_ids, issue_comments = _collect_comments(pr['issue_comments'])

    review_positive, review_neutral, review_negative = _store_sentiments(
        pr['number'], 'review_comments', review_comments_ids, review_comments)
    issue_positive, issue_neutral, issue_negative = _store_sentiments(
        pr['number'], 'issue_comments', issue_comments_ids, issue_comments)

    # Writing the counters also marks the pull request as classified, because
    # the selection query above keys on `positive_reviews_count`.
    pull_requests_collection.update_one(
        {"repository_id": REPOSITORY_ID, 'number': pr['number']},
        {'$set': {
            'positive_reviews_count': review_positive,
            'neutral_reviews_count': review_neutral,
            'negative_reviews_count': review_negative,
            'positive_comments_count': issue_positive,
            'neutral_comments_count': issue_neutral,
            'negative_comments_count': issue_negative,
            'total_positive_count': review_positive + issue_positive,
            'total_neutral_count': review_neutral + issue_neutral,
            'total_negative_count': review_negative + issue_negative,
        }})

    # Sanity check: more classified issue comments than the pull request's
    # recorded `comments_count` means the stored data is inconsistent; stop.
    if int(pr["comments_count"]) < (issue_positive + issue_negative + issue_neutral):
        print("Algo de errado nao esta certo.")
        break

    print("Review comments length: {}; Pos: {}; Neu: {}; Neg: {}\nIssue comments length: {}; Pos: {}; Neu: {}; Neg: {}\n".format(len(review_comments), review_positive, review_neutral, review_negative, len(issue_comments), issue_positive, issue_neutral, issue_negative))


elapsed_time = time.time() - start_time
formatted_elapsed_time = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))

print(formatted_elapsed_time)