In [4]:
import sys 
import csv
import os
from datetime import datetime
from datetime import timedelta
import numpy as np

def retrieveStockPrice(company):
    """Load the daily mid-point prices of one company from its CSV file.

    Reads ``SandP500_stock/<company>_data.csv``. For every row that has both
    a non-empty opening and closing price, the average of the two (rounded to
    5 decimals) is stored under that row's date string. The header may use
    lower-case column names (``date``/``open``/``close``) or capitalised ones
    (``Date``/``Open``/``Close``).

    Returns:
        dict mapping the date string exactly as it appears in the file to the
        averaged price. Rows with an empty open or close value are skipped.
    """
    lower_cols = ("date", "open", "close")
    title_cols = ("Date", "Open", "Close")

    csv_path = os.path.join("SandP500_stock", company + "_data.csv")
    with open(csv_path) as handle:
        prices = {}
        for record in csv.DictReader(handle):
            # Pick the column spelling this file uses.
            date_col, open_col, close_col = lower_cols if "open" in record else title_cols
            opening = record[open_col]
            if not opening:
                continue
            closing = record[close_col]
            if not closing:
                continue
            prices[record[date_col]] = round((float(opening) + float(closing)) / 2, 5)
    return prices

#Retrieve accumulated sentiment and compare it with the actual stock price
# Values of the 'Symbol' column of SP500.csv, in file order. The position of
# a symbol in this list is used as the company index elsewhere in this file.
with open('SP500.csv') as SP500csv:
    company_list = [entry['Symbol'] for entry in csv.DictReader(SP500csv)]

def transformwithSlash(date):
    """Format *date* as ``<year>-MM-DD``.

    Month and day are zero-padded to two digits; the year is written as-is.
    Despite the function's name the separator is a hyphen, which matches the
    date keys looked up in the stock price tables.
    """
    return f"{date.year}-{date.month:02d}-{date.day:02d}"

def lenofDay(start_year, start_month, end_year, end_month):
    """Count the days from the first day of the start month through the last
    day of the end month (both months included).

    All four arguments may be ints or numeric strings (e.g. ``'04'``); they
    are converted with ``int()``.

    Returns:
        int number of days in the inclusive month range.
    """
    first_year, first_month = int(start_year), int(start_month)
    last_year, last_month = int(end_year), int(end_month)

    # Exclusive upper bound: the first day of the month following the end
    # month. December is compared numerically so that both 12 and '12' roll
    # over into January of the next year.
    if last_month == 12:
        upper_bound = datetime(last_year + 1, 1, 1)
    else:
        upper_bound = datetime(last_year, last_month + 1, 1)
    return (upper_bound - datetime(first_year, first_month, 1)).days

def _first_trading_price(avg_price, month_start, company):
    """Return the price on the first date on or after *month_start* that is
    present in *avg_price*.

    Raises:
        ValueError: if *avg_price* holds no date on or after *month_start*.
    """
    # A key that can match has the format produced by transformwithSlash, and
    # such keys sort chronologically as strings. Once the probe is past the
    # largest key, no later probe can match, so stop instead of stepping day
    # by day until datetime overflows.
    latest_key = max((key for key in avg_price if isinstance(key, str)), default="")
    probe = month_start
    probe_key = transformwithSlash(probe)
    while probe_key not in avg_price:
        if probe_key > latest_key:
            raise ValueError(
                "No stock price for " + str(company)
                + " on or after " + transformwithSlash(month_start)
            )
        probe += timedelta(days=1)
        probe_key = transformwithSlash(probe)
    return avg_price[probe_key]


def runROI(start_year, start_month, end_year, end_month, source_dir, filter_name):
    """Simulate a sentiment-driven trading strategy and save the cumulative
    return of every company as a CSV file.

    For each month in the inclusive range, the monthly file
    ``Extra_Storage/<source_dir>/<year>/<month>/User_Sentiment/
    acc_weekly_sentiment_<year>_<month>_ALL_<filter_name>.csv`` is read. Row
    *i* of that file is paired with ``company_list[i]``.

    Strategy, per company:
      * In the first month, 100 dollars' worth of stock is held from the
        start, priced at the first available trading day.
      * Sentiment is summed day by day. Whenever the day after has a stock
        price, a positive sum buys 100 dollars' worth (if nothing is held), a
        negative sum sells everything, and the sum is reset.
      * The running profit is carried forward every calendar day.

    Side effects:
      * Prints the largest number of companies held on any single day.
      * Writes the ROI table (one row per company, one column per day) to
        ``Extra_Storage/<source_dir>/<end_year>/
        <start_year><start_month>_to_<end_year><end_month>_<filter_name>_weekly.csv``
        and prepends a date header via ``addDateTag``.

    Args:
        start_year, start_month, end_year, end_month: strings such as
            ``'2015'`` / ``'04'``; they are concatenated into file names.
        source_dir: directory name under ``Extra_Storage``.
        filter_name: suffix identifying the sentiment file to read.

    Raises:
        ValueError: if a company has no stock price on or after the first day
            of a simulated month.
    """
    len_day = lenofDay(start_year, start_month, end_year, end_month)
    n_company = len(company_list)
    ROI_history = [[0] * len_day for _ in range(n_company)]

    currHoldList = [0] * n_company
    # Daily number of invested companies, to find the maximum money required.
    total_investment = [0] * len_day
    # Price tables are loaded once per company instead of once per month.
    price_cache = {}

    range_start = datetime(int(start_year), int(start_month), 1)

    for year_int in range(int(start_year), int(end_year) + 1):
        last_month = int(end_month) if year_int == int(end_year) else 12
        first_month = int(start_month) if year_int == int(start_year) else 1

        for month_int in range(first_month, last_month + 1):
            single_month = datetime(year_int, month_int, 1)
            year = str(year_int)
            month = f"{month_int:02d}"
            path = os.path.join("Extra_Storage", source_dir, year, month, "User_Sentiment")
            # Column index of this month's first day within the whole range.
            offset = (single_month - range_start).days

            # Weekly variant. The daily variant reads
            # 'acc_sentiment_<year>_<month>_ALL_<filter_name>.csv' instead.
            sentiment_file = os.path.join(
                path, 'acc_weekly_sentiment_' + year + '_' + month + '_ALL_' + filter_name + '.csv'
            )
            with open(sentiment_file) as accSentiment:
                reader = csv.DictReader(accSentiment)
                for company_idx, acc_Sentiment in enumerate(reader):
                    currHold = currHoldList[company_idx]
                    company = company_list[company_idx]
                    history = ROI_history[company_idx]

                    if company not in price_cache:
                        price_cache[company] = retrieveStockPrice(company)
                    avg_price = price_cache[company]

                    # Price on the first trading day of the month.
                    previousPrice = _first_trading_price(avg_price, single_month, company)
                    # Hold 100 dollars of each stock at the very beginning.
                    if offset == 0:
                        currHold = 100 / previousPrice

                    sumSentiment = 0

                    if offset > 0:   # Carry over last month's ROI.
                        history[offset] = round(history[offset - 1], 5)

                    for day in range(1, len(acc_Sentiment) + 1):
                        # Trades execute one day after the sentiment is seen.
                        next_date = datetime(int(year), int(month), day) + timedelta(days=1)
                        date = str(int(month)) + '/' + str(day) + '/' + year
                        sumSentiment += float(acc_Sentiment[date])

                        next_date_string = transformwithSlash(next_date)
                        slot = day + offset - 1
                        # NOTE: for slot 0 the index slot - 1 wraps to the last
                        # column, which still holds its initial 0 at that point.
                        carried = history[slot - 1]

                        if next_date_string in avg_price:
                            nextdayPrice = avg_price[next_date_string]
                            if sumSentiment > 0:
                                if currHold == 0:
                                    currHold = 100 / nextdayPrice
                                    previousPrice = nextdayPrice
                            elif sumSentiment < 0:
                                currHold = 0

                            if currHold > 0:
                                history[slot] = round(carried + (nextdayPrice - previousPrice) * currHold, 5)
                                total_investment[slot] += 1
                            else:
                                history[slot] = round(carried, 5)

                            previousPrice = nextdayPrice  # Save stock price for next iteration
                            sumSentiment = 0  # Reset sentiment
                        else:
                            # No price for the next day: keep the ROI flat.
                            history[slot] = round(carried, 5)

                    currHoldList[company_idx] = currHold
    print ('Maximum companies invested in one day: ', max(total_investment))

    arr = np.array(ROI_history)
    # Weekly variant. The daily variant used the suffix '_BuyZeroatBegin.csv'.
    savepath = os.path.join("Extra_Storage", source_dir, end_year, start_year + start_month + '_to_' + end_year + end_month + '_' + filter_name + '_weekly.csv')
    np.savetxt(savepath, arr, fmt='%1.5f', delimiter=',')
    addDateTag(savepath, start_year, start_month, end_year, end_month)

def runSentiment(user_id, path, year, month):
    """Load one user's sentiment table for a month.

    Reads ``<path>/User_Sentiment/acc_sentiment_<year>_<month>_<user_id>.csv``
    as comma-separated numbers, skipping the first (header) line.

    Returns:
        The numpy array produced by ``np.genfromtxt`` for that file.
    """
    csv_name = 'acc_sentiment_' + year + '_' + month + '_' + user_id + '.csv'
    csv_path = os.path.join(path, "User_Sentiment", csv_name)
    return np.genfromtxt(csv_path, delimiter=',', skip_header=1)

def runWeeklySentiment(user_id, path, year, month):
    """Load one user's sentiment table and turn it into trailing 7-day sums.

    Reads ``<path>/User_Sentiment/acc_sentiment_<year>_<month>_<user_id>.csv``
    (first line skipped as header). In every row, the value in column *i* is
    replaced by the sum of columns ``i-6 .. i``; for the first columns, where
    fewer than seven values exist, the sum starts at column 0.

    Returns:
        numpy array with the same row/column layout as the file's data.
    """
    csv_name = 'acc_sentiment_' + year + '_' + month + '_' + user_id + '.csv'
    daily = np.genfromtxt(os.path.join(path, "User_Sentiment", csv_name), delimiter=',', skip_header=1)

    window = 7
    rolled = []
    for series in daily:
        # Builtin sum adds the window left to right, starting from 0.
        rolled.append([
            sum(series[max(0, end - window + 1):end + 1])
            for end in range(len(series))
        ])
    return np.asarray(rolled)

def addDateTag(roipath, start_year, start_month, end_year, end_month):
    """Prepend a header row of calendar dates to an existing CSV file.

    The header has one ``M/D/YYYY`` entry (no zero padding) for every day
    from the first day of the start month through the last day of the end
    month. The file at *roipath* is read completely and rewritten in place
    with the header as its first row, followed by the original rows.

    Args:
        roipath: path of the CSV file to modify.
        start_year, start_month, end_year, end_month: ints or numeric strings
            (e.g. ``'04'``); converted with ``int()``.
    """
    first_day = datetime(int(start_year), int(start_month), 1)
    # Exclusive end: first day of the month after the end month. December is
    # compared numerically so that both 12 and '12' roll over to next January.
    last_month = int(end_month)
    if last_month == 12:
        end_date = datetime(int(end_year) + 1, 1, 1)
    else:
        end_date = datetime(int(end_year), last_month + 1, 1)

    dates = []
    for delta in range((end_date - first_day).days):
        day = first_day + timedelta(days=delta)
        dates.append(f"{day.month}/{day.day}/{day.year}")

    # Read everything before reopening the same path for writing.
    with open(roipath, newline='') as outcsv:
        data = list(csv.reader(outcsv))
    with open(roipath, 'w', newline='') as outcsv:
        writer = csv.writer(outcsv)
        writer.writerow(dates)
        writer.writerows(data)

def accumulateUserSentiments(start_year, start_month, end_year, end_month, source_dir, filter_name, n_company=507):
    """Build, for every month in the range, the score-weighted sum of all
    users' weekly sentiment and save it as a CSV file.

    For each month directory ``Extra_Storage/<source_dir>/<year>/<month>``:
      1. Read one score per user from a CSV file:
         * ``<filter_name>.csv`` (column ``combined_score``) when
           *filter_name* contains ``"predicted_scores"``;
         * otherwise ``trust_scores_<year>_<month>.csv`` (column
           ``<filter_name>_score``).
      2. Read the user ids from ``id_list.txt`` (one per line). Ids and
         scores are paired by position.
      3. Add up ``score * runWeeklySentiment(user)`` over all users.
      4. Write the result to ``User_Sentiment/
         acc_weekly_sentiment_<year>_<month>_ALL_<filter_name>.csv`` and
         prepend a date header via ``addDateTag``.

    Args:
        start_year, start_month, end_year, end_month: ints or numeric strings
            delimiting the inclusive month range.
        source_dir: directory name under ``Extra_Storage``.
        filter_name: name of the score to weight by, or a
            ``predicted_scores...`` file stem.
        n_company: number of rows (companies) in every user's sentiment
            table. Defaults to 507, the value that was previously hard-coded.
    """
    uses_predicted = "predicted_scores" in filter_name

    for year_int in range(int(start_year), int(end_year) + 1):
        last_month = int(end_month) if year_int == int(end_year) else 12
        first_month = int(start_month) if year_int == int(start_year) else 1
        cur_year = str(year_int)

        for month_int in range(first_month, last_month + 1):
            month = f"{month_int:02d}"
            path = os.path.join("Extra_Storage", source_dir, cur_year, month)

            if uses_predicted:
                openfile = os.path.join(path, filter_name + ".csv")
            else:
                openfile = os.path.join(path, "trust_scores_" + cur_year + "_" + month + ".csv")

            scores = []
            with open(openfile, newline='') as trust_scores:
                for x in csv.DictReader(trust_scores):
                    if filter_name == 'authority':
                        # Authority scores are scaled by 1e18.
                        # NOTE(review): presumably because the raw values are
                        # tiny - confirm.
                        scores.append(float(x[filter_name + "_score"]) * 10**18)
                    elif uses_predicted:
                        scores.append(float(x['combined_score']))
                    else:
                        scores.append(float(x[filter_name + "_score"]))

            with open(os.path.join(path, "id_list.txt")) as id_list:
                idList = [line.strip() for line in id_list]

            len_day = lenofDay(cur_year, month, cur_year, month)
            acc_sentiment = np.zeros((n_company, len_day))
            # NOTE: zip stops at the shorter of the two lists, so surplus ids
            # or scores are ignored silently.
            for user_id, ratio in zip(idList, scores):
                # Weekly (trailing 7-day) sentiment; runSentiment would give
                # the daily variant.
                acc_sentiment += ratio * runWeeklySentiment(user_id, path, cur_year, month)

            savepath = os.path.join(path, 'User_Sentiment', "acc_weekly_sentiment_" + cur_year + "_" + month + "_ALL_" + filter_name + ".csv")
            np.savetxt(savepath, acc_sentiment, fmt='%1.5f', delimiter=',')
            addDateTag(savepath, cur_year, month, cur_year, month)

    
if __name__ == "__main__":
    # runROI reads the monthly 'acc_weekly_sentiment_..._ALL_<filter>.csv'
    # files, which accumulateUserSentiments writes. To regenerate them, call
    # accumulateUserSentiments with the same six arguments first.
    #
    # Other filter names used with this script:
    #   "predicted_scores_from_2015_11", "predicted_scores_from_2016_03",
    #   "predicted_scores_from_2015_11_to_2016_04", "buyandhold"
    for trust_filter in ("expertise", "experience", "reputation", "authority"):
        runROI('2015', '11', '2016', '04', "ArchiveteamTest", trust_filter)


Maximum companies invested in one day:  502
Maximum companies invested in one day:  502
Maximum companies invested in one day:  502
Maximum companies invested in one day:  502
