# In [None]:
# Definition Block

# URL template for the AMFI NAV-history report. Placeholders, in order:
# from-day, from-month-name, from-year, to-day, to-month-name, to-year.
furl = 'http://portal.amfiindia.com/DownloadNAVHistoryReport_Po.aspx?tp=1&frmdt={:02d}-{}-{}&todt={:02d}-{}-{}'
# When True, a window is downloaded even if its file is already on disk.
forced = False
# Raw download file name: <from yyyy mm dd>-<to yyyy mm dd>.txt
dataFilenameFormat = 'amfiData/{}{:02d}{:02d}-{}{:02d}{:02d}.txt'
# Path template and directory for the raw downloads.
amfiDataPath = 'amfiData/{}'
amfiDataDir = './amfiData/'
# Header line a valid download is expected to start with.
amfiDataLookFor = 'Scheme Code;Scheme Name;ISIN Div Payout/ISIN Growth;ISIN Div Reinvestment;Net Asset Value;Repurchase Price;Sale Price;Date'
# Seconds to pause after each download request.
sleepDurationForAPI = 1
# Exclusive lower bound of the years walked by downloadDataFunction.
earliestRecordedYear = 2005
# File name templates for JSON written under amfiJSON/ and amfiData/.
jsonFilenameFormat = 'amfiJSON/{}.json'
jsonFilenameFormatNOSAVE = 'amfiData/{}.json'
# Directory and path template for the JSON output folder.
amfiJSONDir = './amfiJSON/'
amfiJSONPath = 'amfiJSON/{}'
# Filled by downloadDataFunction with the files of the force-refreshed month;
# read back by getActiveMFSFunction.
lastForcedFiles = []
# Default for the ``verbose`` argument of every step function.
VERBOSE = False
# Row offsets into a scheme's newest-first NAV list; reportTop50Function
# measures the gain between each pair of neighbouring offsets.
windows = [0, 10, 20, 30, 60, 90, 120, 180, 240, 300, 390, 480, 570]
# One weight per pair of neighbouring windows (the literal is reversed).
weights = list(reversed([1, 2, 3, 4, 5, 6, 7, 8, 9, 4, 4, 5]))
# Month number (1-12) -> three-letter month name as used in AMFI dates.
months = {
    number: name
    for number, name in enumerate(
        [
            'Jan',
            'Feb',
            'Mar',
            'Apr',
            'May',
            'Jun',
            'Jul',
            'Aug',
            'Sep',
            'Oct',
            'Nov',
            'Dec',
        ],
        start=1,
    )
}


# Block Flags
# Each flag gates the like-named "if <flag>:" section of this script, which
# announces the step and runs the step's function through timeit. Set a flag
# to False to skip that step.
downloadData = True
deleteFiles = True
buildJSON = True
getActiveMFS = True
extractProtfolio = True
getMFAPI = True
reportTop50 = True


# Imports
import requests
import time
import os
from datetime import date
import json
import sys
import pandas as pd

# Wall-clock start of the whole run; the final "Total Time" print subtracts it.
istime = time.time()

# Hidden Prints
class HiddenPrints:
    """Context manager that silences ``print`` output unless verbose.

    While the ``with`` block runs and ``verbose`` is falsy, ``sys.stdout`` is
    pointed at ``os.devnull``; on exit the previous ``sys.stdout`` is put
    back. With a truthy ``verbose`` the manager does nothing.

    Args:
        verbose: Whether output should stay visible. ``None`` (the default)
            means "use the module-level ``VERBOSE`` setting".
    """

    def __init__(self, verbose=None):
        # Resolve the default at call time so the value given here is the one
        # that __enter__ honours.
        self._verbose = VERBOSE if verbose is None else verbose
        self._original_stdout = None
        self._devnull = None

    def __enter__(self, verbose=None):
        # ``verbose`` is kept only for backward compatibility with direct
        # calls; the ``with`` statement never passes it, so the constructor
        # value must not be overwritten when it is absent.
        if verbose is not None:
            self._verbose = verbose
        self._original_stdout = sys.stdout
        if not self._verbose:
            self._devnull = open(os.devnull, 'w')
            sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first, then close our own handle (never whatever happens to
        # be installed as sys.stdout at this point). Returning None lets any
        # exception from the block propagate.
        if self._devnull is not None:
            sys.stdout = self._original_stdout
            self._devnull.close()
            self._devnull = None


# timeit
def timeit(f):
    """Call ``f`` with no arguments, print how long it took, return its result.

    The duration is measured with ``time.time()`` and printed in seconds.
    """
    started = time.time()
    result = f()
    elapsed = time.time() - started
    print(f"Finished in {elapsed}")
    return result




# Download Data
def downloadDataFunction(verbose=VERBOSE):
    """Download AMFI NAV-history reports, one file per date window.

    Years are walked from the current one down to (but not including)
    ``earliestRecordedYear``. Every month is fetched in the four windows of
    ``daysList``. A window whose file is already in ``amfiDataDir`` is
    skipped unless ``forced`` is set or the month is the "forced" month
    (the current month, or the previous month when today is the 1st), which
    is always downloaded again. Files of the forced month are appended to the
    module-level ``lastForcedFiles``.

    Args:
        verbose: Forwarded to ``HiddenPrints``, which decides whether the
            progress prints are shown.

    Side effects:
        Performs HTTP GET requests, writes files under ``amfiData/``, sleeps
        ``sleepDurationForAPI`` seconds after each download and mutates
        ``lastForcedFiles``. The response body is written as-is; the HTTP
        status is not checked here.
    """
    # (from-day, to-day) windows; the last one ends on the 1st of the
    # following month.
    daysList = [(2, 9), (10, 19), (20, 24), (25, 1)]
    today = date.today()
    currentYear = today.year
    currentMonth = today.month
    currentDay = today.day
    onceMonth = True  # True only while handling the first (current) year
    onceDay = currentDay < 28
    forcedMonth = -1
    with HiddenPrints(verbose):
        print(currentYear, currentMonth, currentDay)
        # A set keeps the per-window "already downloaded?" test O(1).
        filesPresent = {amfiDataPath.format(f) for f in os.listdir(amfiDataDir) if f.endswith('.txt')}

        for yyyy in range(currentYear, earliestRecordedYear, -1):
            monthsToSeek = months
            if onceMonth:
                onceMonth = False
                if currentDay == 1:
                    onceDay = False
                    if currentMonth > 1:
                        monthsToSeek = {
                            k: v for k, v in months.items() if k <= currentMonth - 1
                        }
                        forcedMonth = currentMonth - 1
                    else:
                        # 1 January: nothing to fetch for the new year yet.
                        # Skipping the reset at the bottom of the loop keeps
                        # December of the previous year as the forced month.
                        forcedMonth = 12
                        continue
                else:
                    monthsToSeek = {
                        k: v for k, v in months.items() if k <= currentMonth
                    }
                    forcedMonth = currentMonth
            for index, month in monthsToSeek.items():
                for days in daysList:
                    # NOTE(review): this skips exactly one window - the first
                    # one (starting in January of the current year) whose
                    # start day is after today's day-of-month. It looks like
                    # it was meant for the current month only; confirm the
                    # intent before changing it.
                    if onceDay and currentDay < days[0]:
                        onceDay = False
                        continue
                    yyyy2 = yyyy
                    am = index
                    monthx = month
                    if days[0] >= days[1]:
                        # The window runs into the next month (and into the
                        # next year when it starts in December).
                        if month == 'Dec':
                            am = 1
                            monthx = 'Jan'
                            yyyy2 += 1
                        else:
                            am += 1
                            monthx = months[index + 1]
                    url = furl.format(days[0], month, yyyy, days[1], monthx, yyyy2)
                    filename = dataFilenameFormat.format(yyyy, index, days[0], yyyy2, am, days[1])
                    isForcedMonth = index == forcedMonth
                    if filename in filesPresent and not forced and not isForcedMonth:
                        print("Skipping: {}, already present:{}".format(filename, url))
                        continue
                    if isForcedMonth:
                        lastForcedFiles.append(filename)
                    print("Downloading: {}, {}".format(filename, url))
                    response = requests.get(url, allow_redirects=True)
                    # Close the file deterministically instead of relying on
                    # garbage collection.
                    with open(filename, 'wb') as out:
                        out.write(response.content)
                    time.sleep(sleepDurationForAPI)
            # Only the first processed year has a forced month.
            forcedMonth = -1

# Run the download step when its flag is enabled; timeit prints the duration.
if downloadData:
    print("Starting downloadData...")
    timeit(downloadDataFunction)
    

# Delete files without data
def deleteFilesFunction(verbose=VERBOSE):
    """Delete downloaded ``.txt`` files that do not start with the AMFI header.

    Every ``.txt`` file in ``amfiDataDir`` whose first line differs from
    ``amfiDataLookFor`` is removed; that includes empty files. The distinct
    first non-empty lines seen, the number of files to delete and their names
    are printed.

    Args:
        verbose: Forwarded to ``HiddenPrints``, which decides whether the
            prints are shown.
    """
    filesPresent = [amfiDataPath.format(f) for f in os.listdir(amfiDataDir) if f.endswith('.txt')]
    lf = []
    scontent = set()
    with HiddenPrints(verbose):
        for f in filesPresent:
            with open(f) as of:
                contents = of.read().split('\n')
            if contents[0] != amfiDataLookFor:
                lf.append(f)
            # An empty or all-blank file has no such line; it must still be
            # deleted rather than abort the scan with an IndexError.
            firstNonEmpty = next((line for line in contents if line != ''), None)
            if firstNonEmpty is not None:
                scontent.add(firstNonEmpty)
        lf.sort()
        for s in scontent:
            print(s)
        print(len(lf))
        for f in lf:
            print(f)
            os.remove(f)

# Run the clean-up step when its flag is enabled; timeit prints the duration.
if deleteFiles:
    print("Starting deleteFiles...")
    timeit(deleteFilesFunction)


# Build JSON
def buildJSONFunction(verbose=VERBOSE):
    """Parse all downloaded AMFI text files and write one JSON file per scheme.

    File layout as handled by this parser: the header line
    (``amfiDataLookFor``) is only counted; a line containing ``;`` is a data
    row with 8 fields; any other non-empty line is remembered as a heading.
    When a heading is followed by another heading, the first one becomes the
    current category; when a heading is followed by a data row, it becomes
    the current fund house.

    For every scheme code a file ``amfiJSON/<code>.json`` is written holding
    ``{'meta': ..., 'data': [...]}`` with the rows sorted newest first, plus
    ``amfiJSON/ALLMF.json`` with the list of all meta records. Prices that
    are not numeric are stored as the string ``'NA'``; rows in which NAV,
    repurchase price and sale price are all ``'NA'`` are dropped.

    Args:
        verbose: Forwarded to ``HiddenPrints``, which decides whether the
            summary prints are shown.

    Raises:
        ValueError: If a data row does not have 8 fields, its scheme code is
            not an integer, its date is not three ``-`` separated parts, a
            date uses an unknown month name, or the meta and data maps end up
            with different scheme codes.
    """
    # Month name -> month number, built once instead of scanning ``months``
    # for every date that is compared during sorting.
    monthNumbers = {name: number for number, name in months.items()}

    def parsePrice(text):
        """Return ``text`` as a float, or 'NA' when it is not a number."""
        try:
            return float(text)
        except ValueError:
            return 'NA'

    def dateNumber(d):
        """Turn a 'dd-Mon-yyyy' date into the sortable integer yyyymmdd."""
        day, monthName, year = d.split('-')
        if monthName not in monthNumbers:
            raise ValueError("unknown month in date: {}".format(d))
        # Arithmetic instead of string formatting so that a day written
        # without a leading zero still sorts correctly.
        return int(year) * 10000 + monthNumbers[monthName] * 100 + int(day)

    filesPresent = [amfiDataPath.format(f) for f in os.listdir(amfiDataDir) if f.endswith('.txt')]
    countHEADMap = {}   # header lines per file -> number of such files
    categories = set()
    mfhs = set()        # fund-house headings seen
    mf = {}             # scheme code -> meta record
    mfd = {}            # scheme code -> list of price rows
    # Fund house and category deliberately carry over from one file to the
    # next; only ``lastLine`` is reset per file.
    currentMFH = ''
    currentCategory = ''
    with HiddenPrints(verbose):
        for f in filesPresent:
            with open(f) as of:
                contents = of.read().split('\n')
            count = 0
            lastLine = ''
            for c in contents:
                if c == '':
                    continue
                if amfiDataLookFor in c:
                    count += 1
                    continue
                if ';' not in c:
                    # Heading line: a pending heading followed by another
                    # heading was a category.
                    if lastLine != '':
                        currentCategory = lastLine
                        categories.add(lastLine)
                    lastLine = c
                    continue

                # Data row: a pending heading directly above it is the fund
                # house.
                if lastLine != '':
                    mfhs.add(lastLine)
                    currentMFH = lastLine
                    lastLine = ''
                data = c.split(';')
                if len(data) != 8:
                    raise ValueError("{} expected 8 fields:{}".format(f, c))
                schemeCode = int(data[0])
                meta = {
                    'scheme_code': schemeCode,
                    'scheme_name': data[1],
                    'ISIN_Div_Payout_or_ISIN_Growth': data[2] or 'NA',
                    'ISIN_Div_Reinvestment': data[3] or 'NA',
                    'fund_house': currentMFH,
                    'category': currentCategory
                }
                if schemeCode not in mf:
                    mf[schemeCode] = meta
                else:
                    # The first meta record wins; report fields that differ
                    # (stored value, then the value in this row).
                    for k in meta:
                        if meta[k] != mf[schemeCode][k]:
                            print(f, k, mf[schemeCode][k], meta[k])
                nav = parsePrice(data[4])
                repurchasePrice = parsePrice(data[5])
                salePrice = parsePrice(data[6])
                dataDate = data[7]
                if len(dataDate.split('-')) != 3:
                    raise ValueError("{} date failed:{}".format(f, c))
                # Register the scheme even if this row is dropped, so a scheme
                # with only empty rows gets an empty data list instead of
                # failing the consistency check below.
                schemeRows = mfd.setdefault(schemeCode, [])
                if nav == 'NA' and repurchasePrice == 'NA' and salePrice == 'NA':
                    continue
                schemeRows.append({
                    'nav': nav,
                    'repurchasePrice': repurchasePrice,
                    'salePrice': salePrice,
                    'date': dataDate
                })
            countHEADMap[count] = countHEADMap.get(count, 0) + 1
        print("countHEADMap: ", countHEADMap)
        print("categories: ", len(categories), '' in categories)
        print("mfhs: ", len(mfhs), '' in mfhs)
        print("mf: ", len(mf))
        print("mfd: ", len(mfd))
        for sc in mf:
            if sc not in mfd:
                raise ValueError("{} present in mf but not in mfd".format(sc))
        for sc in mfd:
            if sc not in mf:
                raise ValueError("{} present in mfd but not in mf".format(sc))

        # Make sure the output folder exists before the first write.
        os.makedirs(amfiJSONDir, exist_ok=True)
        mfl = []
        for sc in mf:
            d = {
                'meta': mf[sc],
                'data': sorted(mfd[sc], key=lambda entry: dateNumber(entry['date']), reverse=True)
            }
            mfl.append(mf[sc])
            # Mode 'w' replaces any previous file for the scheme, so schemes
            # seen for the first time need no special handling.
            with open(jsonFilenameFormat.format(sc), 'w') as jsf:
                json.dump(d, jsf)

        with open(jsonFilenameFormat.format('ALLMF'), 'w') as jsf:
            json.dump(mfl, jsf)


# Run the JSON build step when its flag is enabled; timeit prints the duration.
if buildJSON:
    print("Starting buildJSON...")
    timeit(buildJSONFunction)


# Get Active MFs
def getActiveMFSFunction(verbose=VERBOSE):
    """Write the scheme codes found in the newest force-downloaded file.

    ``lastForcedFiles`` is walked from its last entry backwards; the first
    entry that still exists in ``amfiDataDir`` is read, the scheme code
    (first ``;`` separated field) of every data row is collected, and the
    result is written to ``amfiData/active.json`` as
    ``{'schemeList': [...]}`` in ascending order. If no such file exists the
    list is empty.

    Args:
        verbose: Forwarded to ``HiddenPrints``, which decides whether the
            result is printed.

    Raises:
        ValueError: If the first field of a data row is not an integer.
    """
    with HiddenPrints(verbose):
        filesPresent = {amfiDataPath.format(f) for f in os.listdir(amfiDataDir) if f.endswith('.txt')}
        ac = set()
        for f in reversed(lastForcedFiles):
            if f not in filesPresent:
                continue
            with open(f, 'r') as of:
                # The first line is the header.
                lines = of.read().split('\n')[1:]
            for line in lines:
                # Also ignore a header that is repeated further down the
                # file; it contains ';' but has no numeric scheme code.
                if ';' in line and amfiDataLookFor not in line:
                    ac.add(int(line.split(';')[0]))
            break
        # Sorted so that the written file is stable between runs.
        d = {'schemeList': sorted(ac)}
        print(d)
        with open(jsonFilenameFormatNOSAVE.format('active'), 'w') as jsf:
            json.dump(d, jsf, indent=2, sort_keys=True)

# Run the active-schemes step when its flag is enabled; timeit prints the duration.
if getActiveMFS:
    print("Starting getActiveMFS...")
    timeit(getActiveMFSFunction)


# Extract from my protfolio my current MF
def extractProtfolioFunction(verbose=VERBOSE):
    """Extract the distinct scheme codes from the portfolio file.

    Reads ``amfiData/mf.json``, collects ``scheme_code`` from every entry of
    ``data.fund_list`` and writes them to ``amfiData/current.json`` as
    ``{'schemeList': [...]}``.

    Args:
        verbose: Forwarded to ``HiddenPrints``, which decides whether the
            count and the result are printed.

    Raises:
        FileNotFoundError: If ``amfiData/mf.json`` does not exist.
        KeyError: If the expected keys are missing from the portfolio file.
    """
    with HiddenPrints(verbose):
        # Context managers close both files deterministically.
        with open(jsonFilenameFormatNOSAVE.format('mf'), 'r') as portfolioFile:
            kid = json.load(portfolioFile)
        schemeCodes = {fund['scheme_code'] for fund in kid['data']['fund_list']}
        print(len(schemeCodes))
        d = {'schemeList': list(schemeCodes)}
        print(d)
        with open(jsonFilenameFormatNOSAVE.format('current'), 'w') as jsf:
            json.dump(d, jsf, indent=2, sort_keys=True)

# Run the portfolio extraction step when its flag is enabled; timeit prints the duration.
if extractProtfolio:
    print("Starting extractProtfolio...")
    timeit(extractProtfolioFunction)


# Download all list from mfapi
def getMFAPIFunction(verbose=VERBOSE):
    """Download the scheme list from api.mfapi.in into ``amfiJSON/ALLMFapi.json``.

    The response body is written unchanged; the HTTP status is not checked.

    Args:
        verbose: Forwarded to ``HiddenPrints``.
    """
    with HiddenPrints(verbose):
        url = 'https://api.mfapi.in/mf'
        response = requests.get(url, allow_redirects=True)
        filename = jsonFilenameFormat.format('ALLMFapi')
        # Close the file deterministically instead of relying on garbage
        # collection.
        with open(filename, 'wb') as out:
            out.write(response.content)

# Run the mfapi download step when its flag is enabled; timeit prints the duration.
if getMFAPI:
    print("Starting getMFAPI...")
    timeit(getMFAPIFunction)


# Report Top 50
def reportTop50Function(verbose=VERBOSE):
    """Rank the active schemes by weighted NAV gain and save the best 50.

    For every scheme in ``amfiData/active.json`` the per-scheme JSON file is
    loaded and, for each pair of neighbouring offsets in ``windows``, the
    percentage gain between the NAV at the nearer offset and the NAV at the
    farther offset is computed. A window counts only if the scheme has a row
    at the farther offset, both NAVs are numbers and the farther NAV is
    positive; otherwise its gain is reported as 0 and it carries no weight.
    ``cost`` is the average of the counted gains weighted by ``weights``.
    Schemes without any counted window are left out.

    The 50 schemes with the highest ``cost`` are printed, written to
    ``amfiData/top50.csv`` and their codes to ``amfiData/top50.json``. When
    no scheme can be ranked, a message is printed and no file is written.

    Args:
        verbose: Forwarded to ``HiddenPrints``, which decides whether the
            prints are shown.

    Raises:
        FileNotFoundError: If ``active.json`` or a scheme's JSON file is
            missing.
    """
    def isNumber(value):
        # Non-numeric NAVs are stored as the string 'NA' in the scheme files.
        return isinstance(value, (int, float))

    records = []
    with HiddenPrints(verbose):
        with open(jsonFilenameFormatNOSAVE.format('active'), 'r') as activeFile:
            kid = json.load(activeFile)
        print(len(kid['schemeList']))
        for sc in kid['schemeList']:
            with open(jsonFilenameFormat.format(sc)) as schemeFile:
                rows = json.load(schemeFile)['data']
            dd = {'schemeCode': sc}
            cost = []  # [gain, weight] for every window that counts
            for index, ws in enumerate(windows[1:]):
                gain = 0
                if ws < len(rows):
                    # windows[index] is the offset just before ``ws``.
                    newer = rows[windows[index]]['nav']
                    older = rows[ws]['nav']
                    if isNumber(newer) and isNumber(older) and older > 0:
                        gain = (newer / older - 1) * 100
                        cost.append([gain, weights[index]])
                dd[f'Gain:{ws}'] = gain
            totalWeight = sum(v for _, v in cost)
            if totalWeight > 0:
                dd['cost'] = sum(k * v for k, v in cost) / totalWeight
                records.append(dd)
        if not records:
            # An empty frame has no 'cost' column to sort by.
            print("No scheme could be ranked; top50 files left untouched")
            return
        df = pd.DataFrame(records)
        sorted_df = df.sort_values(by='cost', ascending=False)
        top = sorted_df.head(50)
        print(top)
        top.to_csv(amfiDataPath.format('top50.csv'), index=False)
        d = {'schemeList': top['schemeCode'].tolist()}
        with open(jsonFilenameFormatNOSAVE.format('top50'), 'w') as jsf:
            json.dump(d, jsf, indent=2, sort_keys=True)
        

# Run the top-50 report step when its flag is enabled; timeit prints the duration.
if reportTop50:
    print("Starting reportTop50...")
    timeit(reportTop50Function)

# Wall-clock seconds elapsed since istime was captured near the top of the script.
print(f"Total Time {time.time() - istime}")