In [61]:
import requests
import pandas as pd
import io

from IPython.display import display, HTML
# Inject a CSS rule that forces elements with class `container` to 100% width
# (presumably so wide outputs use the whole browser window; TODO confirm the
# selector still matches the notebook front-end in use).
display(
    HTML("<style>.container { width:100% !important; }</style>")
)

In [62]:
# Common URL prefix of the AMFI endpoints; the request cells append an
# endpoint name (e.g. 'NavHistorySchemeNav') to it.
base_url = 'https://www.amfiindia.com/modules/'

In [63]:
# Load the fund-house lookup table from the local JSON file, then key and
# order it by its `id` column.
mf_list = (
    pd.read_json('mf_list.json')
    .set_index('id')
    .sort_index()
)
mf_list

Unnamed: 0_level_0,name
id,Unnamed: 1_level_1
1,Alliance Capital Mutual Fund
2,Standard Chartered Mutual Fund
3,Aditya Birla Sun Life Mutual Fund
4,Baroda BNP Paribas Mutual Fund
6,DSP Mutual Fund
...,...
76,Helios Mutual Fund
77,Zerodha Mutual Fund
78,Old Bridge Mutual Fund
79,Unifi Mutual Fund


In [64]:
def search(df, col_name, search_string):
    """Rank the rows of ``df`` by how well ``df[col_name]`` matches a query.

    The query is lower-cased and split on whitespace. Each row is scored by
    case-insensitive substring matching against its ``col_name`` value:
    1 point per query word found, 2 per pair of adjacent query words found
    and 3 per run of three adjacent query words found.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to search. It is never modified.
    col_name : str
        Name of the column holding the text to match against.
    search_string : str
        Free-text query.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` (all original columns) whose score is above zero,
        best match first. Rows with equal scores keep their original relative
        order. The result is empty when nothing matches, when the query
        contains no words, or when ``df`` itself is empty.
    """
    words = search_string.lower().split()
    # (text, weight) for every 1-, 2- and 3-word window of the query; the
    # weight of a window equals its length in words.
    weighted_ngrams = [
        (' '.join(words[start:start + size]), size)
        for size in (1, 2, 3)
        for start in range(len(words) - size + 1)
    ]

    def score_value(value):
        # Missing or non-text cells cannot match; treat them as score 0
        # instead of failing on `.lower()`.
        if not isinstance(value, str):
            return 0
        name = value.lower()
        return sum(weight for ngram, weight in weighted_ngrams if ngram in name)

    # Score the column itself rather than using DataFrame.apply(axis=1):
    # Series.map always yields a Series, even for an empty frame. Scores are
    # kept positional and outside `df`, so an existing column that happens to
    # be called 'score' is neither overwritten nor dropped.
    scores = df[col_name].map(score_value).astype('int64').reset_index(drop=True)
    hits = scores[scores > 0]
    # Ascending stable sort of the negated scores == descending by score with
    # ties left in their original order, so the "best" row is deterministic.
    ranked = (-hits).sort_values(kind='mergesort')
    return df.iloc[ranked.index.tolist()]


def search_mf(search_string):
    """Search the ``name`` column of ``mf_list`` for ``search_string``.

    Delegates to ``search`` and returns its ranked DataFrame unchanged.
    """
    matches = search(mf_list, 'name', search_string)
    return matches

# Look up the fund house by name and keep the index label (id) of the
# top-ranked match.
search_mf_result = search_mf('ppfas')
display(search_mf_result)
if search_mf_result.empty:
    # Fail with a readable message instead of an opaque IndexError from `[0]`.
    raise ValueError("No mutual fund company matched the search string")
fund_company_id = search_mf_result.index.values[0]

Unnamed: 0_level_0,name
id,Unnamed: 1_level_1
64,PPFAS Mutual Fund


In [65]:
# Fetch the scheme list for the selected fund house. Judging by the columns
# used below, each JSON record carries `Text`, `Value` and `Selected` fields.
schemes_response = requests.post(
    base_url + 'NavHistorySchemeNav',
    data={'ID': fund_company_id},
    headers={"X-Requested-With": "XMLHttpRequest"},
    timeout=30,  # without a timeout requests can wait forever on a stalled server
)
# Surface HTTP errors here rather than as a confusing JSON/KeyError below.
schemes_response.raise_for_status()
schemes_json = schemes_response.json()
# Build the table in one chain (no in-place mutation): drop the `Selected`
# column, then key and order the schemes by their `Value` code.
schemes_df = (
    pd.DataFrame(schemes_json)
    .drop(columns=['Selected'])
    .set_index('Value')
    .sort_index()
)
schemes_df

Unnamed: 0_level_0,Text
Value,Unnamed: 1_level_1
122432,PPFAS Long Term Value Fund
122639,Parag Parikh Flexi Cap Fund - Direct Plan - Gr...
122640,Parag Parikh Flexi Cap Fund - Regular Plan - G...
143260,Parag Parikh Liquid Fund- Regular Plan- Growth
143261,Parag Parikh Liquid Fund- Regular Plan- Monthl...
143262,Parag Parikh Liquid Fund- Direct Plan- Monthly...
143263,Parag Parikh Liquid Fund- Direct Plan- Daily R...
143264,Parag Parikh Liquid Fund- Regular Plan- Daily ...
143265,Parag Parikh Liquid Fund- Direct Plan- Weekly ...
143266,Parag Parikh Liquid Fund- Regular Plan- Weekly...


In [69]:
def search_schemes(search_string):
    """Search the ``Text`` column of ``schemes_df`` for ``search_string``.

    Delegates to ``search`` and returns its ranked DataFrame unchanged.
    """
    matches = search(schemes_df, 'Text', search_string)
    return matches

def search_schemes_best(search_string):
    """Print and return the top-ranked scheme for ``search_string``.

    Takes the first row of ``search_schemes(search_string)``, prints its index
    values and cell values, and returns it as a DataFrame of at most one row.
    """
    best = search_schemes(search_string).iloc[:1]
    print(f"Best Match ID: {best.index.values}")
    print(f"Best Match: {best.values}")
    return best

# Pick the best-matching scheme and keep its index label as the scheme id.
search_scheme_best_result = search_schemes_best('Parag Parikh Flexi cap Fund Growth Direct')
if search_scheme_best_result.empty:
    # Fail with a readable message instead of an opaque IndexError from `[0]`.
    raise ValueError("No scheme matched the search string")
scheme_id = search_scheme_best_result.index.values[0]

Best Match ID: ['122639']
Best Match: [['Parag Parikh Flexi Cap Fund - Direct Plan - Growth']]


In [70]:
# Request the NAV history of the chosen scheme for a fixed date window; the
# endpoint answers with an HTML page that contains the data as a table.
htmldata_response = requests.post(
    base_url + 'NavHistoryPeriod',
    data={
        'mfID': fund_company_id,
        'scID': scheme_id,
        'fDate': '01-Mar-2020',
        'tDate': '23-Feb-2025'
    },
    headers={"X-Requested-With": "XMLHttpRequest"},
    timeout=30,  # without a timeout requests can wait forever on a stalled server
)
try:
    # Wrap HTML content in StringIO (read_html expects a file-like object
    # rather than a literal HTML string).
    html_io = io.StringIO(htmldata_response.text)
    nav_tables = pd.read_html(html_io)
    raw_table = nav_tables[0]
    # First column holds the NAV, last column holds the date.
    nav_df = raw_table.iloc[:, :1].copy()
    nav_df.columns = ['nav']
    nav_df.index = pd.to_datetime(raw_table.iloc[:, -1].values, format="%d-%b-%Y")
    nav_df.index.name = 'date'
    # Bind `df` only once the frame is fully built, so a failure part-way
    # through never leaves a half-transformed object under that name.
    df = nav_df
    display(df)
except Exception as exc:
    # Best effort: show what the server sent and why parsing failed.
    # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
    print(htmldata_response.text)
    print('Error in fetching data')
    print(f'Reason: {exc!r}')

Unnamed: 0_level_0,nav
date,Unnamed: 1_level_1
2020-03-02,27.0962
2020-03-03,27.5750
2020-03-04,27.2193
2020-03-05,27.4586
2020-03-06,26.8220
...,...
2025-02-17,85.9720
2025-02-18,85.8927
2025-02-19,85.7517
2025-02-20,85.9271
