<a href="https://colab.research.google.com/github/Krishna-Kumar-Sankaran-Kutty/PersonalFinanceUtils/blob/main/GetMFData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [48]:
def extract_portfolio(url, timeout=30):
  """Scrape the equity holdings table of a fund page into a DataFrame.

  The page at `url` is downloaded and the HTML table whose id is
  'equityCompleteHoldingTable' is converted into one DataFrame row per
  table row. The frame is indexed by a fund name derived from the URL.

  Args:
    url: Address of the portfolio-holdings page. The third-from-last path
      segment is used as the fund name (hyphens become spaces, title-cased).
    timeout: Seconds to wait for the server before giving up.

  Returns:
    A DataFrame indexed by 'Fund Name' with one column per table header.
    '% of Total Holdings' and 'Sector Total' are converted to float; every
    other column keeps the cell text as scraped.

  Raises:
    requests.HTTPError: If the server answers with an error status.
    ValueError: If the holdings table is not present in the page, or a
      numeric column contains text that cannot be parsed as a float.
  """
  # Without a timeout a stalled connection would hang the cell forever, and
  # without the status check an error page would be parsed as if it were data.
  response = requests.get(url, timeout=timeout)
  response.raise_for_status()

  # Parse the HTML content using BeautifulSoup
  soup = BeautifulSoup(response.content, 'html.parser')

  # Find the table element; fail with a clear message instead of an
  # AttributeError on None when the page layout is not the expected one.
  table = soup.find('table', id='equityCompleteHoldingTable')
  if table is None:
    raise ValueError(
        f"No table with id 'equityCompleteHoldingTable' found at {url}")

  # Extract table headers
  headers = [header.text.strip() for header in table.find_all('th')]

  # Extract table rows
  rows = []
  for row in table.find_all('tr')[1:]:  # Skip the header row
    cells = [cell.text.strip() for cell in row.find_all('td')]
    # Rows without any <td> (spacers, nested header rows) carry no holding
    # and would otherwise turn into all-null rows in the frame.
    if cells:
      rows.append(cells)

  # Create a Pandas DataFrame
  df = pd.DataFrame(rows, columns=headers)

  # Derive the fund name from the URL path. Dropping any fragment, query
  # string and trailing slash first keeps the segment position stable.
  page_path = url.split('#', 1)[0].split('?', 1)[0].rstrip('/')
  df['Fund Name'] = page_path.split('/')[-3].replace('-', ' ').title()

  # Set Fund Name column as index
  df = df.set_index('Fund Name')

  # Clean-up 'Stock Invested in' column. regex=False makes the literal match
  # explicit, since the pandas default for `regex` differs between versions.
  df['Stock Invested in'] = df['Stock Invested in'].str.replace(
      '#\n', '', regex=False)

  # Clean-up '% of Total Holdings' column and convert to float
  df['% of Total Holdings'] = (
      df['% of Total Holdings'].str.rstrip('%').astype(float))

  # Convert 'Sector Total' to float
  df['Sector Total'] = df['Sector Total'].astype(float)

  return df


In [49]:
# Portfolio-holdings page to scrape. extract_portfolio takes the fund name
# from the third-from-last '/'-separated segment of this URL.
url = 'https://www.moneycontrol.com/mutual-funds/hdfc-index-fund-nifty-50-plan-direct-plan/portfolio-holdings/MHD1152'

# Scrape the holdings table into a DataFrame indexed by 'Fund Name'.
df = extract_portfolio(url)
# Print the column names, non-null counts and dtypes to sanity-check the scrape.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 51 entries, Hdfc Index Fund Nifty 50 Plan Direct Plan to Hdfc Index Fund Nifty 50 Plan Direct Plan
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Stock Invested in    51 non-null     object 
 1   Sector               51 non-null     object 
 2   Sector Total         51 non-null     float64
 3   Value(Mn)            51 non-null     object 
 4   % of Total Holdings  51 non-null     float64
 5   1M Change            51 non-null     object 
 6   1Y Highest Holding   51 non-null     object 
 7   1Y Lowest Holding    51 non-null     object 
 8   Quantity             51 non-null     object 
 9   1M Change in Qty     51 non-null     object 
 10  M-Cap                51 non-null     object 
 11  GroupName            51 non-null     object 
dtypes: float64(2), object(10)
memory usage: 5.2+ KB


In [51]:
# prompt: create a bubble plot of stock invested in vs sector, and bubble size will be % value of holdings

import plotly.express as px

# One bubble per holding: the stock name on the x axis, its sector on the
# y axis, and the bubble area scaled by the holding's share of the fund.
fig = px.scatter(
    df,
    x="Stock Invested in",
    y="Sector",
    size="% of Total Holdings",
    size_max=60,  # upper bound on the largest bubble's marker size
    color="Sector",  # one colour per sector
    hover_name="Stock Invested in",
    title="Stock Invested vs Sector",
)
fig.show()
