# Data Scraping Project

In [2]:
from bs4 import BeautifulSoup # importing the BeautifulSoup library
import pandas as pd # importing pandas library
import requests # importing requests library

In [2]:
# Moneycontrol NAV page for the Quant Mid Cap Fund (direct plan, IDCW).
url = 'https://www.moneycontrol.com/mutual-funds/nav/quant-mid-cap-fund-direct-plan-idcw/MES044'

# Perform the HTTP GET request. The timeout keeps the cell from hanging forever
# if the server never answers, and raise_for_status() turns an HTTP error
# (4xx/5xx) into an exception here instead of letting an error page be parsed
# as if it were the fund page.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Name the parser explicitly. The generic 'html' feature lets BeautifulSoup
# pick whichever HTML parser happens to be installed, so the parsed tree could
# differ from one machine to another; 'html.parser' ships with Python.
soup = BeautifulSoup(page.text, 'html.parser')

In [3]:
# Display the parsed document to inspect the page's HTML structure.
soup

<!--header start--><!DOCTYPE html>
<html lang="en">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/> <meta content="1200" http-equiv="refresh"/> <meta content="ie=edge" http-equiv="x-ua-compatible"/>
<meta content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=0" name="viewport"/>
<title>Quant Mid Cap Fund - Direct Plan - IDCW [70.3573]  | Quant Mutual Fund - Moneycontrol</title>
<link href="https://images.moneycontrol.com/images/responsive/common/apple-touch-icon.png" rel="apple-touch-icon"/>
<meta content="Get latest NAV, Returns, SIP Returns, Performance, Ranks, Dividends, Portfolio, CRISIL Rank, Expert Recommendations, and Comparison with gold, stock,ULIP etc. Calculate SIP, VIP Returns. Now invest in Quant Mid Cap Fund  - IDCW at Moneycontrol.com." name="description"/>
<meta content="Quant Mid Cap Fund  - IDCW, Quant Mid Cap Fund  - IDCW direct, mutual funds india, best equity mutual fund, Mid Cap Fund, Quant Mid Cap Fund  - IDC

In [4]:
# Find the required table in the parsed HTML.
#
# The table is selected by position: it is the third <table> element on the
# page (index 2). That position depends on the site's current layout, so the
# count is checked first to fail with a descriptive message rather than a bare
# "list index out of range" if the page changes or returns unexpected content.
table_index = 2
all_tables = soup.find_all("table")
if len(all_tables) <= table_index:
    raise IndexError(
        f"Expected at least {table_index + 1} <table> elements on the page, "
        f"found {len(all_tables)}; the page layout may have changed."
    )

table = all_tables[table_index]

In [5]:
# Display the selected table's HTML to check that it is the intended one.
table

<table cellpadding="0" cellspacing="0" class="mctable1" width="100%">
<thead>
<tr>
<th>Period Invested for</th>
<th>₹10000 Invested on</th>
<th>Latest Value</th>
<th>Absolute Returns</th>
<th>Annualised Returns</th>
<th>Category Avg</th>
<th>Rank within Category</th>
</tr>
</thead>
<tbody>
<tr>
<td class="robo_medium">1 Week</td>
<td>06-Dec-23</td>
<td>9961.00</td>
<td class="red_text">-0.39%</td>
<td class="green_text">-</td>
<td class="green_text">1.19%</td>
<td>29/29</td>
</tr>
<tr>
<td class="robo_medium">1 Month</td>
<td>13-Nov-23</td>
<td>10651.90</td>
<td class="green_text">6.52%</td>
<td class="green_text">-</td>
<td class="green_text">7.74%</td>
<td>23/29</td>
</tr>
<tr>
<td class="robo_medium">3 Month</td>
<td>13-Sep-23</td>
<td>10959.70</td>
<td class="green_text">9.60%</td>
<td class="green_text">-</td>
<td class="green_text">9.93%</td>
<td>18/29</td>
</tr>
<tr>
<td class="robo_medium">6 Month</td>
<td>13-Jun-23</td>
<td>12883.10</td>
<td class="green_text">28.83%</td>
<td 

In [6]:
# Extract the header cells (<th> elements) of the table with 'find_all'.
# Despite its name, 'investment_period' holds every column header tag of the
# table, not only the "Period Invested for" one.

investment_period = table.find_all("th")

In [7]:
# Display the extracted <th> header tags.
investment_period

[<th>Period Invested for</th>,
 <th>₹10000 Invested on</th>,
 <th>Latest Value</th>,
 <th>Absolute Returns</th>,
 <th>Annualised Returns</th>,
 <th>Category Avg</th>,
 <th>Rank within Category</th>]

In [8]:
# Build the list of column names from the header cells.
# '.text' gives the text inside each <th> tag, and 'strip()' trims any
# leading/trailing whitespace around that text.
column_data = []
for header_cell in investment_period:
    column_data.append(header_cell.text.strip())

print(column_data)

['Period Invested for', '₹10000 Invested on', 'Latest Value', 'Absolute Returns', 'Annualised Returns', 'Category Avg', 'Rank within Category']


In [9]:
# Create an empty DataFrame whose columns are the scraped header names.
# pandas is imported together with the other libraries in the notebook's
# first cell, so every dependency is visible in one place.
df = pd.DataFrame(columns=column_data)

In [10]:
# Display the DataFrame; at this point it has column headers but no rows.
df

Unnamed: 0,Period Invested for,₹10000 Invested on,Latest Value,Absolute Returns,Annualised Returns,Category Avg,Rank within Category


In [11]:
# Collect every row (<tr> element) of the table, header row included.
spread_data = table.find_all("tr")

In [12]:
# Turn the table's body rows into DataFrame rows.
#
# spread_data[0] is the header row (it only holds <th> cells), so it is skipped.
# The rows are collected in a list and the DataFrame is built once at the end:
# appending with df.loc[len(df)] inside the loop would add the same rows again
# every time this cell is re-run, and grows the frame one row at a time.
table_rows = []
for row in spread_data[1:]:
    cell_values = [cell.text.strip() for cell in row.find_all("td")]
    # Skip rows that contain no <td> cells; they carry no data and would not
    # match the number of columns.
    if cell_values:
        table_rows.append(cell_values)

df = pd.DataFrame(table_rows, columns=column_data)

In [13]:
# Display the DataFrame now that the table rows have been added.
df

Unnamed: 0,Period Invested for,₹10000 Invested on,Latest Value,Absolute Returns,Annualised Returns,Category Avg,Rank within Category
0,1 Week,06-Dec-23,9961.0,-0.39%,-,1.19%,29/29
1,1 Month,13-Nov-23,10651.9,6.52%,-,7.74%,23/29
2,3 Month,13-Sep-23,10959.7,9.60%,-,9.93%,18/29
3,6 Month,13-Jun-23,12883.1,28.83%,-,24.01%,5/29
4,YTD,30-Dec-22,13184.1,31.84%,-,35.53%,21/29
5,1 Year,13-Dec-22,12705.5,27.05%,27.05%,31.79%,24/29
6,2 Year,13-Dec-21,15101.1,51.01%,22.89%,17.65%,4/25
7,3 Year,11-Dec-20,25405.4,154.05%,36.37%,27.97%,1/24
8,5 Year,13-Dec-18,34884.9,248.85%,28.37%,21.87%,1/22
9,10 Year,13-Dec-13,60133.6,501.34%,19.64%,21.64%,17/19


In [14]:
# Save the DataFrame as a .csv file.
# NOTE(review): this is an absolute Windows path; it must exist on the machine
# running the notebook, so adjust it before running elsewhere.
csv_path = r"T:\Data Field\csv file\Quant Mid cap Fund.csv"

df.to_csv(csv_path)

### Apply background colour to the entire DataFrame

In [15]:
# CSS rule applied to every data cell (<td>) of the rendered table.
cell_background = ('background-color', 'lightyellow')
styles = [dict(selector='td', props=[cell_background])]

# Attach the rule to the DataFrame's Styler.
styled_df = df.style.set_table_styles(styles)

# Render the styled DataFrame as the cell's output.
styled_df

Unnamed: 0,Period Invested for,₹10000 Invested on,Latest Value,Absolute Returns,Annualised Returns,Category Avg,Rank within Category
0,1 Week,06-Dec-23,9961.0,-0.39%,-,1.19%,29/29
1,1 Month,13-Nov-23,10651.9,6.52%,-,7.74%,23/29
2,3 Month,13-Sep-23,10959.7,9.60%,-,9.93%,18/29
3,6 Month,13-Jun-23,12883.1,28.83%,-,24.01%,5/29
4,YTD,30-Dec-22,13184.1,31.84%,-,35.53%,21/29
5,1 Year,13-Dec-22,12705.5,27.05%,27.05%,31.79%,24/29
6,2 Year,13-Dec-21,15101.1,51.01%,22.89%,17.65%,4/25
7,3 Year,11-Dec-20,25405.4,154.05%,36.37%,27.97%,1/24
8,5 Year,13-Dec-18,34884.9,248.85%,28.37%,21.87%,1/22
9,10 Year,13-Dec-13,60133.6,501.34%,19.64%,21.64%,17/19
