In [1]:
# Import required libraries
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import csv
import time
import pandas as pd

### Step 1: Connect to the website and fetch HTML content

In [2]:
URL = 'https://dps.psx.com.pk/'
page = requests.get(URL)

# Parse the webpage using BeautifulSoup
soup_raw = BeautifulSoup(page.content, 'html.parser')

soup_raw  # view similar structure via Ctrl+Shift+I in browser

<!DOCTYPE html>
<html><head><meta charset="utf-8"/><meta content="width=device-width, initial-scale=1, viewport-fit=cover" name="viewport"/><meta content="ucGuScv6fdM8uhZru-aeXK4qsaPfO3HiuZFKB90UpPU" name="google-site-verification"/><meta content="PSX (Pakistan Stock Exchange Limited) - Stock/share market updates from Pakistan’s premier stock exchange. Get all the current stock/share market data; information to investors on KSE 100, stock quotes, indices and corporate announcements." name="description"/><meta content="Pakistan Stock Exchange, Stock Exchange in Pakistan, PSX, PSX Pakistan, dps, data portal, KSE, KSE100, KSE announcement, PSX announcement, Lahore Stock Exchange, Karachi Stock Exchange, stock exchange, share markets, stock markets, live stock market, share market in Pakistan" name="keywords"/><link href="https://dps.psx.com.pk/static/images/favicon.png" rel="icon" type="image/png"/><!-- Apple specific--><link href="https://dps.psx.com.pk/static/images/logo-200.png" rel="a

### Step 2: Prettify and re-parse the HTML for better formatting

In [3]:
soup = BeautifulSoup(soup_raw.prettify(), 'html.parser')
soup

<!DOCTYPE html>

<html>
<head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1, viewport-fit=cover" name="viewport"/>
<meta content="ucGuScv6fdM8uhZru-aeXK4qsaPfO3HiuZFKB90UpPU" name="google-site-verification"/>
<meta content="PSX (Pakistan Stock Exchange Limited) - Stock/share market updates from Pakistan’s premier stock exchange. Get all the current stock/share market data; information to investors on KSE 100, stock quotes, indices and corporate announcements." name="description"/>
<meta content="Pakistan Stock Exchange, Stock Exchange in Pakistan, PSX, PSX Pakistan, dps, data portal, KSE, KSE100, KSE announcement, PSX announcement, Lahore Stock Exchange, Karachi Stock Exchange, stock exchange, share markets, stock markets, live stock market, share market in Pakistan" name="keywords"/>
<link href="https://dps.psx.com.pk/static/images/favicon.png" rel="icon" type="image/png"/>
<!-- Apple specific-->
<link href="https://dps.psx.com.pk/static/images/logo-200.p

### Step 3: Extract KSE-100 Index and Percentage Change

In [4]:
kse100 = soup.find("h1", class_="marketIndices__price").get_text(strip=True) # 'class' is a reserved keyword, so we use 'class_' instead
kse100

'159,733.921,549.98 (0.98%)'

In [5]:
# Slice the string to separate index and percentage
current_index = kse100[:10]
percent_change = kse100[11:]

print("CurrentIndex: ", current_index)
print("Percent Change: ", percent_change)

CurrentIndex:  159,733.92
Percent Change:  ,549.98 (0.98%)


### Step 4: Capture current system time

In [6]:
current_time = datetime.now().strftime("%H:%M:%S")
print("Time:", current_time)

Time: 11:41:05


### Step 5: Create CSV file and write header

In [7]:
# CSV header
header = ['Time', 'Current Index', 'Percent Change']

# Create CSV file (write mode 'w' — overwrites if exists)
with open('PSXWebScraperDataset.csv', 'w', newline='', encoding='UTF-8') as f:
    writer = csv.writer(f)
    writer.writerow(header)

### Step 6: Define a function to repeatedly append data

In [8]:
def append_data():
    """Fetch latest PSX KSE-100 index and append it to CSV."""
    try:
        url = 'https://dps.psx.com.pk/'
        page = requests.get(url)
        
        soup = BeautifulSoup(page.content, 'html.parser')
        kse100 = soup.find("h1", class_="marketIndices__price").get_text(strip=True)
        
        current_index = kse100[:10]
        percent_change = kse100[11:]
        current_time = datetime.now().strftime("%H:%M:%S")
        
        data = [current_time, current_index, percent_change]
        
        # Append data to the existing CSV file
        with open('PSXWebScraperDataset.csv', 'a', newline='', encoding='UTF-8') as f:
            writer = csv.writer(f)
            writer.writerow(data)
            
        print(f"[{current_time}] Data appended successfully.")
        
    except Exception as e:
        print(f"Error fetching data: {e}")

### Step 7: Run scraper in a continuous loop (every 10 seconds)

In [9]:
try:
    while True:
        append_data()
        time.sleep(10)  # delay in seconds
except KeyboardInterrupt:
    print("Scraper stopped by user.")

[11:41:07] Data appended successfully.
[11:41:20] Data appended successfully.
[11:41:32] Data appended successfully.
[11:41:44] Data appended successfully.
[11:41:56] Data appended successfully.
[11:42:07] Data appended successfully.
[11:42:18] Data appended successfully.
[11:42:30] Data appended successfully.
[11:42:42] Data appended successfully.
Scraper stopped by user.


### Step 8: Load the saved dataset into Pandas DataFrame

In [10]:
df = pd.read_csv(r'G:\My Drive\Repos\PSX Web Scraper\PSXWebScraperDataset.csv')
df

Unnamed: 0,Time,Current Index,Percent Change
0,11:41:07,159733.92,",549.98 (0.98%)"
1,11:41:20,159733.92,",549.98 (0.98%)"
2,11:41:32,159733.92,",549.98 (0.98%)"
3,11:41:44,159733.92,",549.98 (0.98%)"
4,11:41:56,159733.92,",549.98 (0.98%)"
5,11:42:07,159733.92,",549.98 (0.98%)"
6,11:42:18,159733.92,",549.98 (0.98%)"
7,11:42:30,159733.92,",549.98 (0.98%)"
8,11:42:42,159733.92,",549.98 (0.98%)"
