In [1]:
# Import required libraries
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import csv
import time
import pandas as pd

### Step 1: Connect to the website and fetch HTML content

In [None]:
URL = 'https://dps.psx.com.pk/'
page = requests.get(URL)

# Parse the webpage using BeautifulSoup
soup_raw = BeautifulSoup(page.content, 'html.parser')

soup_raw  # view similar structure via Ctrl+Shift+I in browser

### Step 2: Prettify and re-parse the HTML for better formatting

In [None]:
soup = BeautifulSoup(soup_raw.prettify(), 'html.parser')
soup

### Step 3: Extract KSE-100 Index and Percentage Change

In [4]:
kse100 = soup.find("h1", class_="marketIndices__price").get_text(strip=True) # 'class' is a reserved keyword, so we use 'class_' instead
kse100

'156,732.87-1,732.18 (-1.09%)'

In [5]:
# Slice the string to separate index and percentage
current_index = kse100[:10]
percent_change = kse100[11:]

print("CurrentIndex: ", current_index)
print("Percent Change: ", percent_change)

CurrentIndex:  156,732.87
Percent Change:  1,732.18 (-1.09%)


### Step 4: Capture current system time

In [6]:
current_time = datetime.now().strftime("%H:%M:%S")
print("Time:", current_time)

Time: 17:08:52


### Step 5: Create CSV file and write header

In [7]:
# CSV header
header = ['Time', 'Current Index', 'Percent Change']

# Create CSV file (write mode 'w' — overwrites if exists)
with open('PSXWebScraperDataset.csv', 'w', newline='', encoding='UTF-8') as f:
    writer = csv.writer(f)
    writer.writerow(header)

### Step 6: Define a function to repeatedly append data

In [8]:
def append_data():
    """Fetch latest PSX KSE-100 index and append it to CSV."""
    try:
        url = 'https://dps.psx.com.pk/'
        page = requests.get(url)
        
        soup = BeautifulSoup(page.content, 'html.parser')
        kse100 = soup.find("h1", class_="marketIndices__price").get_text(strip=True)
        
        current_index = kse100[:10]
        percent_change = kse100[11:]
        current_time = datetime.now().strftime("%H:%M:%S")
        
        data = [current_time, current_index, percent_change]
        
        # Append data to the existing CSV file
        with open('PSXWebScraperDataset.csv', 'a', newline='', encoding='UTF-8') as f:
            writer = csv.writer(f)
            writer.writerow(data)
            
        print(f"[{current_time}] Data appended successfully.")
        
    except Exception as e:
        print(f"Error fetching data: {e}")

### Step 7: Run scraper in a continuous loop (every 10 seconds)

In [9]:
try:
    while True:
        append_data()
        time.sleep(10)  # delay in seconds
except KeyboardInterrupt:
    print("Scraper stopped by user.")

[17:08:54] Data appended successfully.
[17:09:05] Data appended successfully.
[17:09:16] Data appended successfully.
[17:09:28] Data appended successfully.
[17:09:39] Data appended successfully.
[17:09:51] Data appended successfully.
[17:10:02] Data appended successfully.
[17:10:13] Data appended successfully.
[17:10:25] Data appended successfully.
[17:10:36] Data appended successfully.
Scraper stopped by user.


### Step 8: Load the saved dataset into Pandas DataFrame

In [10]:
df = pd.read_csv(r'G:\My Drive\Repos\PSX Web Scraper\PSXWebScraperDataset.csv')
df

Unnamed: 0,Time,Current Index,Percent Change
0,17:08:54,156732.87,"1,732.18 (-1.09%)"
1,17:09:05,156732.87,"1,732.18 (-1.09%)"
2,17:09:16,156732.87,"1,732.18 (-1.09%)"
3,17:09:28,156732.87,"1,732.18 (-1.09%)"
4,17:09:39,156732.87,"1,732.18 (-1.09%)"
5,17:09:51,156732.87,"1,732.18 (-1.09%)"
6,17:10:02,156732.87,"1,732.18 (-1.09%)"
7,17:10:13,156732.87,"1,732.18 (-1.09%)"
8,17:10:25,156732.87,"1,732.18 (-1.09%)"
9,17:10:36,156732.87,"1,732.18 (-1.09%)"
