In [11]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import datetime 
import pyodbc

In [12]:
url = 'https://www.psx.com.pk/market-summary/#main'

In [13]:
response = requests.get(url)

In [14]:
soup = BeautifulSoup(response.content, 'html.parser')

In [15]:
container = soup.find('div', class_='col-sm-12 tab-pane inner-content-table automobile-div active')


In [16]:
rows = container.find_all('tr', class_='red-text-td')

In [17]:
dictionary = {
    'titles':[],
    'ldcp': [],
    'opens': [],
    'high': [],
    'low': [],
    'current': [],
    'change': [],
    'volume': [],
    'scrap_time':[]
}

for row in rows:
    a = row.find_all('td')
    dictionary['titles'].append(a[0].text.strip())
    dictionary['ldcp'].append(a[1].text.strip())
    dictionary['opens'].append(a[2].text.strip())
    dictionary['high'].append(a[3].text.strip())
    dictionary['low'].append(a[4].text.strip())
    dictionary['current'].append(a[5].text.strip())
    dictionary['change'].append(a[6].text.strip())
    dictionary['volume'].append(a[7].text.strip())
    dictionary['scrap_time'].append(datetime.datetime.now())

In [18]:
df = pd.DataFrame(dictionary)
df

Unnamed: 0,titles,ldcp,opens,high,low,current,change,volume,scrap_time
0,AL-Ghazi Tractors,437.75,437.5,437.5,422.92,429.99,-9.08,12959,2025-04-29 23:44:29.507952
1,Hinopak Motor,353.81,350.0,350.0,343.0,347.05,-7.25,5851,2025-04-29 23:44:29.507952
2,Millat Tractors,595.38,586.0,605.0,584.05,592.0,-2.13,121281,2025-04-29 23:44:29.507952
3,Atlas Battery,292.11,277.01,277.01,263.05,268.5,-23.25,111493,2025-04-29 23:44:29.507952
4,Bal.Wheels,130.43,131.0,132.0,129.0,129.03,-0.38,7060,2025-04-29 23:44:29.507952
...,...,...,...,...,...,...,...,...,...
253,ICC Industries,10.30,10.3,10.3,9.66,9.67,-0.24,2672,2025-04-29 23:44:29.528190
254,Philip Morris Pak.,1107.67,1115.0,1115.0,1040.0,1074.95,-42.03,855,2025-04-29 23:44:29.528190
255,P.N.S.C,284.66,281.0,288.99,278.0,283.01,-2.9,36662,2025-04-29 23:44:29.528713
256,Pak.Int.Container,42.70,42.5,42.97,42.5,42.5,-0.11,21965,2025-04-29 23:44:29.528713


In [19]:
df.dtypes

titles                object
ldcp                  object
opens                 object
high                  object
low                   object
current               object
change                object
volume                object
scrap_time    datetime64[ns]
dtype: object

In [20]:
df = df.replace(',', '', regex=True)
df = df.replace('', pd.NA)

In [21]:
type_conv = {
    'ldcp': float,
    'opens': float,
    'high': float,
    'low': float,
    'current': float,
    'change': float,
    'volume': float
}

for i in type_conv:
    df[i] = pd.to_numeric(df[i], errors='coerce')

In [22]:
df.dtypes

titles                object
ldcp                 float64
opens                float64
high                 float64
low                  float64
current              float64
change               float64
volume                 int64
scrap_time    datetime64[ns]
dtype: object

In [23]:
df['change'].nlargest(5)

38    -0.01
52    -0.01
143   -0.01
150   -0.01
177   -0.01
Name: change, dtype: float64

In [24]:
df['opens'].nlargest(5)

62     23001.01
58      6900.00
56      1720.01
120     1590.65
10      1499.02
Name: opens, dtype: float64

In [25]:
df['day_range'] = df['high'] - df['low']

In [26]:
df

Unnamed: 0,titles,ldcp,opens,high,low,current,change,volume,scrap_time,day_range
0,AL-Ghazi Tractors,437.75,437.50,437.50,422.92,429.99,-9.08,12959,2025-04-29 23:44:29.507952,14.58
1,Hinopak Motor,353.81,350.00,350.00,343.00,347.05,-7.25,5851,2025-04-29 23:44:29.507952,7.00
2,Millat Tractors,595.38,586.00,605.00,584.05,592.00,-2.13,121281,2025-04-29 23:44:29.507952,20.95
3,Atlas Battery,292.11,277.01,277.01,263.05,268.50,-23.25,111493,2025-04-29 23:44:29.507952,13.96
4,Bal.Wheels,130.43,131.00,132.00,129.00,129.03,-0.38,7060,2025-04-29 23:44:29.507952,3.00
...,...,...,...,...,...,...,...,...,...,...
253,ICC Industries,10.30,10.30,10.30,9.66,9.67,-0.24,2672,2025-04-29 23:44:29.528190,0.64
254,Philip Morris Pak.,1107.67,1115.00,1115.00,1040.00,1074.95,-42.03,855,2025-04-29 23:44:29.528190,75.00
255,P.N.S.C,284.66,281.00,288.99,278.00,283.01,-2.90,36662,2025-04-29 23:44:29.528713,10.99
256,Pak.Int.Container,42.70,42.50,42.97,42.50,42.50,-0.11,21965,2025-04-29 23:44:29.528713,0.47


In [27]:
df['volatility_perc'] = ((df['high'] - df['low'])/ df['opens'])*100

In [28]:
df

Unnamed: 0,titles,ldcp,opens,high,low,current,change,volume,scrap_time,day_range,volatility_perc
0,AL-Ghazi Tractors,437.75,437.50,437.50,422.92,429.99,-9.08,12959,2025-04-29 23:44:29.507952,14.58,3.332571
1,Hinopak Motor,353.81,350.00,350.00,343.00,347.05,-7.25,5851,2025-04-29 23:44:29.507952,7.00,2.000000
2,Millat Tractors,595.38,586.00,605.00,584.05,592.00,-2.13,121281,2025-04-29 23:44:29.507952,20.95,3.575085
3,Atlas Battery,292.11,277.01,277.01,263.05,268.50,-23.25,111493,2025-04-29 23:44:29.507952,13.96,5.039529
4,Bal.Wheels,130.43,131.00,132.00,129.00,129.03,-0.38,7060,2025-04-29 23:44:29.507952,3.00,2.290076
...,...,...,...,...,...,...,...,...,...,...,...
253,ICC Industries,10.30,10.30,10.30,9.66,9.67,-0.24,2672,2025-04-29 23:44:29.528190,0.64,6.213592
254,Philip Morris Pak.,1107.67,1115.00,1115.00,1040.00,1074.95,-42.03,855,2025-04-29 23:44:29.528190,75.00,6.726457
255,P.N.S.C,284.66,281.00,288.99,278.00,283.01,-2.90,36662,2025-04-29 23:44:29.528713,10.99,3.911032
256,Pak.Int.Container,42.70,42.50,42.97,42.50,42.50,-0.11,21965,2025-04-29 23:44:29.528713,0.47,1.105882


In [29]:
df['volatility_perc'].nlargest(5)

222    29.014085
237    26.289517
98     24.727273
169    24.285714
236    23.018868
Name: volatility_perc, dtype: float64

In [30]:
df_volatile = df.nlargest(5, 'volatility_perc')

In [31]:
df_high_performance = df.nlargest(5, 'change')

In [32]:
df_low_performance = df.nsmallest(5, 'change')

In [50]:

def create_connection():
    conn_str = (
        r"DRIVER={SQL Server};"
        r"SERVER=Kabir-Khan-PC;"  
        r"DATABASE=airflowProject;"
         r"Trusted_Connection=yes;"    
    )

    try:
        conn = pyodbc.connect(conn_str)
        print("✅ Connected to SQL Server successfully!")
        return conn
    except Exception as e:
        print(f"❌ Error connecting to SQL Server: {e}")
        return None

def create_table_if_not_exists(table_name):
    conn = create_connection()
    if not conn:
        return
    cursor = conn.cursor()
    
    create_table_query = f"""
    IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='{table_name}' and xtype='U')
    CREATE TABLE {table_name} (
        titles varchar(50) primary key,
        ldcp decimal(10,2),
        opens decimal(10,2),
        high decimal(10,2),
        low decimal(10,2),
        [current] decimal(10,2),  
        [change] decimal(10,2),   
        volume decimal(10,2),
        scrap_time datetime,
        day_range decimal(5,2),   
        volatility_perc decimal(5,2)
    )
    """
    
    try:
        cursor.execute(create_table_query)
        conn.commit()
        print(f"✅ Table ensured: {table_name}")
    except Exception as e:
        print(f"❌ Error creating table: {e}")
    finally:
        cursor.close()
        conn.close()

def insert_data_to_db(table_name):
    conn = create_connection()
    if not conn:
        return

    cursor = conn.cursor()
    for index,row in df.iterrows():
        cursor.execute(f'INSERT INTO {table_name} (titles, ldcp, opens, high, low, [current], [change], volume, scrap_time, day_range, volatility_perc) values (?,?,?,?,?,?,?,?,?,?,?)', 
                        row['titles'], 
                        row['ldcp'], 
                        row['opens'],
                        row['high'],
                        row['low'],
                        row['current'],
                        row['change'],
                        row['volume'],
                        row['scrap_time'],
                        row['day_range'],
                        row['volatility_perc'])
        cnxn.commit()
        print(f"✅ Inserted {len(df)} rows into database.")
    cursor.close()
    cnxn.close()

In [51]:
create_connection()

✅ Connected to SQL Server successfully!


<pyodbc.Connection at 0x2097bdbaa70>

In [52]:
create_table_if_not_exists('test')

✅ Connected to SQL Server successfully!
✅ Table ensured: test


In [53]:
insert_data_to_db('test')

✅ Connected to SQL Server successfully!


ProgrammingError: ('42S22', "[42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'volatility_perc'. (207) (SQLExecDirectW); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Statement(s) could not be prepared. (8180)")