In [2]:
import datetime
import pandas as pd
import numpy as np
import urllib.error
from urllib.error import HTTPError

# Build one 'D.M.YYYY' label (no zero padding) per calendar day in the range;
# these labels are later spliced into the `date=` query parameter.
start = datetime.datetime.strptime("19-09-2016", "%d-%m-%Y")
end = datetime.datetime.strptime("31-01-2019", "%d-%m-%Y")

# `end` itself is excluded: offsets run from 0 to (end - start).days - 1.
span_days = (end - start).days
date_generated = [start + datetime.timedelta(days=offset) for offset in range(span_days)]

dates_list = ['{}.{}.{}'.format(day.day, day.month, day.year) for day in date_generated]

# Scrape the trade-results table for every date in `dates_list`, page by page,
# and gather all rows into `ndf`. URLs whose request raises HTTPError are
# recorded in `errors` and skipped.
MAX_PAGES = 100  # exclusive upper bound of the page counter (pages 1..99)

page_frames = []  # one DataFrame per non-empty results page
errors = []       # URLs that raised HTTPError
for day_label in dates_list:
    allURL = 'https://www.uzse.uz/trade_results?date=' + day_label + '&locale=en&mkt_id=ALL&page=%d'

    for k in range(1, MAX_PAGES):
        url = allURL % k

        try:
            # Download and parse each page exactly once; fetching it a second
            # time just to test for emptiness doubled the number of requests.
            chunk = pd.read_html(url)[0]
        except HTTPError:
            errors.append(url)
            continue

        if chunk.empty:
            # An empty table is treated as the end of this date's results.
            break

        # Tag every row with its date and make Date the first column.
        chunk.insert(0, 'Date', day_label)
        page_frames.append(chunk)

# Concatenate once at the end: calling pd.concat inside the loop re-copies all
# previously collected rows on every page. Fall back to an empty frame when
# nothing was scraped, because pd.concat raises on an empty list.
ndf = pd.concat(page_frames) if page_frames else pd.DataFrame()

ndf.head()

Unnamed: 0,Date,Time,ISIN,Isu Name,Тип ценной бумаги,Market,BRD ID,Trade Price,Trading Volumn,Trading Value
0,19.9.2016,10:02:00,UZ7055450009 BSEZ,ChEII <Bektemir-spirt eksperimental zavodi> AJ,Простыеакции,STK,G1,500.0,126000,"UZS 63,000,000"
0,20.9.2016,10:02:00,UZ7014810004 UKPS,<O'zkimyopolimersavdo> aksiyadorlik jamiyati,Простыеакции,STK,G1,2380.0,14213,"UZS 33,826,940"
0,26.9.2016,11:19:40,UZ7007130006 A007130,<Elektrqishloqqurilish> AJ,Простыеакции,STK,NC,10000.0,40,"UZS 400,000"
0,27.9.2016,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,Простыеакции,STK,G1,100.0,20000000,"UZS 2,000,000,000"
1,27.9.2016,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,Простыеакции,STK,G1,100.0,29000000,"UZS 2,900,000,000"


In [16]:
# Show the URLs whose request raised HTTPError while scraping.
errors

['https://www.uzse.uz/trade_results?date=30.11.2017&locale=en&mkt_id=ALL&page=1']

Before exporting this dataframe to a **csv** file, I need to clean up a few things:
- tidy up the column names;
- change all Cyrillic words to English;
- fix the **Trading Value** column, so that it is stored as a number.

In [6]:
# List the current column labels before renaming them.
ndf.columns

Index(['Date', 'Time', 'ISIN', 'Isu Name', 'Тип ценной бумаги', 'Market',
       'BRD ID', 'Trade Price', 'Trading Volumn', 'Trading Value'],
      dtype='object')

In [8]:
# Swap the spaces in every column label for underscores.
ndf.columns = ndf.columns.map(lambda label: label.replace(' ', '_'))
ndf.head()

Unnamed: 0,Date,Time,ISIN,Isu_Name,Тип_ценной_бумаги,Market,BRD_ID,Trade_Price,Trading_Volumn,Trading_Value
0,19.9.2016,10:02:00,UZ7055450009 BSEZ,ChEII <Bektemir-spirt eksperimental zavodi> AJ,Простыеакции,STK,G1,500.0,126000,"UZS 63,000,000"
0,20.9.2016,10:02:00,UZ7014810004 UKPS,<O'zkimyopolimersavdo> aksiyadorlik jamiyati,Простыеакции,STK,G1,2380.0,14213,"UZS 33,826,940"
0,26.9.2016,11:19:40,UZ7007130006 A007130,<Elektrqishloqqurilish> AJ,Простыеакции,STK,NC,10000.0,40,"UZS 400,000"
0,27.9.2016,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,Простыеакции,STK,G1,100.0,20000000,"UZS 2,000,000,000"
1,27.9.2016,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,Простыеакции,STK,G1,100.0,29000000,"UZS 2,900,000,000"


In [9]:
# Give the Cyrillic-named column (position 4) an English label.
# Use DataFrame.rename rather than writing into `ndf.columns.values`: mutating
# the Index's backing array in place bypasses pandas, so label lookups that
# were already cached can go stale, and the write is lost or fails whenever
# `.values` hands back a copy or a read-only array.
ndf = ndf.rename(columns={'Тип_ценной_бумаги': 'Security_Type'})
ndf.head()

Unnamed: 0,Date,Time,ISIN,Isu_Name,Security_Type,Market,BRD_ID,Trade_Price,Trading_Volumn,Trading_Value
0,19.9.2016,10:02:00,UZ7055450009 BSEZ,ChEII <Bektemir-spirt eksperimental zavodi> AJ,Простыеакции,STK,G1,500.0,126000,"UZS 63,000,000"
0,20.9.2016,10:02:00,UZ7014810004 UKPS,<O'zkimyopolimersavdo> aksiyadorlik jamiyati,Простыеакции,STK,G1,2380.0,14213,"UZS 33,826,940"
0,26.9.2016,11:19:40,UZ7007130006 A007130,<Elektrqishloqqurilish> AJ,Простыеакции,STK,NC,10000.0,40,"UZS 400,000"
0,27.9.2016,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,Простыеакции,STK,G1,100.0,20000000,"UZS 2,000,000,000"
1,27.9.2016,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,Простыеакции,STK,G1,100.0,29000000,"UZS 2,900,000,000"


In [10]:
# Parse the 'D.M.YYYY' labels straight into datetimes. No intermediate
# `.str.replace('.', '-')` is needed, and skipping it avoids depending on
# whether '.' is interpreted as a literal dot or as a regex wildcard.
ndf['Date'] = pd.to_datetime(ndf['Date'], format='%d.%m.%Y')
ndf.head()

Unnamed: 0,Date,Time,ISIN,Isu_Name,Security_Type,Market,BRD_ID,Trade_Price,Trading_Volumn,Trading_Value
0,2016-09-19,10:02:00,UZ7055450009 BSEZ,ChEII <Bektemir-spirt eksperimental zavodi> AJ,Простыеакции,STK,G1,500.0,126000,"UZS 63,000,000"
0,2016-09-20,10:02:00,UZ7014810004 UKPS,<O'zkimyopolimersavdo> aksiyadorlik jamiyati,Простыеакции,STK,G1,2380.0,14213,"UZS 33,826,940"
0,2016-09-26,11:19:40,UZ7007130006 A007130,<Elektrqishloqqurilish> AJ,Простыеакции,STK,NC,10000.0,40,"UZS 400,000"
0,2016-09-27,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,Простыеакции,STK,G1,100.0,20000000,"UZS 2,000,000,000"
1,2016-09-27,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,Простыеакции,STK,G1,100.0,29000000,"UZS 2,900,000,000"


In [13]:
# Cyrillic security-type labels and their English replacements.
replace_values = {
    'Простыеакции': 'CommonStock',
    'Прив.акции': 'PrefStock',
    'Облигации': 'Bond',
}

# DataFrame.replace returns a new frame, so `ndf` keeps the original labels
# and `ddf` holds the translated ones.
column_mapping = {"Security_Type": replace_values}
ddf = ndf.replace(column_mapping)
ddf['Security_Type'].value_counts()

CommonStock    17151
PrefStock       2090
Bond              10
Name: Security_Type, dtype: int64

In [15]:
# `ndf` was not modified by the replace call (it returned a new frame), so
# Security_Type still shows the Cyrillic labels here.
ndf.head()

Unnamed: 0,Date,Time,ISIN,Isu_Name,Security_Type,Market,BRD_ID,Trade_Price,Trading_Volumn,Trading_Value
0,2016-09-19,10:02:00,UZ7055450009 BSEZ,ChEII <Bektemir-spirt eksperimental zavodi> AJ,Простыеакции,STK,G1,500.0,126000,"UZS 63,000,000"
0,2016-09-20,10:02:00,UZ7014810004 UKPS,<O'zkimyopolimersavdo> aksiyadorlik jamiyati,Простыеакции,STK,G1,2380.0,14213,"UZS 33,826,940"
0,2016-09-26,11:19:40,UZ7007130006 A007130,<Elektrqishloqqurilish> AJ,Простыеакции,STK,NC,10000.0,40,"UZS 400,000"
0,2016-09-27,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,Простыеакции,STK,G1,100.0,20000000,"UZS 2,000,000,000"
1,2016-09-27,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,Простыеакции,STK,G1,100.0,29000000,"UZS 2,900,000,000"


In [16]:
# `ddf` is the copy with the translated Security_Type labels.
ddf.head()

Unnamed: 0,Date,Time,ISIN,Isu_Name,Security_Type,Market,BRD_ID,Trade_Price,Trading_Volumn,Trading_Value
0,2016-09-19,10:02:00,UZ7055450009 BSEZ,ChEII <Bektemir-spirt eksperimental zavodi> AJ,CommonStock,STK,G1,500.0,126000,"UZS 63,000,000"
0,2016-09-20,10:02:00,UZ7014810004 UKPS,<O'zkimyopolimersavdo> aksiyadorlik jamiyati,CommonStock,STK,G1,2380.0,14213,"UZS 33,826,940"
0,2016-09-26,11:19:40,UZ7007130006 A007130,<Elektrqishloqqurilish> AJ,CommonStock,STK,NC,10000.0,40,"UZS 400,000"
0,2016-09-27,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,CommonStock,STK,G1,100.0,20000000,"UZS 2,000,000,000"
1,2016-09-27,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,CommonStock,STK,G1,100.0,29000000,"UZS 2,900,000,000"


In [20]:
# Quick range check on the two numeric columns.
price = ddf['Trade_Price']
volume = ddf['Trading_Volumn']
extremes = [
    ('Trade_Price min is: ', price.min()),
    ('Trade_Price max is: ', price.max()),
    ('Trading_Volumn min is: ', volume.min()),
    ('Trading_Volumn max is: ', volume.max()),
]
for label, value in extremes:
    print(label, value)

Trade_Price min is:  0.01
Trade_Price max is:  1000000.0
Trading_Volumn min is:  0
Trading_Volumn max is:  1880000000


In [22]:
# Inspect the column dtypes; in the recorded output Trading_Value is still
# `object` (text such as "UZS 63,000,000") at this point.
ddf.dtypes

Date              datetime64[ns]
Time                      object
ISIN                      object
Isu_Name                  object
Security_Type             object
Market                    object
BRD_ID                    object
Trade_Price              float64
Trading_Volumn             int64
Trading_Value             object
dtype: object

In [23]:
# Overwrite the scraped text value (e.g. "UZS 63,000,000") with a numeric
# Trading_Value computed as price x volume.
ddf['Trading_Value'] = ddf['Trade_Price'] * ddf['Trading_Volumn']
ddf.head()

Unnamed: 0,Date,Time,ISIN,Isu_Name,Security_Type,Market,BRD_ID,Trade_Price,Trading_Volumn,Trading_Value
0,2016-09-19,10:02:00,UZ7055450009 BSEZ,ChEII <Bektemir-spirt eksperimental zavodi> AJ,CommonStock,STK,G1,500.0,126000,63000000.0
0,2016-09-20,10:02:00,UZ7014810004 UKPS,<O'zkimyopolimersavdo> aksiyadorlik jamiyati,CommonStock,STK,G1,2380.0,14213,33826940.0
0,2016-09-26,11:19:40,UZ7007130006 A007130,<Elektrqishloqqurilish> AJ,CommonStock,STK,NC,10000.0,40,400000.0
0,2016-09-27,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,CommonStock,STK,G1,100.0,20000000,2000000000.0
1,2016-09-27,13:05:47,UZ7037610001 SVDB,AT <Savdogarbank>,CommonStock,STK,G1,100.0,29000000,2900000000.0


Exporting this dataframe to **csv** so that I do not have to wait a long time for the data to be scraped again. The *r* prefix should be placed before the path string (it makes the string raw, so that backslashes in the Windows path are not treated as escape sequences). Otherwise, you’ll get the following error: *(unicode error) ‘unicodeescape’ codec can’t decode bytes in position 2-3: truncated \UXXXXXXXX escape*

In [24]:
# Persist the cleaned frame so the slow scrape does not have to be repeated.
# The raw-string prefix keeps the backslashes in the Windows path literal.
# NOTE(review): the row index (which repeats, e.g. 0, 0, 0, 0, 1) is written
# too, since `index` is left at its default - confirm that is wanted.
output_path = r'C:\Users\Alisher\Desktop\ddf.csv'
ddf.to_csv(output_path)