In [1]:
#import dependencies 
import pandas as pd
from pathlib import Path

In [2]:
# Load the ASX listed-companies CSV into a DataFrame and preview the first rows.
asx_csv_path = Path('Resources/ASX_Listed_Companies_02-11-2023.csv')
asx_df = pd.read_csv(asx_csv_path)
asx_df.head()

Unnamed: 0,ASX code,Company name,GICs industry group,Listing date,Market Cap
0,14D,1414 DEGREES LIMITED,Capital Goods,12/09/2018,8574067
1,1AD,ADALTA LIMITED,"Pharmaceuticals, Biotechnology & Life Sciences",22/08/2016,10184494
2,1AE,AURORA ENERGY METALS LIMITED,Materials,18/05/2022,17906374
3,1AG,ALTERRA LIMITED,"Food, Beverage & Tobacco",16/05/2008,4875868
4,1AI,ALGORAE PHARMACEUTICALS LIMITED,"Pharmaceuticals, Biotechnology & Life Sciences",01/09/2004,21264623


In [3]:
# Swap the source headers for short snake_case names.
# The assignment is positional: the list order must match the column order
# of the loaded CSV, and a length mismatch raises ValueError.
asx_df.columns = [
    'ticker',
    'company_name',
    'industry_gp',
    'listing_date',
    'market_cap',
]
asx_df

Unnamed: 0,ticker,company_name,industry_gp,listing_date,market_cap
0,14D,1414 DEGREES LIMITED,Capital Goods,12/09/2018,8574067
1,1AD,ADALTA LIMITED,"Pharmaceuticals, Biotechnology & Life Sciences",22/08/2016,10184494
2,1AE,AURORA ENERGY METALS LIMITED,Materials,18/05/2022,17906374
3,1AG,ALTERRA LIMITED,"Food, Beverage & Tobacco",16/05/2008,4875868
4,1AI,ALGORAE PHARMACEUTICALS LIMITED,"Pharmaceuticals, Biotechnology & Life Sciences",01/09/2004,21264623
...,...,...,...,...,...
2028,ZLD,ZELIRA THERAPEUTICS LIMITED,"Pharmaceuticals, Biotechnology & Life Sciences",28/07/2003,10325911
2029,ZMI,ZINC OF IRELAND NL,Materials,18/09/2007,3197164
2030,ZMM,ZIMI LIMITED,Technology Hardware & Equipment,10/09/2007,4321596
2031,ZNC,ZENITH MINERALS LIMITED,Materials,29/05/2007,28895232


In [4]:
# market_cap contains non-numeric entries (e.g. 'suspended'). Coercing to
# numeric turns those entries into NaN, and the rows holding NaN are then
# removed. The original row index labels are kept.
asx_df = (
    asx_df
    .assign(market_cap=pd.to_numeric(asx_df['market_cap'], errors='coerce'))
    .dropna(subset=['market_cap'])
)

# Confirm the resulting dtypes and non-null counts.
asx_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1940 entries, 0 to 2032
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ticker        1940 non-null   object 
 1   company_name  1940 non-null   object 
 2   industry_gp   1940 non-null   object 
 3   listing_date  1940 non-null   object 
 4   market_cap    1940 non-null   float64
dtypes: float64(1), object(4)
memory usage: 90.9+ KB


In [5]:
# Remove the listing_date column; it is not used by the cells that follow.
# errors='ignore' keeps this cell safe to re-run: on a second execution the
# column is already gone, and a plain drop would raise KeyError.
asx_df = asx_df.drop(columns='listing_date', errors='ignore')
asx_df


Unnamed: 0,ticker,company_name,industry_gp,market_cap
0,14D,1414 DEGREES LIMITED,Capital Goods,8574067.0
1,1AD,ADALTA LIMITED,"Pharmaceuticals, Biotechnology & Life Sciences",10184494.0
2,1AE,AURORA ENERGY METALS LIMITED,Materials,17906374.0
3,1AG,ALTERRA LIMITED,"Food, Beverage & Tobacco",4875868.0
4,1AI,ALGORAE PHARMACEUTICALS LIMITED,"Pharmaceuticals, Biotechnology & Life Sciences",21264623.0
...,...,...,...,...
2028,ZLD,ZELIRA THERAPEUTICS LIMITED,"Pharmaceuticals, Biotechnology & Life Sciences",10325911.0
2029,ZMI,ZINC OF IRELAND NL,Materials,3197164.0
2030,ZMM,ZIMI LIMITED,Technology Hardware & Equipment,4321596.0
2031,ZNC,ZENITH MINERALS LIMITED,Materials,28895232.0


In [6]:
# Count how many rows fall into each industry group, most frequent first.
industry_count = asx_df.loc[:, 'industry_gp'].value_counts(sort=True, ascending=False)
industry_count

industry_gp
Materials                                         774
Energy                                            129
Software & Services                               122
Financial Services                                106
Not Applic                                         84
Health Care Equipment & Services                   83
Pharmaceuticals, Biotechnology & Life Sciences     82
Capital Goods                                      80
Commercial & Professional Services                 54
Consumer Services                                  49
Food, Beverage & Tobacco                           46
Equity Real Estate Investment Trusts (REITs)       45
Consumer Discretionary Distribution & Retail       45
Media & Entertainment                              44
Technology Hardware & Equipment                    28
Transportation                                     26
Real Estate Management & Development               24
Utilities                                          20
Telecommunicatio

In [7]:
# Top 10 companies by market cap within each industry group.

# Order rows by industry group (A-Z) and, inside each group, by market cap
# from largest to smallest. The name is rebound instead of sorting with
# inplace=True, so the frame is not mutated behind earlier cells' backs;
# asx_df still ends up sorted, exactly as before.
asx_df = asx_df.sort_values(
    by=['industry_gp', 'market_cap'],
    ascending=[True, False],
)

# groupby(...).head(10) keeps the first 10 rows of every group in the current
# row order, i.e. the 10 largest market caps per industry group (fewer if the
# group has fewer than 10 rows).
top_tickers_by_industry = asx_df.groupby('industry_gp').head(10)

# Replace the leftover source-row labels with a clean 0..n-1 index.
top_ten_df = top_tickers_by_industry.reset_index(drop=True)
top_ten_df

Unnamed: 0,ticker,company_name,industry_gp,market_cap
0,ARB,ARB CORPORATION LIMITED.,Automobiles & Components,2.455925e+09
1,GUD,G.U.D. HOLDINGS LIMITED,Automobiles & Components,1.534343e+09
2,PWH,PWR HOLDINGS LIMITED,Automobiles & Components,1.000822e+09
3,SFC,SCHAFFER CORPORATION LIMITED,Automobiles & Components,2.272901e+08
4,AHL,ADRAD HOLDINGS LIMITED,Automobiles & Components,7.275023e+07
...,...,...,...,...
254,CEN,CONTACT ENERGY LIMITED,Utilities,5.638375e+09
255,GNE,GENESIS ENERGY LIMITED,Utilities,2.267491e+09
256,D2O,DUXTON WATER LIMITED,Utilities,2.519890e+08
257,GNX,GENEX POWER LIMITED,Utilities,2.077766e+08


In [8]:
# Write the per-industry top-10 table to disk, leaving out the row index.
top_ten_df.to_csv(
    path_or_buf='Resources/ASX_Top10_Industry.csv',
    index=False,
)

In [9]:
# Collect the ticker codes of the top-10 table into a plain Python list and print it.
ticker_list = top_ten_df.loc[:, 'ticker'].to_list()
print(ticker_list)

['ARB', 'GUD', 'PWH', 'SFC', 'AHL', 'VMT', 'RPM', 'ABV', 'DDT', 'SIX', 'CBA', 'NAB', 'ANZ', 'WBC', 'BEN', 'VUK', 'BOQ', 'BFL', 'HGH', 'JDO', 'REH', 'SVW', 'WOR', 'IFT', 'FBU', 'RWC', 'VNT', 'JLG', 'MND', 'RDX', 'SLB', 'TRE', 'OXT', 'S3N', 'HRE', 'KLI', 'DY6', 'BXB', 'CPU', 'ALQ', 'CWY', 'DOW', 'APM', 'IPH', 'MAD', 'MMS', 'SIQ', 'WES', 'JBH', 'HVN', 'PMV', 'APE', 'SUL', 'LOV', 'BAP', 'AX1', 'BGP', 'BRG', 'AKP', 'FWD', 'WAT', 'AVJ', 'GLB', 'TWD', 'SHM', 'SIO', 'GAP', 'ALL', 'LNW', 'TLC', 'IEL', 'DMP', 'FLT', 'CTD', 'WEB', 'TAH', 'IVC', 'WOW', 'COL', 'EDV', 'MTS', 'GNC', 'MMM', 'WDS', 'STO', 'ALD', 'YAL', 'WHC', 'NXG', 'NHC', 'VEA', 'BPT', 'PDN', 'GMG', 'SCG', 'URW', 'SGP', 'VCX', 'MGR', 'GPT', 'DXS', 'CHC', 'NSR', 'MQG', 'SQ2', 'SOL', 'ASX', 'JHG', 'CGF', 'GQG', 'NWL', 'AMP', 'HUB', 'TWE', 'A2M', 'CGC', 'ING', 'ELD', 'BGA', 'AAC', 'RIC', 'CBO', 'SHV', 'RMD', 'COH', 'SHL', 'FPH', 'RHC', 'PME', 'EBO', 'ANN', 'SNZ', 'NAN', 'PTL', 'MCP', 'BIO', 'BXN', 'S66', 'HPC', 'EXL', 'SKN', 'CCO', 'HCT'