In [95]:
# Import the required libraries and dependencies 
import os
import pandas as pd
import hvplot.pandas
from pathlib import Path
import plotly.express as px


In [96]:
# Load the sector data CSV (path is relative to the notebook's working
# directory) into a DataFrame.
ipo_df = pd.read_csv(Path("./csvs/sector_data.csv"))

In [97]:
# Preview the first five rows of ipo_df to check the columns that were read.
ipo_df.head(n=5)

Unnamed: 0,Year,Symbol,Volume,Sector,Industry,State/Country
0,1999,A,59753154.0,Healthcare,Biotechnology: Laboratory Analytical Instruments,CA
1,2014,AAC,2799073.0,Financial,Medical Specialities,TN
2,2013,AAOI,948999.0,Technology,Semiconductors,TX
3,2001,AAP,371100.0,Consumer Cyclical,Other Specialty Stores,VA
4,2011,AAT,15536889.0,Financial,Financial Investment Trusts,CA


In [98]:
# Build the "top ten IPOs by volume for every year from 2011 onward" frame.
#
# The result is derived as a NEW frame: ipo_df itself is left untouched, so it
# still holds everything loaded from the CSV and this cell can be re-run at
# any time with the same result.
#
# Rows are ordered by Year (ascending) and, within each year, by Volume
# (descending), so head(10) per group keeps the ten largest volumes and the
# displayed order is deterministic.
ipo_group = (
    ipo_df.loc[ipo_df['Year'] >= 2011]
    .sort_values(['Year', 'Volume'], ascending=[True, False])
    .groupby('Year')
    .head(10)
)
ipo_group

Unnamed: 0,Year,Symbol,Volume,Sector,Industry,State/Country
2578,2011,P,42152450.0,Other,Broadcasting,CA
3687,2011,ZNGA,116826166.0,Communications,EDP Services,CA
3663,2011,YNDX,70186723.0,Communications,"Computer Software: Programming, Data Processing",the Netherlands
1568,2011,HCA,64600369.0,Healthcare,Hospital/Nursing Management,TN
215,2011,ARCO,63777574.0,Consumer Cyclical,Restaurants,Uruguay
1955,2011,KORS,42261341.0,Other,Apparel,the United Kingdom
1942,2011,KMI,50613513.0,Energy,Natural Gas Distribution,TX
1520,2011,GRPN,49813026.0,Communications,Advertising,IL
934,2011,DNKN,45407627.0,Other,Restaurants,MA
2385,2011,NLSN,53157427.0,Industrials,Business Services,NY


In [99]:
# Bar chart of the selected IPOs: one bar per Symbol, bar height = Volume.
# The yformatter option prints the volume axis as plain integers instead of
# scientific notation.
ipo_group.hvplot.bar(
    x="Symbol", y="Volume",
    rot=45,
    width=2500, height=500,
    title="Top IPO's 2011-2021 by Volume",
).opts(
    yformatter="%.0f"
)

In [100]:
# Average trade volume per year across the rows of ipo_group.
# Volume is selected explicitly: the remaining columns (Symbol, Sector,
# Industry, State/Country) are text, and pandas >= 2.0 raises a TypeError when
# asked to average them instead of silently dropping them.
# The double brackets keep the result a one-column DataFrame indexed by Year.
annual_ipo_avg = ipo_group.groupby('Year')[['Volume']].mean()

annual_ipo_avg

Unnamed: 0_level_0,Volume
Year,Unnamed: 1_level_1
2011,59879621.6
2012,81466657.5
2013,44704534.0
2014,72325600.5
2015,35971871.2
2016,29338094.8
2017,49640004.8
2018,38123491.5
2019,79181731.1
2020,154048861.2


In [101]:
# Line plot of the yearly average volume (Year is the index of annual_ipo_avg).
# NOTE(review): the averages displayed above end at 2020 while the title says
# 2021 -- confirm the intended year range.
annual_ipo_avg.hvplot(
    title="Average IPO Trade Volume by Year, 2011-2021",
    x='Year',
    y='Volume',
).opts(yformatter='%.0f')  # plain integers on the volume axis

In [102]:
# Two panels combined with `+`: a Volume-by-Year bar chart grouped by Sector,
# next to a table listing Year, Symbol and Volume.
(
    ipo_group.hvplot.bar(
        x='Year',
        y='Volume',
        groupby='Sector',
        rot=90,
        width=550,
    )
    + ipo_group.hvplot.table(
        ['Year', 'Symbol', 'Volume'],
        width=420,
    )
)

In [103]:
# Table of Symbol / Volume / Sector / Industry, grouped by Year
# (Year is moved into the index first).
# NOTE(review): x, y, xlabel and ylabel look like leftovers from a chart call;
# confirm whether the table view actually uses them before removing.
ipo_group.set_index('Year').hvplot.table(
    title='IPO Trade Volume by Year',
    groupby='Year',
    columns=['Symbol', 'Volume', 'Sector', 'Industry'],
    x='Symbol', y='Volume',
    xlabel="Symbol", ylabel="Volume",
)

In [104]:
# Ten highest-volume IPOs of 2011, largest first.
# Built as a new frame from a boolean filter instead of aliasing ipo_df and
# dropping rows in place: the alias made every drop hit ipo_df too, leaving
# it with 2011 rows only and breaking any re-run of the earlier cells.
ipo_df_2011 = (
    ipo_df.loc[ipo_df['Year'] == 2011]
    .sort_values('Volume', ascending=False)
    .head(10)
)
ipo_df_2011


Unnamed: 0,Year,Symbol,Volume,Sector,Industry,State/Country
3687,2011,ZNGA,116826166.0,Communications,EDP Services,CA
3663,2011,YNDX,70186723.0,Communications,"Computer Software: Programming, Data Processing",the Netherlands
1568,2011,HCA,64600369.0,Healthcare,Hospital/Nursing Management,TN
215,2011,ARCO,63777574.0,Consumer Cyclical,Restaurants,Uruguay
2385,2011,NLSN,53157427.0,Industrials,Business Services,NY
1942,2011,KMI,50613513.0,Energy,Natural Gas Distribution,TX
1520,2011,GRPN,49813026.0,Communications,Advertising,IL
934,2011,DNKN,45407627.0,Other,Restaurants,MA
1955,2011,KORS,42261341.0,Other,Apparel,the United Kingdom
2578,2011,P,42152450.0,Other,Broadcasting,CA


In [105]:
# Bar chart (Volume per Symbol) next to a Year / Symbol / Volume table for the
# 2011 frame, combined with `+`.
# Uses ipo_df_2011, the frame built in the previous cell; the name `ipo_2011`
# is not defined anywhere in the cells shown and raised a NameError.
(ipo_df_2011.hvplot.bar(x='Symbol', y='Volume', rot=90, width=550) +
 ipo_df_2011.hvplot.table(['Year', 'Symbol', 'Volume'], width=420))