In [28]:
import pandas as pd
import numpy as np
import os
import sys
from decouple import config

sys.path.append(config('PYTHONPATH'))

from sqlalchemy import create_engine
from bokeh.plotting import figure, output_file, show
from bokeh.transform import factor_cmap
from bokeh.palettes import Turbo256, RdYlBu11
from bokeh.io import output_notebook
from bokeh.layouts import row

output_notebook()

In [2]:
engine = create_engine(config('ENGINE_PATH'))
df = pd.read_sql_table('industry_breakdown', schema='research', con=engine)

# Visualization

In [30]:
# Sector
df_dd = df[['ticker', 'sector', 'industry']].drop_duplicates()
df_sector_histogram = df_dd.groupby('sector').count().sort_values('ticker', ascending=False)[['ticker']]

# Plot
s3 = figure(
    plot_width=1000, 
    plot_height=500, 
    y_range=list(reversed(df_sector_histogram.reset_index()['sector'])), 
    x_axis_label='Number of Companies',
    title='Sector Breakdown',
)
s3.hbar(y=df_sector_histogram.reset_index()['sector'], height=0.86, left=0, right=df_sector_histogram.reset_index()['ticker'], color=(230, 54, 54))
s3.xaxis.axis_label_text_font_size = '20px'
s3.yaxis.axis_label_text_font_size = '20px'
s3.title.text_font_size = '20pt'

show(s3)

In [31]:
# Industry
df_dd = df[['ticker', 'sector', 'industry']][~df['industry'].isna()].drop_duplicates()
df_dd['secotr_industry'] = df_dd.apply(lambda x: f'{x.sector}: {x.industry}', axis=1)
df_sector_histogram = df_dd.groupby('secotr_industry').count().sort_values('ticker', ascending=False)[['ticker']].iloc[:20, :]
print('Total Industries: ', len(df_sector_histogram))

# Plot
s2 = figure(
    plot_width=1000, 
    plot_height=500, 
    y_range=list(reversed(df_sector_histogram.reset_index()['secotr_industry'])), 
    x_axis_label='Number of Companies',
    title='Industry Breakdown',
)
s2.hbar(y=df_sector_histogram.reset_index()['secotr_industry'], height=0.86, left=0, right=df_sector_histogram.reset_index()['ticker'], color=(230, 54, 54))
s2.xaxis.axis_label_text_font_size = '16px'
s2.yaxis.axis_label_text_font_size = '16px'
s2.title.text_font_size = '20pt'

show(s2)

Total Industries:  20


In [32]:
# Sector Industry Breakdown
df_dd = df[['sector', 'industry']].drop_duplicates()
df_sector_industry_histogram = df_dd.groupby(['sector']).count().sort_values('industry', ascending=False)[['industry']]

# Plot
s1 = figure(
    plot_width=1000, 
    plot_height=500, 
    y_range=list(reversed(df_sector_industry_histogram.reset_index()['sector'])), 
    x_axis_label='Number of Companies',
    title='Industries per Sector',
)

s1.hbar(y=df_sector_industry_histogram.reset_index()['sector'], height=0.86, left=0, right=df_sector_industry_histogram.reset_index()['industry'], color=(230, 54, 54))
s1.xaxis.axis_label_text_font_size = '16px'
s1.yaxis.axis_label_text_font_size = '16px'
s1.title.text_font_size = '20pt'

show(s1)