In [3]:
import bs4 as bs
import datetime as dt
import os
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from matplotlib import style
import glob
import plotly.graph_objects as go
import plotly.express as px
import chart_studio
style.use('ggplot')

In [7]:
companies = pd.read_csv('csv_files/companies.csv')
companies.head()


Unnamed: 0,Quarter end,symbol,company,sector,Shares,Shares split adjusted,Split factor,Assets,Current Assets,Liabilities,...,sector_Healthcare,sector_Industrial Goods,sector_Services,sector_Technology,sector_Utilities,health_cr,health_dtbr,growth_roa,growth_roe,health_dyp
0,2019-04-30,A,"Agilent Technologies, Inc.",Healthcare,315993352,315993352,1.0,9022000000.0,3812000000.0,3897000000.0,...,1,0,0,0,0,0,1,1,1,0
1,2019-01-31,A,"Agilent Technologies, Inc.",Healthcare,317515869,317515869,1.0,8952000000.0,3712000000.0,3916000000.0,...,1,0,0,0,0,0,1,1,1,0
2,2018-10-31,A,"Agilent Technologies, Inc.",Healthcare,318533054,318533054,1.0,8541000000.0,3848000000.0,3970000000.0,...,1,0,0,0,0,0,1,0,0,1
3,2018-07-31,A,"Agilent Technologies, Inc.",Healthcare,318769547,318769547,1.0,8349000000.0,3667000000.0,3781000000.0,...,1,0,0,0,0,0,1,0,0,1
4,2018-04-30,A,"Agilent Technologies, Inc.",Healthcare,319952126,319952126,1.0,8784000000.0,4525000000.0,4167000000.0,...,1,0,0,0,0,0,1,0,0,1


In [8]:
companies = companies.rename(columns={"Quarter end": "date"})

In [9]:
companies = companies.sort_values('sector')

In [17]:
labels = ['Services','Financial','Technology','Consumer Goods','Industrial Goods',  
          'Basic Materials','Healthcare','Utilities'] 
values = companies['sector'].value_counts(normalize = False)

fig = go.Figure(data =[go.Pie(labels=labels, values=values)])
fig.update_layout(
    title='Amount of Sectors',
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)


fig.show()
#fig.write_image('images/amountofsectors.png', engine='kaleido')


In [18]:
labels = ['Basic Materials', 'Consumer Goods', 'Financial','Healthcare',
          'Industrial Goods','Services','Technology','Utilities']
y = companies.groupby('sector')['Earnings'].sum()
fig = go.Figure([go.Bar(x=labels, y=y)])
fig.update_layout(
    title='Industry Growth 2000-2019',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='USD (millions)',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()

In [19]:
labels = ['growth', 'value']
y = companies.groupby('sector')['growth'].value_counts()
fig = go.Figure([go.Bar(x=labels, y=y)])
fig.update_layout(
    title='Count Of Growth And Value Stocks',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='Total Count',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()

In [21]:
labels = ['growth', 'value']
y = companies.groupby(['sector','growth'])['Price'].sum()
fig = go.Figure([go.Bar(x=labels, y=y)])
fig.update_layout(
    title='Growth vs Value Stock by Price 2000-2019',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='USD (millions)',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()

In [22]:
sectors_growth = pd.DataFrame(companies.groupby(['sector','growth'])['Price'].sum())
sectors_growth = sectors_growth.reset_index()
value_stocks = sectors_growth[sectors_growth['growth'] == 0].sort_values('sector')
growth_stocks = sectors_growth[sectors_growth['growth'] == 1].sort_values('sector')
value_stocks = value_stocks.rename({'growth': 'value'}, axis =1 )

In [23]:
label = value_stocks['sector']
values = value_stocks['Price']
growing = growth_stocks['Price']
fig = go.Figure()
fig.add_trace(go.Bar(x=label,
                y= growing,
                name='Growth Stock',
                marker_color='rgb(55, 83, 109)'
                ))
fig.add_trace(go.Bar(x=label,
                y=values,
                name='Value Stock',
                marker_color='rgb(26, 118, 255)'
                ))

fig.update_layout(
    title='Industry Growth and Value Stocks 2000-2019',
    xaxis_tickfont_size= 10,
    yaxis=dict(
        title='USD (millions)',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()

In [24]:
sectors_count = pd.DataFrame(companies.groupby(['sector'])['growth'].value_counts())
sectors_count = sectors_count.unstack(1).reset_index()
sectors_count.columns = sectors_count.columns.droplevel()
sectors_count = sectors_count.rename({'':'sector', 0: 'value', 1: 'growth'}, axis = 1)

In [25]:
label = sectors_count['sector']
values = sectors_count['value']
growing = sectors_count['growth']
fig = go.Figure()
fig.add_trace(go.Bar(x=label,
                y= growing,
                name='Growth Stock',
                marker_color='rgb(55, 83, 109)'
                ))
fig.add_trace(go.Bar(x=label,
                y=values,
                name='Value Stock',
                marker_color='rgb(26, 118, 255)'
                ))

fig.update_layout(
    title='Industry Growth and Value Stocks by P/E ratio',
    xaxis_tickfont_size= 10,
    yaxis=dict(
        title='Total Count',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()

In [26]:
sectors_growth = pd.DataFrame(companies.groupby(['sector','growth'])['Earnings'].sum())
sectors_growth = sectors_growth.reset_index()
value_stocks = sectors_growth[sectors_growth['growth'] == 0].sort_values('sector')
growth_stocks = sectors_growth[sectors_growth['growth'] == 1].sort_values('sector')
value_stocks = value_stocks.rename({'growth': 'value'}, axis =1 )

In [27]:
fig = px.bar(value_stocks, x='value', y='Earnings',
             hover_data=['value', 'sector'], color='sector',
             labels={''}, height=400)
fig.update_layout(barmode='group', xaxis={'categoryorder':'category ascending'})

fig.show()

In [28]:
fig = px.bar(growth_stocks, x='growth', y='Earnings',
             hover_data=['growth', 'sector'], color='sector',
             labels={'l'}, height=400)
fig.update_layout(barmode='group', xaxis={'categoryorder':'category ascending'})

fig.show()

In [31]:
companies.columns[:5]



Index(['date', 'symbol', 'company', 'sector', 'Shares'], dtype='object')


#### Sectors Growth P/E ratio From 2000-2019

In [32]:
sectors_dated = pd.DataFrame(companies.groupby(['sector','growth','date'])['Price'].sum())
sectors_dated = sectors_dated.reset_index()

value_dated = sectors_dated[sectors_dated['growth'] == 0]
growth_dated = sectors_dated[sectors_dated['growth'] == 1]

value_dated = value_dated.rename({'growth': 'value'}, axis =1 )

value_dated.date = pd.to_datetime(value_dated.date)
value_dated = value_dated.resample('D', on='date').sum()

growth_dated.date = pd.to_datetime(growth_dated.date)
growth_dated = growth_dated.resample('D', on='date').sum()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [33]:
growth = pd.pivot_table(growth_dated, values = ['growth','Price'], index = 'date')
value = pd.pivot_table(value_dated, values = ['value','Price'], index = 'date')

In [34]:
trace1 = go.Scatter(x = growth.index,
                 y = growth.Price,
                 mode = 'lines+markers',
                 name = 'growth stock')
trace0 = go.Scatter(x = value.index,
                 y = value.Price,
                 mode = 'lines+markers',
                 name = 'value stock')
data = [trace0,trace1]
layout = go.Layout(title ='Growth vs Value Price 2000-2019 by P/E ratio')
figure = go.Figure(data = data, layout = layout)
figure.show()

### Health of Company

In [35]:
sectors_health = pd.DataFrame(companies.groupby(['symbol','sector'])['health_cr'].value_counts())
sectors_health = sectors_health.unstack(2).reset_index()
sectors_health.columns = sectors_health.columns.droplevel(1)

In [36]:
cols = []
count = 1
for column in sectors_health.columns:
    if column == 'health_cr':
        cols.append(f'health_cr_{count}')
        count+=1
        continue
    cols.append(column)
sectors_health.columns = cols

In [39]:
sectors_health_20 = sectors_health.head(50)

In [41]:
label = sectors_health_20['symbol']
not_healthy = sectors_health_20['health_cr_1']
healthy = sectors_health_20['health_cr_2']
fig = go.Figure()
fig.add_trace(go.Bar(x=label,
                y= healthy,
                name='Healthy',
                marker_color='rgb(55, 83, 109)'
                ))
fig.add_trace(go.Bar(x=label,
                y= not_healthy,
                name='Not Healthy',
                marker_color='rgb(27, 155, 203)'
                ))

fig.update_layout(
    title='Stocks Health By Current Ratio',
    xaxis_tickfont_size= 10,
    yaxis=dict(
        title='Total Count',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='stack',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()

KeyError: 'health_cr_1'

In [43]:
sectors_roe = pd.DataFrame(companies.groupby(['symbol','sector'])['growth_roe'].value_counts())
sectors_roe = sectors_roe.unstack(2).reset_index()
sectors_roe.columns = sectors_roe.columns.droplevel(1)

In [44]:
cols = []
count = 0
for column in sectors_roe.columns:
    if column == 'growth_roe':
        cols.append(f'growth_roe_{count}')
        count+=1
        continue
    cols.append(column)
sectors_roe.columns = cols

In [49]:
sectors_roe_50 = sectors_roe
sectors_roe_50.head()

Unnamed: 0,symbol,sector,count,count.1
0,A,Healthcare,32.0,46.0
1,AA,Basic Materials,54.0,24.0
2,AAN,Services,17.0,61.0
3,AAP,Services,4.0,66.0
4,AAPL,Consumer Goods,15.0,64.0


In [46]:
label = sectors_roe_50['symbol']
not_growing = sectors_roe_50['growth_roe_0']
growths = sectors_roe_50['growth_roe_1']
fig = go.Figure()
fig.add_trace(go.Bar(x=label,
                y= growths,
                name='Growing',
                marker_color='rgb(55, 83, 109)'
                ))
fig.add_trace(go.Bar(x=label,
                y= not_growing,
                name='Not Growing',
                marker_color='rgb(27, 155, 203)'
                ))

fig.update_layout(
    title='Stocks Growth By ROE',
    xaxis_tickfont_size= 10,
    yaxis=dict(
        title='Total Count',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='stack',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()

KeyError: 'growth_roe_0'