# Financial Sector Analysis

### Financial Analysis using Data Exploration and Data Visualization

### Step 1: Connecting to data source

In [1]:
import sqlite3 as sql

db = sql.connect('finance_db')
c = db.cursor()

c.execute('select * from financials')
data = c.fetchall()

In [2]:
import pandas as pd
import numpy as np

columns = ['symbol',
        'name',
        'sector',
        'price',
        'price_earnings' ,
        'dividend_yield' ,
        'earnings_share' ,
        'annual_week_low' ,
        'annual_week_high' ,
        'market_cap' ,
        'epittda' ,
        'price_sales' ,
        'price_book' ,
        'sec_filings' ]

df = pd.DataFrame(data, columns=columns)
df.head()

Unnamed: 0,symbol,name,sector,price,price_earnings,dividend_yield,earnings_share,annual_week_low,annual_week_high,market_cap,epittda,price_sales,price_book,sec_filings
0,MMM,3M Company,Industrials,222.89,24.31,2.332862,7.92,259.77,175.49,138721100000.0,9048000000.0,4.390271,11.34,http://www.sec.gov/cgi-bin/browse-edgar?action...
1,AOS,A.O. Smith Corp,Industrials,60.24,27.76,1.147959,1.7,68.39,48.925,10783420000.0,601000000.0,3.575483,6.35,http://www.sec.gov/cgi-bin/browse-edgar?action...
2,ABT,Abbott Laboratories,Health Care,56.27,22.51,1.908982,0.26,64.6,42.28,102121000000.0,5744000000.0,3.74048,3.19,http://www.sec.gov/cgi-bin/browse-edgar?action...
3,ABBV,AbbVie Inc.,Health Care,108.48,19.41,2.49956,3.29,125.86,60.05,181386300000.0,10310000000.0,6.291571,26.14,http://www.sec.gov/cgi-bin/browse-edgar?action...
4,ACN,Accenture plc,Information Technology,150.51,25.47,1.71447,5.44,162.6,114.82,98765860000.0,5643228000.0,2.604117,10.62,http://www.sec.gov/cgi-bin/browse-edgar?action...


### Step 2: Data Exploration

In [3]:
from IPython.display import display

display(df.head(1))
display(df.describe())
display(pd.DataFrame(df.dtypes, columns=['datatypes']))
print(f"{len(df.symbol.unique())} unique symbols\n")
display(pd.DataFrame(df['sector'].value_counts()))
print(df.shape)

Unnamed: 0,symbol,name,sector,price,price_earnings,dividend_yield,earnings_share,annual_week_low,annual_week_high,market_cap,epittda,price_sales,price_book,sec_filings
0,MMM,3M Company,Industrials,222.89,24.31,2.332862,7.92,259.77,175.49,138721100000.0,9048000000.0,4.390271,11.34,http://www.sec.gov/cgi-bin/browse-edgar?action...


Unnamed: 0,price,price_earnings,dividend_yield,earnings_share,annual_week_low,annual_week_high,market_cap,epittda,price_sales,price_book
count,505.0,503.0,505.0,505.0,505.0,505.0,505.0,505.0,505.0,497.0
mean,103.830634,24.80839,1.895953,3.753743,122.623832,83.536616,49239440000.0,3590328000.0,3.941705,14.453179
std,134.427636,41.241081,1.537214,5.689036,155.36214,105.725473,90050170000.0,6840544000.0,3.46011,89.660508
min,2.82,-251.53,0.0,-28.01,6.59,2.8,2626102000.0,-5067000000.0,0.153186,0.51
25%,46.25,15.35,0.794834,1.49,56.25,38.43,12732070000.0,773932000.0,1.62949,2.02
50%,73.92,19.45,1.769255,2.89,86.68,62.85,21400950000.0,1614399000.0,2.89644,3.4
75%,116.54,25.75,2.781114,5.14,140.13,96.66,45119680000.0,3692749000.0,4.703842,6.11
max,1806.06,520.15,12.661196,44.09,2067.99,1589.0,809508000000.0,79386000000.0,20.094294,1403.38


Unnamed: 0,datatypes
symbol,object
name,object
sector,object
price,float64
price_earnings,float64
dividend_yield,float64
earnings_share,float64
annual_week_low,float64
annual_week_high,float64
market_cap,float64


505 unique symbols



Unnamed: 0,sector
Consumer Discretionary,84
Information Technology,70
Financials,68
Industrials,67
Health Care,61
Consumer Staples,34
Real Estate,33
Energy,32
Utilities,28
Materials,25


(505, 14)


### Step 3: Data Visualization

In [4]:
import sys
sys.path

['/Users/andrejacobs/Desktop/DataProjects/financial market',
 '/Users/andrejacobs/Desktop/DataProjects',
 '/Users/andrejacobs/opt/anaconda3/envs/tensor/lib/python39.zip',
 '/Users/andrejacobs/opt/anaconda3/envs/tensor/lib/python3.9',
 '/Users/andrejacobs/opt/anaconda3/envs/tensor/lib/python3.9/lib-dynload',
 '',
 '/Users/andrejacobs/opt/anaconda3/envs/tensor/lib/python3.9/site-packages']

In [5]:
sys.path.append('/Users/andrejacobs/Desktop/DataProjects/helper functions')
sys.path

['/Users/andrejacobs/Desktop/DataProjects/financial market',
 '/Users/andrejacobs/Desktop/DataProjects',
 '/Users/andrejacobs/opt/anaconda3/envs/tensor/lib/python39.zip',
 '/Users/andrejacobs/opt/anaconda3/envs/tensor/lib/python3.9',
 '/Users/andrejacobs/opt/anaconda3/envs/tensor/lib/python3.9/lib-dynload',
 '',
 '/Users/andrejacobs/opt/anaconda3/envs/tensor/lib/python3.9/site-packages',
 '/Users/andrejacobs/Desktop/DataProjects/helper functions']

In [8]:
df.columns

Index(['symbol', 'name', 'sector', 'price', 'price_earnings', 'dividend_yield',
       'earnings_share', 'annual_week_low', 'annual_week_high', 'market_cap',
       'epittda', 'price_sales', 'price_book', 'sec_filings'],
      dtype='object')

In [9]:
from plotly_scatter import flex_scatter

x = df[['price_earnings', 'dividend_yield',
        'earnings_share', 'annual_week_low', 'annual_week_high', 'market_cap',
        'epittda', 'price_sales', 'price_book']]

y = df['price']

flex_scatter(x, y)

In [12]:
from plotly.express import histogram

fig = histogram(df, x='sector', title='Distribution of the different sectors')
fig.show()

### Some Aggregate Statistics to take a Look at Different Sectors Return on Stocks

In [22]:
grouped = df.groupby('sector')[['price', 'dividend_yield']].mean().sort_values(by='dividend_yield', ascending=False)
grouped['count'] = df.value_counts('sector')
grouped

Unnamed: 0_level_0,price,dividend_yield,count
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Telecommunication Services,33.603333,7.567797,3
Real Estate,88.712727,3.894758,33
Utilities,55.194643,3.759224,28
Consumer Staples,79.764118,2.433391,34
Financials,89.056029,2.017241,68
Energy,57.8875,2.014452,32
Materials,102.3868,1.737976,25
Consumer Discretionary,124.034524,1.572412,84
Industrials,116.887612,1.479392,67
Information Technology,119.242857,1.228494,70


#### There could be potential bias present due to the fact that only three companies are traded in the Telecommunication sector. This could cause an abnormal average due to low count. ####

In [21]:
from plotly.express import bar

fig = bar(data_frame=grouped, x='dividend_yield', title='Dividend yields per sector')
fig.show()

### This dataset does not provide enough context to extract any more meaning. The conclusion for this study is that Real Estate and Utilities are good sectors to invest in.