In [1]:
import bs4 as bs
import datetime as dt
import os
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from matplotlib import style
import glob
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's 'ggplot' stylesheet.
style.use('ggplot')

In [2]:
# Ticker lookup table: one row per ticker with its company name and sector.
tickers = pd.read_csv('tickers_new.csv')
# Pull the three columns out as parallel plain-Python lists.
ticker_list, company_list, sector_list = (
    tickers[column].values.tolist()
    for column in ('ticker', 'company', 'sector')
)

#### Attach the company name and sector to each company ticker

In [3]:
# Collect one labelled DataFrame per ticker; they are combined in a later cell.
Y = []
for symbols, company, sector in zip(ticker_list, company_list, sector_list):
    # Read this ticker's fundamentals and tag every row with its identifiers.
    df = pd.read_csv(f'stock_csvs/stock_pup_{symbols}.csv').assign(
        symbol=symbols,
        company=company,
        sector=sector,
    )
    # Rotate the last three columns to the front of the frame.
    trailing = df.columns[-3:].tolist()
    leading = df.columns[:-3].tolist()
    df = df[trailing + leading]
    Y.append(df)

In [4]:
# Stack the per-ticker frames row-wise; sort=False keeps the column order as encountered.
fund_stocks = pd.concat(objs=Y, axis=0, sort=False)

In [5]:
# (rows, columns) of the combined table.
fund_stocks.shape

(62850, 44)

#### Convert "Quarter end" to datetime and use it as the index

In [6]:
# Parse "Quarter end" as datetimes, then make it the row index.
fund_stocks = (
    fund_stocks
    .assign(**{'Quarter end': pd.to_datetime(fund_stocks['Quarter end'])})
    .set_index('Quarter end')
)

#### Keep only the quarters from 2000 to the present

In [7]:
# Keep rows whose quarter end falls on or after 2000-01-01
# (despite the variable name, this is everything from 2000 onward).
# A boolean mask is used instead of the string slice ['2000':]: the combined
# frame stacks one block of quarters per ticker, so its DatetimeIndex is
# generally not sorted, and pandas >= 2.0 raises KeyError for a string slice on
# a non-monotonic DatetimeIndex when the bound is not an exact index value.
five_yr_fstock = fund_stocks[fund_stocks.index >= pd.Timestamp('2000-01-01')]

In [8]:
# Convert every column that parses cleanly to a numeric dtype and leave the
# others untouched. This reproduces pd.to_numeric(..., errors='ignore'), which
# is deprecated since pandas 2.2, with an explicit try/except.
def _to_numeric_or_keep(column):
    """Return `column` converted to numeric, or unchanged if it cannot be parsed."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


five_yr_fstock = five_yr_fstock.apply(_to_numeric_or_keep)

#### Put the unmatched companies in a new DataFrame

In [9]:
# Rows whose company field holds the 'nomatch' placeholder.
is_unmatched = five_yr_fstock['company'].eq('nomatch')
nomatch_companies = five_yr_fstock.loc[is_unmatched]

#### Create a new DataFrame with the matched companies

In [10]:
# Rows whose company field is anything other than the 'nomatch' placeholder.
is_matched = five_yr_fstock['company'].ne('nomatch')
company_fund = five_yr_fstock.loc[is_matched]

In [11]:
# Sanity check on the unmatched set: count symbols by how many distinct
# company labels each carries (the recorded output lists 189 symbols, all with one label).
(
    nomatch_companies
    .groupby('symbol')['company']
    .nunique()
    .value_counts()
)

1    189
Name: company, dtype: int64

In [12]:
# Sanity check on the matched set: count companies by how many distinct
# symbols each maps to (566 companies, one symbol each, in the recorded output).
(
    company_fund
    .groupby('company')['symbol']
    .nunique()
    .value_counts()
)

1    566
Name: symbol, dtype: int64

#### Replace all '0' and 'None' strings with NaN

In [13]:
# Turn the literal string '0' into a missing value (numeric zeros are not matched).
company_fund = company_fund.replace('0', np.nan)

In [14]:
# Turn the literal string 'None' into a missing value.
company_fund = company_fund.replace('None', np.nan)

In [15]:
# Re-run the numeric conversion now that the placeholder strings are gone.
# Columns that still cannot be parsed are left unchanged. This reproduces
# pd.to_numeric(..., errors='ignore'), which is deprecated since pandas 2.2.
# The helper is defined here so the cell runs on its own.
def _to_numeric_or_keep(column):
    """Return `column` converted to numeric, or unchanged if it cannot be parsed."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


company_fund = company_fund.apply(_to_numeric_or_keep)

In [16]:
# Missing P/E ratios become 0. The result is assigned back to the column
# because an in-place fillna on a column selection is not guaranteed to update
# the parent frame (FutureWarning in pandas 2.x, no effect under Copy-on-Write).
company_fund['P/E ratio'] = company_fund['P/E ratio'].fillna(0)

In [17]:
# Every remaining missing value in the table becomes 0.
company_fund = company_fund.fillna(value=0)

#### Assigning reported_pe for appropriate companies

In [18]:
# 1 where the P/E ratio is non-zero, 0 where it is zero
# (missing ratios were zero-filled in the earlier cells).
company_fund['reported_pe'] = company_fund['P/E ratio'].ne(0).astype('int64')

In [19]:
# Number of rows with (1) and without (0) a P/E ratio.
company_fund.loc[:, 'reported_pe'].value_counts()

1    35178
0     5388
Name: reported_pe, dtype: int64

#### Assigning reported_earnings for appropriate companies

In [20]:
# 1 where Earnings is non-zero, 0 where it is zero
# (missing values were zero-filled in the earlier cells).
company_fund['reported_earnings'] = company_fund['Earnings'].ne(0).astype('int64')

In [21]:
# Number of rows with (1) and without (0) an Earnings figure.
company_fund.loc[:, 'reported_earnings'].value_counts()

1    40414
0      152
Name: reported_earnings, dtype: int64

In [22]:
# Target label: 1 when the P/E ratio is exactly 0 (which, after the zero-fill
# above, includes rows that had no P/E) or at least 25; otherwise 0.
pe_ratio = company_fund['P/E ratio']
company_fund['growth'] = (pe_ratio.eq(0) | pe_ratio.ge(25)).astype('int64')

In [23]:
# Class balance of the growth label.
company_fund.loc[:, 'growth'].value_counts()

0    24024
1    16542
Name: growth, dtype: int64

####  Categorize Sectors

In [25]:
# Move "Quarter end" out of the index so each row gets a unique 0..n-1 label;
# the one-hot frame below shares that index, so the index-on-index merge is 1:1.
company_fund = company_fund.reset_index()
# One indicator column per sector, named "sector_<value>". The dtype is pinned
# because pandas >= 2.0 produces booleans by default, which would turn the 0/1
# flags into True/False in the CSV files exported later.
sectors = pd.get_dummies(company_fund['sector'], prefix='sector', dtype='uint8')
companies = company_fund.merge(sectors, left_index=True, right_index=True)

In [26]:
# Restore "Quarter end" as the row index.
companies = companies.set_index('Quarter end')

In [105]:
# Write the sector-encoded table to disk. The same file is written again
# further down, after the health/growth flag columns have been added.
# NOTE(review): absolute, machine-specific path - confirm before running elsewhere.
companies.to_csv(
    path_or_buf='/Users/flatironschool/UrPortfolio/companies.csv',
    index=True,
)

### Check Company Health And Growth Fundamentals

In [28]:
# 1 when the current ratio lies in the closed interval [1.5, 3.0], else 0.
companies['health_cr'] = companies['Current ratio'].between(1.5, 3.0).astype('int64')

In [29]:
# 1 when long-term debt to equity is at least 0.05, else 0.
# NOTE(review): the flag is set for ratios *above* the threshold - confirm that
# this direction is what "health" is meant to capture.
companies['health_dtbr'] = companies['Long-term debt to equity ratio'].ge(.05).astype('int64')

In [30]:
# 1 when ROA is at least 0.05, else 0.
companies['growth_roa'] = companies['ROA'].ge(.05).astype('int64')

In [31]:
# 1 when ROE is at least 0.1, else 0.
companies['growth_roe'] = companies['ROE'].ge(.1).astype('int64')

In [32]:
# 1 when the dividend payout ratio is at least 0.55, else 0.
companies['health_dyp'] = companies['Dividend payout ratio'].ge(.55).astype('int64')

In [33]:
# Write the table, now including the health/growth flag columns, with the
# "Quarter end" index as the first CSV column.
# NOTE(review): absolute, machine-specific path - confirm before running elsewhere.
companies.to_csv(
    path_or_buf='/Users/flatironschool/UrPortfolio/companies.csv',
    index=True,
)

###  Prep for Classification

In [77]:
# Modelling table: everything except the free-text company and sector columns
# (sector is already represented by the sector_* indicator columns).
classification = companies.drop(['company', 'sector'], axis='columns')

In [78]:
# Feature matrix: drop the target ('growth') and the column it is computed from.
# NOTE(review): 'reported_pe' is also derived from 'P/E ratio' and stays in the
# features - confirm that this does not leak the target.
classification_feat = classification.drop(['P/E ratio', 'growth'], axis='columns')

In [79]:
# Target as a one-column DataFrame, still indexed by "Quarter end".
classification_pred = classification['growth'].to_frame()

In [80]:
# Move "Quarter end" from the index into an ordinary column (rows get 0..n-1 labels).
classification_pred = classification_pred.reset_index(drop=False)

In [81]:
# Discard the date column so that only the target remains.
classification_pred = classification_pred.drop(columns=['Quarter end'])

In [82]:
# Identifier columns only (symbol, company, sector), keeping the "Quarter end" index.
sec_sym_com = companies.loc[:, ['symbol', 'company', 'sector']]

In [83]:
# Write the feature matrix to disk.
# NOTE(review): absolute, machine-specific path - confirm before running elsewhere.
classification_feat.to_csv(
    path_or_buf='/Users/flatironschool/UrPortfolio/Classification_Models/classification_feat.csv'
)

In [84]:
# Write the target column to disk.
# NOTE(review): absolute, machine-specific path - confirm before running elsewhere.
classification_pred.to_csv(
    path_or_buf='/Users/flatironschool/UrPortfolio/Classification_Models/classification_pred.csv'
)

In [85]:
# Write the symbol/company/sector lookup to disk.
# NOTE(review): absolute, machine-specific path - confirm before running elsewhere.
sec_sym_com.to_csv(
    path_or_buf='/Users/flatironschool/UrPortfolio/Classification_Models/sec_sym_com.csv'
)

##  Recommender System

In [86]:
# Integer ID per ticker: distinct symbols are numbered 0..k-1 in sorted order
# and every row receives its symbol's number (-1 for a missing symbol).
# pd.factorize(sort=True) yields the same codes as the previous
# groupby('symbol').grouper.group_info[0], without relying on the deprecated
# internal grouper API.
companies['symbol_ID'] = pd.factorize(companies['symbol'], sort=True)[0]

In [91]:
# Second name for the same DataFrame object (no copy is made), so a later
# change made through either name is visible through both.
companies_rec = companies

In [92]:
# Write the recommender-system input table to disk.
# NOTE(review): absolute, machine-specific path - confirm before running elsewhere.
companies_rec.to_csv(
    path_or_buf='/Users/flatironschool/UrPortfolio/recommendation_system/companies_rec.csv'
)

In [98]:
# All rows for the ticker 'STZ'.
is_stz = companies['symbol'].eq('STZ')
stz = companies.loc[is_stz]