# Import dependencies and libraries
        - Make sure constituents_csv.csv (used for the ticker-to-sector map) is available in the same directory when you run this code.


In [None]:
# pip install beautifulsoup4 lxml requests pandas yfinance pymongo  (pickle ships with Python and needs no install)
import bs4 as bs
import datetime as dt
import os
import pickle
import requests
import pandas as pd
import yfinance as yf
import pymongo


# Generate S&P 500 Ticker list

In [None]:
# Define function that generates the S&P 500 ticker list by scraping wikipedia
def save_sp500_tickers():
    """Scrape the S&P 500 ticker symbols from Wikipedia and cache them on disk.

    Downloads the "List of S&P 500 companies" page, takes the text of the
    first cell of every data row in the ``wikitable sortable`` table, and
    pickles the resulting list to ``sp500tickers.pickle`` in the current
    working directory.

    Returns:
        list[str]: The cell text exactly as scraped. Entries are not
        stripped here, so they may still carry surrounding newlines.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an error status.
        RuntimeError: If the expected table is not present in the page.
    """
    resp = requests.get(
        'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies',
        timeout=30,  # never hang the notebook on a stalled connection
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    resp.raise_for_status()

    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        raise RuntimeError(
            "Could not find the 'wikitable sortable' table on the S&P 500 page"
        )

    tickers = []
    # The first row is the header row, so it is skipped.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if not cells:
            # Rows made only of <th> cells carry no ticker.
            continue
        tickers.append(cells[0].text)

    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    return tickers

# Scrape the raw ticker cells (one string per table row)
s = save_sp500_tickers()

# Remove the leading/trailing newline characters ('\n') from every entry and
# keep the cleaned symbols in a list called ticker
ticker = [raw_symbol.strip('\n') for raw_symbol in s]

# display S&P 500 ticker list
ticker

# Generate Ticker to Sector map

In [None]:
# Load the constituents CSV from the working directory. The cells below read
# its Symbol and Sector columns; the original note describes the layout as
# Symbol -> Name -> Sector.
sector=pd.read_csv('constituents_csv.csv')
# Notebook display: preview the first rows
sector.head()



In [None]:
# Build sector_map: a plain dict keyed by ticker symbol whose value is the
# sector listed on the same CSV row
symbol_column = sector['Symbol']
sector_column = sector['Sector']
sector_map = dict(zip(symbol_column, sector_column))
sector_map

#  S&P 500 data - Download historical market data ( 1mo) from Yahoo! Finance 

In [None]:

# Download one month of daily history for every ticker and combine the
# per-ticker frames into a single DataFrame (final_data).
count = 0  # never updated in this cell; kept so any later reference still resolves

# One DataFrame per ticker is collected here and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied all accumulated rows on every iteration.
frames = []

# The loop variable must not be called `ticker`: that would replace the ticker
# list with its last symbol and break any re-run of this cell.
for symbol in ticker:
    history = yf.Ticker(symbol).history(period="1mo", actions=False, auto_adjust=False)
    # Tag every row with its symbol and the sector looked up in sector_map
    # (symbols missing from the map get NaN).
    history['Symbol'] = symbol
    history['Sector'] = history['Symbol'].map(sector_map)
    frames.append(history)

# df_ keeps its original shape: a one-element list holding the combined frame.
# pd.concat raises ValueError if no ticker was processed.
df_ = [pd.concat(frames)]

# final_data is the combined S&P 500 stock data for the 1 month period
final_data = df_[0]
final_data



In [None]:
# Move the Date index into a column, then order rows by Sector, then Symbol,
# then newest Date first. One multi-key sort states the full ordering
# explicitly instead of relying on a later sort preserving an earlier one.
# Rows without a sector are placed first.
market_data = final_data.reset_index()
market_data.sort_values(
    by=['Sector', 'Symbol', 'Date'],
    ascending=[True, True, False],
    na_position='first',
    inplace=True,
)

# Drop every row that has a NaN in any column. Original author's note: 50
# companies have no associated sector identified and are removed by this step.
market_Data = market_data.dropna().reset_index(drop=True)
# The second reset_index turns the fresh 0..n-1 row number into an 'index'
# column, which the MongoDB cell renames to '_id'.
market_Data = market_Data.reset_index()
market_Data


# Connect to MongoDB and store data

In [None]:
# Shape the frame for MongoDB: the 'index' column becomes the document '_id',
# shifted by one so that ids start at 1 rather than 0.
# NOTE(review): the original comment says an "_id" of 0 throws an error;
# MongoDB presumably accepts 0 as an _id - confirm before relying on this.
market_Data = (
    market_Data
    .rename(columns={'index': '_id'})
    .assign(_id=lambda frame: frame['_id'] + 1)
)

# One dict per row, ready for insert_many
market_data = market_Data.to_dict(orient='records')
market_data

In [None]:
# Set up the MongoDB connection through pymongo
myclient = pymongo.MongoClient("mongodb://localhost:27017/")

# Database and collection handles (MongoDB creates both on first write)
db = myclient["stock_Data"]
mycol = db["Market_Data"]

# insert_many rejects an empty document list, so check before dropping:
# otherwise an empty download would wipe the stored data and then fail.
if not market_data:
    raise ValueError(
        "market_data is empty; the existing Market_Data collection was left untouched"
    )

# Replace the previous contents of the collection with the fresh records
mycol.drop()
mycol.insert_many(market_data)

# Func to return 1 company stock data - Download historical market data ( 1mo) from Yahoo! Finance 

In [None]:
def stock_data(symbol="GOOGL", period="1mo"):
    """Download the price history of one company from Yahoo! Finance.

    Args:
        symbol: Ticker symbol to download. Defaults to "GOOGL", the value
            that was previously hard-coded.
        period: History window passed to ``yfinance``. Defaults to "1mo".

    Returns:
        pandas.DataFrame: The history with ``Date`` as a regular column,
        sorted newest first with a fresh 0..n-1 index, plus a ``Symbol``
        column and a ``Sector`` column looked up in the module-level
        ``sector_map`` (NaN when the symbol is not in the map). The result
        is a DataFrame, not a dict.
    """
    history = yf.Ticker(symbol).history(period=period, actions=False, auto_adjust=False)

    # map ticker symbol to sector and add both as columns to the dataframe
    history['Symbol'] = symbol
    history['Sector'] = history['Symbol'].map(sector_map)

    # Turn the Date index into a column, sort newest first, then renumber rows
    history.reset_index(inplace=True)
    history.sort_values(by=['Date'], ascending=False, inplace=True)
    history.reset_index(drop=True, inplace=True)
    return history

In [None]:
# Fetch the single-company history and keep it for the cells below
stock_data2=stock_data()
# Notebook display: show the Date column
stock_data2['Date']

In [None]:
# Convert the Date column to pandas datetimes (required for the .dt accessor)
stock_data2['Date'] = pd.to_datetime(stock_data2['Date'])
# Display only: the calendar-date view is not assigned back to the frame
stock_data2['Date'].dt.date

In [None]:
# Get data function
def get_data(df):
    """Split a price-history frame into numeric dates and adjusted closes.

    Args:
        df: DataFrame with a ``Date`` column (anything ``pd.to_datetime``
            accepts) and an ``Adj Close`` column. It is copied, so the
            caller's frame is not modified.

    Returns:
        list: ``[dates, prices]`` where ``dates`` is the ``Date`` column
        converted to datetimes and then to numbers with ``pd.to_numeric``,
        and ``prices`` is the ``Adj Close`` column, both as plain lists in
        the row order of ``df``.
    """
    # Work on the frame that was passed in, not on a module-level global.
    data = df.copy()
    # NOTE(review): to_numeric on datetimes yields large integer timestamps
    # (presumably nanoseconds since the epoch), not day numbers - confirm
    # this is the scale the model and its query values expect.
    data['Date'] = pd.to_numeric(pd.to_datetime(data['Date']))
    return [data['Date'].tolist(), data['Adj Close'].tolist()]  # Convert Series to list
# Unpack the two lists returned by get_data for the single-company frame
dates, prices = get_data(stock_data2)

In [None]:
# Notebook display: inspect the numeric date values
dates

In [None]:
import numpy as np
from sklearn.svm import SVR 
import matplotlib.pyplot as plt 
import pandas as pd 

%matplotlib inline

In [None]:
# predict and plot function
def predict_prices(dates, prices, x):
    """Fit three SVR models to (dates, prices), plot them and predict at x.

    Args:
        dates: Sequence of numeric date values, one per observation.
        prices: Sequence of prices aligned with ``dates``.
        x: Sequence of date values to predict for; only the prediction for
            its first element is returned.

    Returns:
        tuple: Predictions of the RBF, linear and polynomial models, in that
        order.

    Side effects:
        Draws the data and the three fitted curves with matplotlib and calls
        ``plt.show()``.
    """
    # scikit-learn expects a 2-D feature matrix: n rows, 1 column
    train_x = np.asarray(dates).reshape(len(dates), 1)
    query_x = np.asarray(x).reshape(len(x), 1)

    linear_model = SVR(kernel='linear', C=1e3)
    poly_model = SVR(kernel='poly', C=1e3, degree=2)
    rbf_model = SVR(kernel='rbf', C=1e3, gamma=0.1)

    # Fit regression models
    for model in (linear_model, poly_model, rbf_model):
        model.fit(train_x, prices)

    plt.scatter(train_x, prices, c='k', label='Data')
    curves = (
        (linear_model, 'g', 'Linear model'),
        (rbf_model, 'r', 'RBF model'),
        (poly_model, 'b', 'Polynomial model'),
    )
    for model, colour, label in curves:
        plt.plot(train_x, model.predict(train_x), c=colour, label=label)

    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()

    return tuple(
        model.predict(query_x)[0]
        for model in (rbf_model, linear_model, poly_model)
    )

In [None]:
# Fit and plot the three SVR models, then predict the price at x = 31.
# predicted_price receives the (RBF, linear, polynomial) tuple.
# NOTE(review): `dates` comes from get_data, which converts datetimes with
# pd.to_numeric, while 31 looks like a day-of-month; the query is presumably
# not on the same scale as the training dates - confirm.
predicted_price = predict_prices(dates, prices, [31])