# Libs

In [65]:
import json
import os

import pandas as pd
import requests
import tensorflow as tf
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load configuration

In [75]:
# Read config from JSON file
# JSON text is UTF-8; naming the encoding avoids depending on the platform default.
with open('recommend.json', encoding='utf-8') as f:
    config = json.load(f)

# Set data source based on config.
# The first enabled source wins, in this priority order. A flag that is absent
# from the config counts as disabled, so a file that lists only the source it
# wants still loads instead of failing with KeyError.
source_flags = config.get('data_source', {})
data_source = next(
    (
        name
        for name in ('yfinance', 'alphavantage', 'direct_csv_import')
        if source_flags.get(name)
    ),
    '',
)
if not data_source:
    raise ValueError("Invalid configuration for data source")


# GPU

In [76]:
# When the config enables GPU use, turn on memory growth for every GPU that
# TensorFlow reports.
if config['use_gpu']:
    for device in tf.config.experimental.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(device, True)

# Dataset

In [77]:
# Fetch data based on data source.
# Every branch leaves `data` as a DataFrame with Symbol / Name / Sector / Industry columns.

# requests applies no timeout by default, so one stalled server would hang the cell.
REQUEST_TIMEOUT = 10  # seconds


def _sibling_span_text(label_span):
    """Return the text of the <span> that follows ``label_span``, or '' if either is missing."""
    if label_span is None:
        return ''
    value_span = label_span.find_next_sibling("span")
    return value_span.text if value_span is not None else ''


if data_source == 'yfinance':
    # Scrape each symbol's Yahoo Finance quote page. A row is recorded for every
    # symbol; fields that cannot be found stay as empty strings.
    stock_data = []
    symbols_df = pd.read_csv(config['csv_file_path'])
    symbols = symbols_df['Symbol'].tolist()
    total_symbols = len(symbols)
    for i, symbol in enumerate(symbols):
        url = f"https://finance.yahoo.com/quote/{symbol}"
        name = sector = industry = ''
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Keep going: one unreachable page should not abort the whole run.
            print(f"Failed to fetch data for {symbol}: {exc}")
        else:
            soup = BeautifulSoup(response.text, "html.parser")
            name_element = soup.find("h1", {"class": "D(ib) Fz(18px)"})
            name = name_element.text if name_element else ''
            sector = _sibling_span_text(soup.find("span", string="Sector(s)"))
            industry = _sibling_span_text(soup.find("span", string="Industry"))
        stock_data.append({'Symbol': symbol, 'Name': name, 'Sector': sector, 'Industry': industry})
        print(f"Processing: {i+1}/{total_symbols} ({symbol})")
    data = pd.DataFrame(stock_data)

elif data_source == 'alphavantage':
    # The key is read from the environment (or the config file) rather than
    # being written into the notebook.
    api_key = os.environ.get('ALPHAVANTAGE_API_KEY') or config.get('alphavantage_api_key')
    if not api_key:
        raise ValueError(
            "Alpha Vantage API key is not set: define the ALPHAVANTAGE_API_KEY "
            "environment variable or add 'alphavantage_api_key' to recommend.json"
        )
    symbols_df = pd.read_csv(config['csv_file_path'])
    symbols = symbols_df['Symbol'].tolist()
    stock_data = []
    total_symbols = len(symbols)
    for i, symbol in enumerate(symbols):
        try:
            # `params` lets requests URL-encode the symbol and key.
            response = requests.get(
                'https://www.alphavantage.co/query',
                params={'function': 'OVERVIEW', 'symbol': symbol, 'apikey': api_key},
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            print(f"Failed to fetch data for {symbol}: {exc}")
            continue
        if response.status_code != 200:
            print(f"Failed to fetch data for {symbol}. Status code: {response.status_code}")
            continue
        try:
            stock_info = response.json()
        except ValueError:
            print(f"Failed to fetch data for {symbol}: response was not valid JSON")
            continue
        # NOTE(review): the API appears to answer HTTP 200 with a payload that has
        # no company fields (e.g. throttling notices, unknown symbols) - confirm
        # against the Alpha Vantage docs. Such payloads are skipped so they do not
        # become blank rows.
        if not isinstance(stock_info, dict) or 'Name' not in stock_info:
            print(f"Failed to fetch data for {symbol}: no company overview in response")
            continue
        name = stock_info.get('Name', '')  # Using .get() with a default value
        sector = stock_info.get('Sector', '')
        industry = stock_info.get('Industry', '')
        stock_data.append({'Symbol': symbol, 'Name': name, 'Sector': sector, 'Industry': industry})
        print(f"Processing: {i+1}/{total_symbols} ({symbol})")
    data = pd.DataFrame(stock_data)

elif data_source == 'direct_csv_import':
    # The CSV already carries the company columns; missing Sector / Industry
    # values are replaced with 0.
    data = pd.read_csv(config['csv_file_path'])
    data['Sector'] = data['Sector'].fillna(0)
    data['Industry'] = data['Industry'].fillna(0)

else:
    raise ValueError("Invalid data source selected")

# Data Preview and Preparation

In [78]:
# Preview up to the first 50 rows of the loaded stock table
data.head(50)

Unnamed: 0,Symbol,Name,Sector,Industry
0,PIH,"1347 Property Insurance Holdings, Inc.",Finance,Property-Casualty Insurers
1,PIHPP,"1347 Property Insurance Holdings, Inc.",Finance,Property-Casualty Insurers
2,TURN,180 Degree Capital Corp.,Finance,Finance/Investors Services
3,FLWS,"1-800 FLOWERS.COM, Inc.",Consumer Services,Other Specialty Stores
4,FCCY,1st Constitution Bancorp (NJ),Finance,Savings Institutions
5,SRCE,1st Source Corporation,Finance,Major Banks
6,VNET,"21Vianet Group, Inc.",Technology,"Computer Software: Programming, Data Processing"
7,TWOU,"2U, Inc.",Technology,Computer Software: Prepackaged Software
8,TPNL,"3PEA International, Inc.",0,0
9,JOBS,"51job, Inc.",Technology,Diversified Commercial Services


In [79]:
# Cast the two category columns to str
for column in ('Sector', 'Industry'):
    data[column] = data[column].astype(str)



In [80]:
# Drop rows with missing values, then renumber the index so that index labels
# equal row positions. get_recommendations looks a stock up by index label and
# uses that value as a row position in the similarity matrix; without the reset
# the two disagree as soon as any row is dropped.
data = data.dropna().reset_index(drop=True)



In [81]:
# Build the single text column fed to the vectorizer: "<Name> <Sector> <Industry>"
category_text = data['Sector'] + ' ' + data['Industry']
data['text'] = data['Name'] + ' ' + category_text



# Text Vectorization (Text into Numbers)

In [82]:
# Vectorize the text column
# TfidfVectorizer is created with its default settings; fit_transform learns the
# vocabulary from data['text'] and returns the TF-IDF matrix for those same rows.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(data['text'])



# Algorithm (Cosine Similarity)

In [83]:
# Pairwise cosine similarity between every pair of rows of the TF-IDF matrix
# (with a single argument the matrix is compared against itself)
cosine_sim = cosine_similarity(tfidf_matrix)



In [84]:
def get_recommendations(symbol, cosine_sim=cosine_sim, top_n=10):
    """Return the ``top_n`` stocks most similar to ``symbol``.

    Args:
        symbol: Ticker to look up in ``data['Symbol']``.
        cosine_sim: Square similarity matrix whose row/column order matches the
            row order of ``data``. Defaults to the matrix computed by the notebook.
        top_n: Maximum number of recommendations to return.

    Returns:
        DataFrame with the Symbol, Name, Sector and Industry columns of the
        recommended rows, most similar first.

    Raises:
        IndexError: if ``symbol`` is not present in ``data``.
    """
    # Find the row *position* of the symbol. The similarity matrix and .iloc are
    # both positional, so an index label must not be used here: labels stop
    # matching positions once rows have been dropped from `data`.
    position = next(
        (pos for pos, value in enumerate(data['Symbol']) if value == symbol),
        None,
    )
    if position is None:
        raise IndexError(f"Symbol {symbol!r} not found in data")

    # Exclude the queried stock by position instead of dropping the first sorted
    # entry: with tied scores (e.g. two listings with identical text) the queried
    # stock is not guaranteed to sort first and could be recommended to itself.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[position])
        if pos != position
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)  # stable: ties keep row order
    top_positions = [pos for pos, _ in candidates[:top_n]]
    return data.iloc[top_positions][['Symbol', 'Name', 'Sector', 'Industry']]


In [85]:

def get_last_click_recommendations(last_clicked_stock, top_n=10):
    """Format the recommendations for the last clicked stock as lettered lines.

    Args:
        last_clicked_stock: Ticker symbol passed on to ``get_recommendations``.
        top_n: Number of recommendations to request.

    Returns:
        One string with a line per recommendation in the form
        "<letter label>. <Symbol>: <Name>", joined by newlines.
    """
    similar_stocks = get_recommendations(last_clicked_stock, top_n=top_n)
    lines = [
        f"{to_numbering_system(rank)}. {stock.Symbol}: {stock.Name}"
        for rank, stock in enumerate(similar_stocks.itertuples(), start=1)
    ]
    return '\n'.join(lines)



In [86]:
def to_numbering_system(n):
    """Convert a 1-based number to a letter label (1 -> A, 26 -> Z, 27 -> AA, ...).

    Values of ``n`` that are not greater than zero yield an empty string.
    """
    letters = []
    remaining = n
    while remaining > 0:
        # Subtracting 1 first makes this a system with no zero digit: A..Z map to 1..26.
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(ord('A') + offset))
    # Letters were produced least-significant first.
    return ''.join(reversed(letters))



# Recommendation based on stock

In [87]:
# Demo: use 'MSFT' as the last clicked stock and print its lettered recommendation list
last_clicked_stock = 'MSFT'  # Example last clicked stock symbol
recommendations = get_last_click_recommendations(last_clicked_stock)
print(recommendations)


A. AMSWA: American Software, Inc.
B. PRGS: Progress Software Corporation
C. NATI: National Instruments Corporation
D. UPLD: Upland Software, Inc.
E. APPN: Appian Corporation
F. CPAH: CounterPath Corporation
G. DWCH: Datawatch Corporation
H. EGAN: eGain Corporation
I. QUMU: Qumu Corporation
J. SINA: Sina Corporation
