In [6]:
#!pip install kaggle pandas sqlalchemy psycopg2-binary yfinance pandas_datareader requests beautifulsoup4 python-dotenv


In [7]:
import os
import zipfile
import requests
import psycopg2
import pandas as pd
import yfinance as yf
from bs4 import BeautifulSoup
from sqlalchemy import create_engine
from psycopg2 import sql
import matplotlib.pyplot as plt
import seaborn as sns
from io import BytesIO
from datetime import datetime

In [8]:
# Set Kaggle configuration
os.environ['KAGGLE_CONFIG_DIR'] = os.path.expanduser('~/.kaggle')

# Download the S&P 500 dataset from Kaggle
#!kaggle datasets download -d camnugent/sandp500

# Download S&P 500 dataset from Kaggle
os.system('kaggle datasets download -d camnugent/sandp500')

# Unzip the downloaded file
with zipfile.ZipFile('sandp500.zip', 'r') as zip_ref:
    zip_ref.extractall('sandp500')

# Load dataset into DataFrame
df = pd.read_csv('sandp500/all_stocks_5yr.csv')

In [9]:
import pandas as pd

# Check for missing values
print('Number of Null Values in Each Column:')
print(df.isnull().sum())

# Remove rows with missing values
cleaned_df = df.dropna()

# Convert date column to datetime
cleaned_df['date'] = pd.to_datetime(cleaned_df['date'], errors='coerce')

# Drop rows with invalid dates (NaT values)
cleaned_df = cleaned_df.dropna(subset=['date'])

# Convert date column to string format YYYY-MM-DD
cleaned_df['date'] = cleaned_df['date'].apply(lambda x: x.strftime('%Y-%m-%d'))

# Check data types for each column to ensure proper format
print('Data Types After Cleaning:')
print(cleaned_df.dtypes)

# Rename columns for clarity
cleaned_df = cleaned_df.rename(columns={'Name': 'Ticker', 'date': 'Date', 'open': 'Open_Price', 
                                        'close': 'Close_Price', 'low': 'Low_Price', 'high': 'High_Price', 
                                        'volume': 'Volume'})

# Check for missing values after cleaning
print('Number of Null Values in Each Column:')
print(cleaned_df.isnull().sum())

# Save cleaned stock data to CSV
cleaned_df.to_csv('Resource/Clean_all_stocks_data_5yrs_cleaned.csv', index=False)


Number of Null Values in Each Column:
date       0
open      11
high       8
low        8
close      0
volume     0
Name       0
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_df['date'] = pd.to_datetime(cleaned_df['date'], errors='coerce')


Data Types After Cleaning:
date       object
open      float64
high      float64
low       float64
close     float64
volume      int64
Name       object
dtype: object
Number of Null Values in Each Column:
Date           0
Open_Price     0
High_Price     0
Low_Price      0
Close_Price    0
Volume         0
Ticker         0
dtype: int64


In [10]:
# Fetch S&P 500 tickers from Wikipedia
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

# Request page content with headers to avoid being blocked
headers = {'User-Agent': 'Mozilla/5.0'}
response = requests.get(url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.find_all('table', {'class': 'wikitable'}) 
    
    # if tables:
    #     sp500_tickers_df = pd.read_html(str(tables[0]))[0]  # Extract first table
    #     sp500_tickers_df = sp500_tickers_df[['Symbol', 'Security']]
    #     sp500_tickers_df.columns = ['Ticker', 'Company_Name']  # Rename columns
        
    #     # Save to CSV
    #     sp500_tickers_df.to_csv("Ticker_data.csv", index=False, encoding='utf-8')
    if len(tables) > 0:
        table = tables[0]
        
        # Read the table directly into a DataFrame using pandas
        ticker_df = pd.read_html(str(table))[0]
        
        # Select only the Ticker Symbol and Company Name columns
        ticker_df = ticker_df[['Symbol', 'Security']]
        
        # Rename columns for clarity
        ticker_df.columns = ['Ticker', 'Company Name']

        # Display the first few rows of the DataFrame
        print(ticker_df.head())

        # Save the DataFrame to CSV
        #ticker_df.to_csv("Ticker_data.csv", index=False, encoding='utf-8')
        ticker_df.to_csv("Resources/Ticker_data.csv", index=False, encoding='utf-8')
    else:
        print("Error: S&P 500 company table not found on Wikipedia.")
else:
    print(f"Error: Failed to fetch page, status code {response.status_code}")

  Ticker         Company Name
0    MMM                   3M
1    AOS          A. O. Smith
2    ABT  Abbott Laboratories
3   ABBV               AbbVie
4    ACN            Accenture


  ticker_df = pd.read_html(str(table))[0]


In [11]:
# Check for missing values after cleaning
print('Number of Null Values in Each Column:')
print(ticker_df.isnull().sum())

Number of Null Values in Each Column:
Ticker          0
Company Name    0
dtype: int64


In [None]:
initial_db_params = {
    'dbname': 'postgres',  
    'user': 'postgres',
    'password': 'postgres',
    'host': 'localhost',
    'port': '5432'
}

db_params = {
    'dbname': 'stocks_dashboard_db',  
    'user': 'postgres',
    'password': 'postgres',
    'host': 'localhost',
    'port': '5432'
}

def terminate_sessions(cursor, dbname):
    cursor.execute(sql.SQL("""
        SELECT pg_terminate_backend(pid)
        FROM pg_stat_activity
        WHERE datname = %s AND pid <> pg_backend_pid();
    """), [dbname])

def drop_database(cursor, dbname):
    terminate_sessions(cursor, dbname)
    cursor.execute(sql.SQL("DROP DATABASE IF EXISTS {}").format(sql.Identifier(dbname)))

def create_database(cursor, dbname):
    cursor.execute(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(dbname)))

def execute_sql_file(cursor, sql_file_path):
    with open(sql_file_path, 'r') as file:
        sql_commands = file.read()
    cursor.execute(sql.SQL(sql_commands))

sql_file_path = 'stocks_dashboard_db_schema.sql'
dbname = 'stocks_dashboard_db'  

try:
    connection = psycopg2.connect(**initial_db_params)
    connection.autocommit = True
    cursor = connection.cursor()

    drop_database(cursor, dbname)
    print(f"Database {dbname} dropped successfully.")

    create_database(cursor, dbname)
    print(f"Database {dbname} created successfully.")

    db_params['dbname'] = dbname
    connection_string = f"postgresql://{db_params['user']}:{db_params['password']}@{db_params['host']}:{db_params['port']}/{db_params['dbname']}"

    connection.close()
    connection = psycopg2.connect(**db_params)
    cursor = connection.cursor()

    execute_sql_file(cursor, sql_file_path)
    connection.commit()
    print("SQL schema file executed successfully.")

except Exception as e:
    print(f"An error occurred: {e}")
    if connection:
        connection.rollback()

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()

db_params = {
    'dbname': 'stocks_dashboard_db',  
    'user': 'postgres',
    'password': 'postgres',
    'host': 'localhost',
    'port': '5432'
}

def upload_csv_to_table(connection_string, table_name, csv_file_path):
    engine = create_engine(connection_string)
    cleaned_df = pd.read_csv(csv_file_path)
    cleaned_df.to_sql(table_name, engine, if_exists='append', index=False)

csv_files = {
    'full_tb': 'C:/Users/bansr/Downloads/proj3BP/proj3BP/Clean_all_stocks_data_5yrs_cleaned.csv',
    'ticker_tb': 'C:/Users/bansr/Downloads/proj3BP/proj3BP/Ticker_data.csv'
}

connection_string = f"postgresql://{db_params['user']}:{db_params['password']}@{db_params['host']}:{db_params['port']}/{db_params['dbname']}"

try:
    connection = psycopg2.connect(**db_params)
    cursor = connection.cursor()

    for table_name, csv_file_path in csv_files.items():
        upload_csv_to_table(connection_string, table_name, csv_file_path)
        print(f"CSV data for {table_name} uploaded successfully.")

except Exception as e:
    print(f"An error occurred: {e}")
    if connection:
        connection.rollback()

finally:
    if cursor:
        cursor.close()
    if connection:
          connection.close()

Historical Price Trends (Closing Price)
What it shows: This plot shows how the stock’s closing price has changed over time.
Why it’s useful: The historical price trend helps investors to visualize the long-term price movement of a stock. It can indicate trends, peaks, and dips over a period of time.
How it's calculated: The plot simply takes the Close_Price of the stock and plots it against the Date. This is the most basic way to track the stock's performance over time.


Volatility (Rolling Standard Deviation)
What it shows: This plot shows the annualized volatility of the stock, which measures how much the stock's price fluctuates on a daily basis.
Why it’s useful: Volatility is a critical measure for risk assessment. Higher volatility typically indicates a higher level of risk (as the stock price changes more drastically), while lower volatility suggests more stable price movement.
How it's calculated: The volatility is calculated by first computing the daily returns (percentage change in the closing price), and then calculating the rolling standard deviation over a 30-day period. The result is annualized by multiplying it by the square root of 252 (trading days in a year).


Moving Averages (50-Day and 200-Day)
What it shows: This chart compares the stock’s closing price with two moving averages: the 50-day Simple Moving Average (SMA) and the 200-day Simple Moving Average (SMA).
Why it’s useful: Moving averages help to smooth out short-term fluctuations and highlight longer-term trends. The 50-day SMA is often used to identify medium-term trends, while the 200-day SMA shows the long-term trend.
How it's calculated:
The 50-Day SMA is the average of the closing prices over the past 50 days.
The 200-Day SMA is the average of the closing prices over the past 200 days.
The chart helps to identify crossovers between these moving averages (e.g., when the 50-day SMA crosses above the 200-day SMA, it’s considered a "bullish crossover," indicating potential buying signals).

Relative Strength Index (RSI)
What it shows: The RSI is a momentum oscillator that measures the speed and change of price movements. It helps determine whether a stock is overbought or oversold.
Why it’s useful: An RSI above 70 typically indicates that the stock is overbought (potentially overvalued), and an RSI below 30 suggests that the stock is oversold (potentially undervalued). It is used to identify potential buy and sell signals.
How it's calculated: The RSI is calculated using the average gain and loss over a 14-day period. It is plotted on a scale of 0 to 100. If the RSI is above 70, the stock is considered overbought; if it’s below 30, the stock is considered oversold.

Moving Average Convergence Divergence (MACD)
What it shows: The MACD is a trend-following momentum indicator that shows the relationship between two moving averages of a stock’s price. It helps identify changes in the strength, direction, momentum, and duration of a trend.
Why it’s useful: The MACD is useful for identifying potential buy or sell signals. A crossover between the MACD and its signal line can indicate when to buy or sell.
How it's calculated:
The MACD Line is calculated by subtracting the 26-period EMA (Exponential Moving Average) from the 12-period EMA.
The Signal Line is a 9-period EMA of the MACD line.
When the MACD crosses above the signal line, it can be a bullish signal. When the MACD crosses below the signal line, it can be a bearish signal.

Bollinger Bands
What it shows: Bollinger Bands are volatility bands placed above and below a moving average. The distance between the bands increases or decreases based on volatility.
Why it’s useful: The upper and lower bands provide insights into the overbought or oversold conditions of the stock. When the price reaches the upper band, the stock may be considered overbought, and when it reaches the lower band, it may be considered oversold.
How it's calculated:
The Middle Band is the 20-period simple moving average (SMA) of the closing prices.
The Upper Band is the 20-period SMA plus two times the standard deviation of the closing prices.
The Lower Band is the 20-period SMA minus two times the standard deviation of the closing prices.

In [None]:


# Set the style for the plots
sns.set(style="darkgrid")

# Function to plot historical price trends, volatility, moving averages, etc., for a given ticker
def plot_stock_analysis(ticker):
    # Load stock data for the selected ticker
    stock_data = cleaned_df[cleaned_df['Ticker'] == ticker]
    
    if stock_data.empty:
        print(f"No data found for ticker: {ticker}")
        return

    # Ensure the data is sorted by date
    stock_data = stock_data.sort_values(by='Date')

    # 1. Historical Price Trends show the overall price movement.
    plt.figure(figsize=(10,6))
    plt.plot(stock_data['Date'], stock_data['Close_Price'], label='Closing Price', color='b', alpha=0.7)
    plt.title(f"Historical Price Trends for {ticker}")
    plt.xlabel('Date')
    plt.ylabel('Price (USD)')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

    # 2. Volatility (Rolling Standard Deviation) measures the risk level of the stock.
    stock_data['Daily_Return'] = stock_data['Close_Price'].pct_change()
    stock_data['Volatility'] = stock_data['Daily_Return'].rolling(window=30).std() * (252 ** 0.5)  # Annualized volatility

    plt.figure(figsize=(10,6))
    plt.plot(stock_data['Date'], stock_data['Volatility'], label='30-Day Rolling Volatility', color='r')
    plt.title(f"Volatility for {ticker}")
    plt.xlabel('Date')
    plt.ylabel('Annualized Volatility')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

    # 3. Moving Averages (50-Day and 200-Day) help smooth out the price fluctuations and spot trends.
    stock_data['SMA_50'] = stock_data['Close_Price'].rolling(window=50).mean()
    stock_data['SMA_200'] = stock_data['Close_Price'].rolling(window=200).mean()

    plt.figure(figsize=(10,6))
    plt.plot(stock_data['Date'], stock_data['Close_Price'], label='Close Price', color='b', alpha=0.7)
    plt.plot(stock_data['Date'], stock_data['SMA_50'], label='50-Day SMA', color='orange')
    plt.plot(stock_data['Date'], stock_data['SMA_200'], label='200-Day SMA', color='green')
    plt.title(f"{ticker} Closing Price and Moving Averages")
    plt.xlabel('Date')
    plt.ylabel('Price (USD)')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

    # 4. RSI (Relative Strength Index) shows the stock’s potential overbought/oversold condition.
    def compute_rsi(data, window=14):
        delta = data.diff()
        gain = delta.where(delta > 0, 0)
        loss = -delta.where(delta < 0, 0)
        avg_gain = gain.rolling(window=window, min_periods=1).mean()
        avg_loss = loss.rolling(window=window, min_periods=1).mean()
        rs = avg_gain / avg_loss
        rsi = 100 - (100 / (1 + rs))
        return rsi

    stock_data['RSI'] = compute_rsi(stock_data['Close_Price'])

    plt.figure(figsize=(10,6))
    plt.plot(stock_data['Date'], stock_data['RSI'], label='RSI', color='purple')
    plt.axhline(30, color='red', linestyle='--')
    plt.axhline(70, color='green', linestyle='--')
    plt.title(f"RSI for {ticker}")
    plt.xlabel('Date')
    plt.ylabel('RSI')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

    # 5. MACD (Moving Average Convergence Divergence) gives insights into the stock’s momentum.
    short_window = 12
    long_window = 26
    signal_window = 9

    stock_data['EMA_short'] = stock_data['Close_Price'].ewm(span=short_window, adjust=False).mean()
    stock_data['EMA_long'] = stock_data['Close_Price'].ewm(span=long_window, adjust=False).mean()
    stock_data['MACD'] = stock_data['EMA_short'] - stock_data['EMA_long']
    stock_data['MACD_signal'] = stock_data['MACD'].ewm(span=signal_window, adjust=False).mean()

    plt.figure(figsize=(10,6))
    plt.plot(stock_data['Date'], stock_data['MACD'], label='MACD', color='blue')
    plt.plot(stock_data['Date'], stock_data['MACD_signal'], label='MACD Signal', color='orange')
    plt.title(f"MACD for {ticker}")
    plt.xlabel('Date')
    plt.ylabel('MACD')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

    # 6. Bollinger Bands provide a measure of volatility and potential overbought/oversold signals.
    rolling_mean = stock_data['Close_Price'].rolling(window=20).mean()
    rolling_std = stock_data['Close_Price'].rolling(window=20).std()
    stock_data['Bollinger_Upper'] = rolling_mean + (rolling_std * 2)
    stock_data['Bollinger_Lower'] = rolling_mean - (rolling_std * 2)

    plt.figure(figsize=(10,6))
    plt.plot(stock_data['Date'], stock_data['Close_Price'], label='Close Price', color='blue', alpha=0.7)
    plt.plot(stock_data['Date'], stock_data['Bollinger_Upper'], label='Upper Band', color='red', linestyle='--')
    plt.plot(stock_data['Date'], stock_data['Bollinger_Lower'], label='Lower Band', color='red', linestyle='--')
    plt.title(f"Bollinger Bands for {ticker}")
    plt.xlabel('Date')
    plt.ylabel('Price (USD)')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

# Example usage: Allow the user to input/select the ticker
ticker_input = input("Enter a ticker symbol (e.g., AAPL, MSFT, TSLA): ").upper()  # Convert input to uppercase
plot_stock_analysis(ticker_input)


In [None]:
# Ensure the data is sorted by date
stock_data = stock_data.sort_values(by='Date')

# 2. Volatility (Rolling Standard Deviation) measures the risk level of the stock.
stock_data['Daily_Return'] = stock_data['Close_Price'].pct_change()
stock_data['Volatility'] = stock_data['Daily_Return'].rolling(window=30).std() * (252 ** 0.5)  # Annualized volatility

plt.figure(figsize=(10,6))
plt.plot(stock_data['Date'], stock_data['Volatility'], label='30-Day Rolling Volatility', color='r')
plt.title(f"Volatility for {ticker}")
plt.xlabel('Date')
plt.ylabel('Annualized Volatility')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()
    
# Example usage: Allow the user to input/select the ticker
ticker_input = input("Enter a ticker symbol (e.g., AAPL, MSFT, TSLA): ").upper()  # Convert input to uppercase
plot_stock_analysis(ticker_input)


NameError: name 'stock_data' is not defined

In [36]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt

# Function to fetch stock data based on user input
def fetch_stock_data(ticker, interval='1d'):
    # Fetch stock data from Yahoo Finance
    stock = yf.Ticker(ticker)
    
    # Get historical market data
    if interval == '1d':
        data = stock.history(period="1d")
    elif interval == '5d':
        data = stock.history(period="5d")
    elif interval == '6m':
        data = stock.history(period="6mo")
    elif interval == '1y':
        data = stock.history(period="1y")
    elif interval == '5y':
        data = stock.history(period="5y")
    elif interval == 'max':
        data = stock.history(period="max")
    
    return data

# Function to plot the stock data
def plot_stock_data(data, ticker):
    # Plotting the stock closing price
    plt.figure(figsize=(10, 6))
    plt.plot(data.index, data['Close'], label='Closing Price', color='blue')
    # Dynamic title based on the ticker
    plt.title(f'{ticker} Stock Price')  
    plt.xlabel('Date')
    plt.ylabel('Price (USD)')
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.legend(loc='upper left')
    

    # Save plot to a BytesIO object
    img = BytesIO()
    plt.savefig(img, format='png')
    img.seek(0)
    
    # Convert image to base64
    img_base64 = base64.b64encode(img.getvalue()).decode('utf-8')
    return img_base64

#     # Show the plot
#     plt.show()

# # Fetch data for the user-input ticker (example: 'AAPL') and interval (example: '1y')
# ticker_input = 'AAPL'  # This would be dynamically set from the HTML input
# interval_input = '1y'  # This would be dynamically set from the HTML input

# data = fetch_stock_data(ticker_input, interval_input)
# plot_stock_data(data, ticker_input)
