In [1]:
import nltk
import requests
import warnings
from sqlalchemy import text
from textblob import TextBlob
from nltk.sentiment import SentimentIntensityAnalyzer

import sqlite3

# Data manipulation and analysis
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from sqlalchemy.orm import sessionmaker
import matplotlib.dates as mdates

from datetime import datetime
from datetime import timedelta

# Financial data extraction
import yfinance as yf

# SQL Alchemy for database interaction
from sqlalchemy import create_engine, inspect
from sqlalchemy.orm import Session

# Additional libraries for analysis or visualization
import matplotlib.pyplot as plt
import seaborn as sns
from config import api_key

from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [2]:
conn = sqlite3.connect('stock_market_analysis.sqlite')
db = conn.cursor()

In [42]:
engine = create_engine('sqlite:///stock_market_analysis.sqlite')
session = Session(engine)


In [43]:
inspect(engine).get_table_names()

['Average_Sentiment_Score', 'stock_history']

In [44]:
pd.read_sql('SELECT * FROM Average_Sentiment_Score', engine)

Unnamed: 0,index,Ticker,Sentiment Score
0,0,msft,0.05353
1,1,shop,0.07237


In [45]:
pd.read_sql('SELECT * FROM stock_history', engine)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker


In [38]:
tickers = ['TSLA', 'MSFT', 'IBM']
start='2023-04-01'
end='2024-04-01'

In [39]:
engine = create_engine('sqlite:///stock_market_analysis.sqlite')

stock_data = {ticker: yf.download(ticker, start, end) for ticker in tickers}
for ticker, df in stock_data.items():
    df['Ticker'] = ticker

combined_df = pd.concat(stock_data.values())
combined_df.to_sql('stock_history', con=engine, if_exists='replace', index=True)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


747

In [40]:
pd.read_sql('SELECT * FROM stock_history', engine)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,2023-04-03 00:00:00.000000,199.910004,202.690002,192.199997,194.770004,194.770004,169545900,TSLA
1,2023-04-04 00:00:00.000000,197.320007,198.740005,190.320007,192.580002,192.580002,126463800,TSLA
2,2023-04-05 00:00:00.000000,190.520004,190.679993,183.759995,185.520004,185.520004,133882500,TSLA
3,2023-04-06 00:00:00.000000,183.080002,186.389999,179.740005,185.059998,185.059998,123857900,TSLA
4,2023-04-10 00:00:00.000000,179.940002,185.100006,176.110001,184.509995,184.509995,142154600,TSLA
...,...,...,...,...,...,...,...,...
742,2024-03-22 00:00:00.000000,192.000000,192.990005,190.509995,190.839996,188.964172,3987700,IBM
743,2024-03-25 00:00:00.000000,190.259995,190.820007,188.750000,188.789993,186.934326,3718300,IBM
744,2024-03-26 00:00:00.000000,189.020004,190.000000,188.500000,188.500000,186.647186,4229500,IBM
745,2024-03-27 00:00:00.000000,189.600006,190.960007,188.600006,190.800003,188.924576,3693300,IBM


In [51]:
stock_data.values()

dict_values([                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2023-04-03  199.910004  202.690002  192.199997  194.770004  194.770004   
2023-04-04  197.320007  198.740005  190.320007  192.580002  192.580002   
2023-04-05  190.520004  190.679993  183.759995  185.520004  185.520004   
2023-04-06  183.080002  186.389999  179.740005  185.059998  185.059998   
2023-04-10  179.940002  185.100006  176.110001  184.509995  184.509995   
...                ...         ...         ...         ...         ...   
2024-03-22  166.690002  171.199997  166.300003  170.830002  170.830002   
2024-03-25  168.759995  175.240005  168.729996  172.630005  172.630005   
2024-03-26  178.580002  184.250000  177.380005  177.669998  177.669998   
2024-03-27  181.410004  181.910004  176.000000  179.830002  179.830002   
2024-03-28  177.449997  179.570007  175.300003  175.789993  175.789993   

               Volume Ti

In [175]:
get_avg_sentiment_scores(tickers)

In [176]:
def get_avg_sentiment_scores(tickers):
    textual_data = {}

    for ticker in tickers:
        headlines = fetch_news(api_key, ticker)
        textual_data[ticker] = headlines

    # Analyzing sentiment for each headline and averaging the scores
    average_sentiment_scores = {}

    for ticker, headlines in textual_data.items():
        sentiments = [TextBlob(headline).sentiment.polarity if headline else 0 for headline in headlines]
        average_sentiment_scores[ticker] = np.mean(sentiments)

    # Extract tickers and sentiment scores
    tickers = list(average_sentiment_scores.keys())
    sentiment_scores = list(average_sentiment_scores.values())

    df = pd.DataFrame({"Ticker": tickers, "Sentiment Score":sentiment_scores})
    load_data_to_db(df, "Average_Sentiment_Score")

In [177]:
def fetch_news(api_key, ticker):
    base_url = "https://newsapi.org/v2/everything"
    params = {
        'q': ticker,             # Search query (ticker symbol)
        'sortBy': 'publishedAt', # Sort by publication date
        'apiKey': api_key        # Your NewsAPI key
    }
    response = requests.get(base_url, params=params)
    articles = response.json().get('articles', [])
    headlines = [article['title'] for article in articles]
    return headlines

In [178]:
get_avg_sentiment_scores(tickers)

In [179]:
def load_data_to_db(data, ticker):
    """
    Loads transformed data into the SQLite database.
    """
    engine = create_engine('sqlite:///stock_market_analysis.sqlite')
    data.to_sql(ticker, con=engine, if_exists='replace', index=True)


In [180]:
# Reset the index and drop the old index
df.reset_index(drop=True, inplace=True)
print(df.head())  # Display the first few rows of the DataFrame
df


   index  id        Date  Value
0      0   1  2024-01-01  100.0
1      1   2  2024-01-02  110.0
2      2   3  2024-01-03  120.0


Unnamed: 0,index,id,Date,Value
0,0,1,2024-01-01,100.0
1,1,2,2024-01-02,110.0
2,2,3,2024-01-03,120.0


In [181]:
def fetch_and_load_stock_data(tickers, start, end):
    stock_data = {}
    for ticker in tqdm(tickers, desc="Fetching data"):
        logger.info(f"Fetching data for {ticker}")
        try:
            data = yf.download(ticker, start=start, end=end)
            if not data.empty:
                data['Ticker'] = ticker
                stock_data[ticker] = data
                logger.info(f"Data for {ticker} fetched successfully.")
            else:
                logger.warning(f"No data found for {ticker}.")
        except Exception as e:
            logger.error(f"Failed to fetch data for {ticker}: {e}")

    if stock_data:
        df = pd.concat(stock_data.values())
        df.reset_index(inplace=True)
        load_data_to_db(df, 'stock_history')
        logger.info("Data loaded to the database successfully.")
    else:
        logger.warning("No data fetched for any of the tickers.")


In [182]:
def fetch_and_load_stock_data(tickers, start, end):
    stock_data = {}
    for ticker in tickers:
        print(f"Fetching data for {ticker}")
        try:
            stock_data[ticker] = yf.download(ticker, start, end)
            stock_data[ticker]['Ticker'] = ticker
        except Exception as e:
            print(f"Failed to get data for ticker '{ticker}': {e}")
            continue
    
    if stock_data:
        df = pd.concat(stock_data.values())
        df.reset_index(inplace=True)
        load_data_to_db(df, 'stock_history')
    else:
        print("No data fetched for the provided tickers.")


In [183]:
# Define Flask route with unique endpoint name
@app.route('/api/v1.0/load_stock_data/<tickers>/<start>/<end>', endpoint='load_stock_data_v2')
def load_stock_data(tickers, start, end):
    tickers_list = tickers.strip('[]').replace('%22', '').split(',')
    tickers_list = [ticker.strip(' "\'') for ticker in tickers_list]
    fetch_and_load_stock_data(tickers_list, start, end)
    return '<h1>Data has been loaded to the Database</h1>'


NameError: name 'app' is not defined

In [None]:
pd.DataFrame(session.execute(text('SELECT * FROM Average_Sentiment_Score')))

Unnamed: 0,index,Ticker,Sentiment Score
0,0,IBM,0.018263
1,1,MSFT,0.096461


In [None]:
pd.DataFrame(session.execute(text('SELECT * FROM stock_history')))

Unnamed: 0,index,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,0,2023-04-03 00:00:00.000000,130.970001,132.610001,130.770004,132.059998,124.965797,3840100,IBM
1,1,2023-04-04 00:00:00.000000,131.990005,132.149994,130.889999,131.600006,124.530525,3382800,IBM
2,2,2023-04-05 00:00:00.000000,131.369995,132.610001,131.369995,132.139999,125.041504,2898700,IBM
3,3,2023-04-06 00:00:00.000000,132.160004,132.600006,130.320007,130.500000,123.489601,3050600,IBM
4,4,2023-04-10 00:00:00.000000,129.830002,131.080002,129.240005,131.029999,123.991135,2614400,IBM
...,...,...,...,...,...,...,...,...,...
493,493,2024-03-22 00:00:00.000000,429.700012,429.859985,426.070007,428.739990,427.968048,17636500,MSFT
494,494,2024-03-25 00:00:00.000000,425.239990,427.410004,421.609985,422.859985,422.098633,18060500,MSFT
495,495,2024-03-26 00:00:00.000000,425.609985,425.989990,421.350006,421.649994,420.890808,16725600,MSFT
496,496,2024-03-27 00:00:00.000000,424.440002,424.450012,419.010010,421.429993,420.671204,16705000,MSFT


In [None]:
from sqlalchemy import create_engine, text, Table, Column, Integer, String, Float, MetaData
from sqlalchemy.orm import sessionmaker
import pandas as pd

# Define the database URL
DATABASE_URL = "sqlite:///your_database.db"  # Replace with your actual database URL

# Create the database engine
engine = create_engine(DATABASE_URL)

# Create a configured "Session" class
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Define the metadata
metadata = MetaData()

# Define the tables
average_sentiment_score = Table('Average_Sentiment_Score', metadata,
    Column('id', Integer, primary_key=True),
    Column('Date', String),
    Column('Sentiment', Float)
)

stock_history = Table('stock_history', metadata,
    Column('id', Integer, primary_key=True),
    Column('Date', String),
    Column('Ticker', String),
    Column('Close', Float)
)

# Create the tables in the database
metadata.create_all(engine)

# Insert sample data into the tables
def insert_sample_data():
    with engine.connect() as conn:
        conn.execute(text("""
            INSERT INTO Average_Sentiment_Score (Date, Sentiment) VALUES
            ('2024-01-01', 0.5),
            ('2024-01-02', 0.6),
            ('2024-01-03', 0.7)
        """))
        conn.execute(text("""
            INSERT INTO stock_history (Date, Ticker, Close) VALUES
            ('2024-01-01', 'AAPL', 150.0),
            ('2024-01-02', 'AAPL', 152.0),
            ('2024-01-03', 'AAPL', 154.0)
        """))

# Call the function to insert sample data
insert_sample_data()

# Function to execute query and convert result to DataFrame
def query_to_dataframe(query):
    with SessionLocal() as session:
        result = session.execute(text(query)).fetchall()
        df = pd.DataFrame(result)
        if len(df.columns) > 0:
            df.columns = result[0].keys()  # Assign column names from query result
    return df

# Example queries
df_sentiment = query_to_dataframe('SELECT * FROM Average_Sentiment_Score')
df_stock_history = query_to_dataframe('SELECT * FROM stock_history')

# Adjust DataFrame for stock history
if 'Date' in df_stock_history.columns:
    df_stock_history['Date'] = df_stock_history['Date'].str.replace(r'\s.*', '', regex=True)
else:
    print("Column 'Date' not found in DataFrame")

# Display the dataframes (optional)
print(df_sentiment.head())
print(df_stock_history.head())


   id        Date  Sentiment
0   1  2024-01-01        0.5
1   2  2024-01-02        0.6
2   3  2024-01-03        0.7
3   4  2024-01-01        0.5
4   5  2024-01-02        0.6
   id        Date Ticker  Close
0   1  2024-01-01   AAPL  150.0
1   2  2024-01-02   AAPL  152.0
2   3  2024-01-03   AAPL  154.0
3   4  2024-01-01   AAPL  150.0
4   5  2024-01-02   AAPL  152.0


In [None]:
import yfinance as yf
import pandas as pd

def update_stock_data(tickers, start, end):
    stock_data = {ticker: yf.download(ticker, start, end) for ticker in tickers }
    
    for ticker, df in stock_data.items():
         df['Ticker'] = ticker

    combined_df = pd.concat(stock_data.values())

    load_data_to_db(combined_df, 'stock_history')

    return "<h1>SQLite Database was updated</h1>"

tickers = ['AAPL', 'GOOGL', 'MSFT']
start = '2020-01-01'
end = '2023-01-01'
update_stock_data(tickers, start, end)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


'<h1>SQLite Database was updated</h1>'

In [None]:
import yfinance as yf
import pandas as pd

def update_stock_data(tickers, start, end):
    stock_data = {ticker: yf.download(ticker, start, end) for ticker in tickers}
    
    for ticker, df in stock_data.items():
        df['Ticker'] = ticker
    
    combined_df = pd.concat(stock_data.values())
    
    load_data_to_db(combined_df, 'stock_history')
    
    return '<h1>SQLite Database was updated</h1>'

#Example usage:
tickers = ['AAPL', 'GOOGL', 'MSFT']
start = '2020-01-01'
end = '2023-01-01'
update_stock_data(tickers, start, end)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


'<h1>SQLite Database was updated</h1>'

In [None]:
fetch_all_stock_data(tickers,start,end)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

  Ticker  Sentiment Score
0    IBM         0.024996
1   MSFT         0.065728





'<h1>SQLite Database was updated</h1>'

In [None]:
x

['Average_Sentiment_Score', 'stock_history']

In [None]:
def fetch_all_stock_data(tickers, start='2023-04-01', end='2024-04-01'):
    get_avg_sentiment_scores(tickers)

    stock_data = {ticker: yf.download(ticker, start, end) for ticker in tickers}
    for ticker, df in stock_data.items():
        df['Ticker'] = ticker

    combined_df = pd.concat(stock_data.values())

    print(combined_df)
    
    load_data_to_db(combined_df, 'stock_history')

    return '<h1>SQLite Database was updated</h1>'

In [None]:
fetch_all_stock_data(tickers,start,end)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

  Ticker  Sentiment Score
0    IBM         0.024996
1   MSFT         0.065728
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2023-04-03  130.970001  132.610001  130.770004  132.059998  124.965797   
2023-04-04  131.990005  132.149994  130.889999  131.600006  124.530525   
2023-04-05  131.369995  132.610001  131.369995  132.139999  125.041504   
2023-04-06  132.160004  132.600006  130.320007  130.500000  123.489601   
2023-04-10  129.830002  131.080002  129.240005  131.029999  123.991135   
...                ...         ...         ...         ...         ...   
2024-03-22  429.700012  429.859985  426.070007  428.739990  427.968048   
2024-03-25  425.239990  427.410004  421.609985  422.859985  422.098633   
2024-03-26  425.609985  425.989990  421.350006  421.649994  420.890808   
2024-03-27  424.440002  424.450012  419.010010  421.429993  420.671204   
2024-03-28  420.959991  421.869995




'<h1>SQLite Database was updated</h1>'

In [None]:
@app.route('/')
def home():
    return render_template('index.html')


In [None]:
import sqlite3
import pandas as pd

# Function to list all tables in the database
def list_tables(db_connection):
    cursor = db_connection.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()
    return [table[0] for table in tables]

# Function to fetch data from a specified table and convert to DataFrame
def fetch_data_from_db(table_name, db_connection):
    query = f"SELECT * FROM {table_name}"
    df = pd.read_sql_query(query, db_connection)
    return df

# Function to create a test table if no tables exist
def create_test_table(db_connection):
    cursor = db_connection.cursor()
    cursor.execute("""
        CREATE TABLE test_table (
            id INTEGER PRIMARY KEY,
            Date TEXT,
            Value REAL
        )
    """)
    cursor.execute("""
        INSERT INTO test_table (Date, Value) VALUES
        ('2024-01-01', 100.0),
        ('2024-01-02', 110.0),
        ('2024-01-03', 120.0)
    """)
    db_connection.commit()

# Connect to your SQLite database
db_path = 'your_database.db'  # Make sure this path is correct
db_connection = sqlite3.connect(db_path)

# List all tables
tables = list_tables(db_connection)
print("Available tables:", tables)

# If no tables are found, create a test table
if not tables:
    print("No tables found. Creating a test table...")
    create_test_table(db_connection)
    tables = list_tables(db_connection)
    print("Available tables after creating test table:", tables)

# Use an existing table name from the list of tables
if tables:
    table_name = tables[0]  # Replace with the desired table name
    print(f"Fetching data from table: {table_name}")

    # Fetch the data into a DataFrame
    df = fetch_data_from_db(table_name, db_connection)

    # Now you can safely call reset_index and access the Date column
    df.reset_index(inplace=True)
    print(df.head())  # Display the first few rows of the DataFrame

    # If there is a Date column, you can print it
    if 'Date' in df.columns:
        print(df.Date)
    else:
        print("No 'Date' column found in the DataFrame")
else:
    print("No tables found in the database.")


Available tables: ['test_table', 'Average_Sentiment_Score', 'stock_history']
Fetching data from table: test_table
   index  id        Date  Value
0      0   1  2024-01-01  100.0
1      1   2  2024-01-02  110.0
2      2   3  2024-01-03  120.0
0    2024-01-01
1    2024-01-02
2    2024-01-03
Name: Date, dtype: object


In [22]:
The error ValueError('unsupported pickle protocol: 5') typically occurs because the data being loaded or saved is using a newer version of the pickle protocol than what is supported by your current Python environment. Pickle protocol 5 is introduced in Python 3.8, so you need at least Python 3.8 to work with it.

Steps to Fix:

	1.	Ensure Python 3.8 or Later: Verify that your environment is using Python 3.8 or later.
	2.	Reinstall yfinance and Dependencies: Ensure that the libraries are compatible with your Python version.

Step-by-Step Instructions:

1. Check Python Version:

Ensure you are using Python 3.8 or later:

python --version

If your Python version is earlier than 3.8, you need to upgrade your Python installation.

2. Update and Reinstall Dependencies:

Update your libraries to ensure compatibility:

pip install --upgrade yfinance pandas numpy

Adjusted Code:

Ensure that your script is set up correctly for your updated environment. Here’s the complete code with print statements to verify each step:

app.py

import yfinance as yf
import pandas as pd
from sqlalchemy import create_engine, text
from tqdm import tqdm
from flask import Flask, jsonify

# Initialize Flask app
app = Flask(__name__)

# Database connection
engine = create_engine('sqlite:///stock_market_analysis.sqlite')
SessionLocal = sessionmaker(bind=engine)

def load_data_to_db(data, table_name):
    """
    Loads transformed data into the SQLite database.
    """
    data.to_sql(table_name, con=engine, if_exists='replace', index=True)

def fetch_and_load_stock_data(tickers, start, end):
    stock_data = {}
    for ticker in tqdm(tickers, desc="Fetching data"):
        print(f"Fetching data for {ticker}")
        try:
            data = yf.download(ticker, start=start, end=end)
            if not data.empty:
                data['Ticker'] = ticker
                stock_data[ticker] = data
                print(f"Data for {ticker} fetched successfully.")
            else:
                print(f"No data found for {ticker}.")
        except Exception as e:
            print(f"Failed to fetch data for {ticker}: {e}")

    if stock_data:
        df = pd.concat(stock_data.values())
        df.reset_index(inplace=True)
        load_data_to_db(df, 'stock_history')
        print("Data loaded to the database successfully.")
    else:
        print("No data fetched for any of the tickers.")

# Define Flask route with unique endpoint name
@app.route('/api/v1.0/load_stock_data/<tickers>/<start>/<end>', endpoint='load_stock_data_v2')
def load_stock_data(tickers, start='2020-01-01', end='2024-01-01'):
    tickers_list = tickers.strip('[]').replace('%22', '').split(',')
    tickers_list = [ticker.strip(' "\'') for ticker in tickers_list]
    fetch_and_load_stock_data(tickers_list, start, end)
    return '<h1>Data has been loaded to the Database</h1>'

# Query the database and return as DataFrame
def query_to_dataframe(query):
    with SessionLocal() as session:
        result = session.execute(text(query)).fetchall()
        df = pd.DataFrame(result)
        if len(df.columns) > 0:
            df.columns = result[0].keys()  # Assign column names from query result
        else:
            print("No data returned from query.")
    return df

# Example queries
df_sentiment = query_to_dataframe('SELECT * FROM Average_Sentiment_Score')
df_stock_history = query_to_dataframe('SELECT * FROM stock_history')

# Adjust DataFrame for stock history
if 'Date' in df_stock_history.columns:
    df_stock_history['Date'] = df_stock_history['Date'].str.replace(r'\s.*', '', regex=True)
else:
    print("Column 'Date' not found in DataFrame")

if __name__ == '__main__':
    # Change port to avoid conflicts
    app.run(debug=True, port=5003)

Explanation:

	1.	Verify Python Version: Ensure your environment uses Python 3.8 or later to support pickle protocol 5.
	2.	Reinstall Dependencies: Update and reinstall yfinance, pandas, and numpy to ensure compatibility.
	3.	Print Statements for Debugging: Added print statements to verify data fetching and loading steps.

Running the Flask App:

After ensuring your environment is updated and dependencies are reinstalled, run the Flask app using:

export FLASK_APP=app.py
export FLASK_ENV=development
flask run --port 5003

By following these steps, you should be able to resolve the “unsupported pickle protocol” error and run your Flask application successfully.

No data returned from query.
Column 'Date' not found in DataFrame
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5003
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with watchdog (fsevents)
0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
Traceback (most recent call last):
  File "/Users/yakupaltinisik/anaconda3/envs/mlenv/lib/python3.11/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/Users/yakupaltinisik/anaconda3/envs/mlenv/lib/python3.11/site-packages/traitlets/config/application.py", line 991, in launch_instance
    app.initialize(argv)
  File "/Users/yakupaltinisik/anaconda3/envs/mlenv/lib/python3.11/site-packages/traitlets/config/application.py", line 113, in inner
    return method(app, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/yakupaltinisik/anaconda3/envs/mlenv/lib/python3.11/site-

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
The error “Unsupported pickle protocol” typically indicates that the version of yfinance you are using is trying to use a newer version of the pickle protocol that your environment does not support. This can happen if there is a version mismatch between different libraries or if the environment’s Python version is too old.

Steps to Fix:

	1.	Ensure You Are Using a Compatible Version of Python:
	•	Python 3.8 or later is recommended to ensure compatibility with the latest libraries.
	2.	Update the yfinance Library:
	•	Ensure you are using the latest version of yfinance.
	3.	Update Other Libraries:
	•	Update other related libraries such as pandas and numpy.

Updating Libraries and Python Environment

Update Python (if necessary):

Make sure you are using a Python version that supports the latest yfinance library.

Update Libraries:

You can update your libraries using pip.

pip install --upgrade yfinance pandas numpy

Adjusted Code

Here’s the complete code with instructions on how to update the environment:

app.py

import yfinance as yf
import pandas as pd
from sqlalchemy import create_engine, text
from tqdm import tqdm
from flask import Flask, jsonify

# Initialize Flask app
app = Flask(__name__)

# Database connection
engine = create_engine('sqlite:///stock_market_analysis.sqlite')
SessionLocal = sessionmaker(bind=engine)

def load_data_to_db(data, table_name):
    """
    Loads transformed data into the SQLite database.
    """
    data.to_sql(table_name, con=engine, if_exists='replace', index=True)

def fetch_and_load_stock_data(tickers, start, end):
    stock_data = {}
    for ticker in tqdm(tickers, desc="Fetching data"):
        print(f"Fetching data for {ticker}")
        try:
            data = yf.download(ticker, start=start, end=end)
            if not data.empty:
                data['Ticker'] = ticker
                stock_data[ticker] = data
                print(f"Data for {ticker} fetched successfully.")
            else:
                print(f"No data found for {ticker}.")
        except Exception as e:
            print(f"Failed to fetch data for {ticker}: {e}")

    if stock_data:
        df = pd.concat(stock_data.values())
        df.reset_index(inplace=True)
        load_data_to_db(df, 'stock_history')
        print("Data loaded to the database successfully.")
    else:
        print("No data fetched for any of the tickers.")

# Define Flask route with unique endpoint name
@app.route('/api/v1.0/load_stock_data/<tickers>/<start>/<end>', endpoint='load_stock_data_v2')
def load_stock_data(tickers, start='2020-01-01', end='2024-01-01'):
    tickers_list = tickers.strip('[]').replace('%22', '').split(',')
    tickers_list = [ticker.strip(' "\'') for ticker in tickers_list]
    fetch_and_load_stock_data(tickers_list, start, end)
    return '<h1>Data has been loaded to the Database</h1>'

# Query the database and return as DataFrame
def query_to_dataframe(query):
    with SessionLocal() as session:
        result = session.execute(text(query)).fetchall()
        df = pd.DataFrame(result)
        if len(df.columns) > 0:
            df.columns = result[0].keys()  # Assign column names from query result
        else:
            print("No data returned from query.")
    return df

# Example queries
df_sentiment = query_to_dataframe('SELECT * FROM Average_Sentiment_Score')
df_stock_history = query_to_dataframe('SELECT * FROM stock_history')

# Adjust DataFrame for stock history
if 'Date' in df_stock_history.columns:
    df_stock_history['Date'] = df_stock_history['Date'].str.replace(r'\s.*', '', regex=True)
else:
    print("Column 'Date' not found in DataFrame")

if __name__ == '__main__':
    # Change port to avoid conflicts
    app.run(debug=True, port=5003)

Explanation:

	1.	Updating Libraries: Ensure you have the latest versions of yfinance, pandas, and numpy.
	2.	Environment Compatibility: Make sure you are using a Python version compatible with the latest yfinance library (Python 3.8 or later is recommended).

Running the Flask App:

After ensuring your environment is updated, run the Flask app using:

export FLASK_APP=app.py
export FLASK_ENV=development
flask run --port 5003

By following these steps, you should be able to resolve the “Unsupported pickle protocol” error and run your Flask application successfully.