### 1. Loading yfinance data

In [None]:
#!pip install yfinance


In [1]:
import yfinance as yf
import pandas as pd
from datetime import datetime

def fetch_stock_data(company_name):
    """Download the full available price history for a ticker symbol.

    Parameters
    ----------
    company_name : str
        Ticker symbol to look up (e.g. "AAPL"). Leading and trailing
        whitespace is ignored.

    Returns
    -------
    DataFrame or None
        The non-empty result of ``yf.Ticker(symbol).history(period="max")``.
        ``None`` is returned (after printing an explanation) when the symbol
        is blank or not a string, when the history comes back empty, or when
        the lookup raises.
    """
    # Reject blank / non-string input up front rather than issuing a request
    # that cannot succeed.
    if not isinstance(company_name, str) or not company_name.strip():
        print("Error: no ticker symbol was provided. Please try again.")
        return None

    # input() results often carry stray whitespace; look up the trimmed symbol.
    symbol = company_name.strip()

    try:
        ticker = yf.Ticker(symbol)
        stock_data = ticker.history(period="max")  # all data the API returns
    except Exception as e:
        # Stay best-effort (never raise to the caller), but report the real
        # cause instead of always claiming the company is unlisted.
        print(f"Error: could not fetch data for '{symbol}' ({type(e).__name__}: {e}). Please try again.")
        return None

    # An empty frame means no price history was returned for this symbol.
    if stock_data.empty:
        print(f"The company '{symbol}' is not currently listed or has been delisted.")
        return None

    print(f"Fetching stock data for {symbol}")
    return stock_data


# Interactive demo: prompt for a ticker, fetch its history, and print the
# result only when the lookup returned data.
company_name = input("Enter the company's stock ticker symbol: ")
if (stock_data := fetch_stock_data(company_name)) is not None:
    print(stock_data)


Enter the company's stock ticker symbol: AAPL
Fetching stock data for AAPL
                                 Open        High         Low       Close  \
Date                                                                        
1980-12-12 00:00:00-05:00    0.098943    0.099373    0.098943    0.098943   
1980-12-15 00:00:00-05:00    0.094211    0.094211    0.093781    0.093781   
1980-12-16 00:00:00-05:00    0.087328    0.087328    0.086898    0.086898   
1980-12-17 00:00:00-05:00    0.089049    0.089479    0.089049    0.089049   
1980-12-18 00:00:00-05:00    0.091630    0.092061    0.091630    0.091630   
...                               ...         ...         ...         ...   
2024-10-14 00:00:00-04:00  228.699997  231.729996  228.600006  231.300003   
2024-10-15 00:00:00-04:00  233.610001  237.490005  232.369995  233.850006   
2024-10-16 00:00:00-04:00  231.600006  232.119995  229.839996  231.779999   
2024-10-17 00:00:00-04:00  233.429993  233.850006  230.520004  232.149994   
2

In [3]:
# Preview the first five rows (five is head()'s default row count)
stock_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1980-12-12 00:00:00-05:00,0.098943,0.099373,0.098943,0.098943,469033600,0.0,0.0
1980-12-15 00:00:00-05:00,0.094211,0.094211,0.093781,0.093781,175884800,0.0,0.0
1980-12-16 00:00:00-05:00,0.087328,0.087328,0.086898,0.086898,105728000,0.0,0.0
1980-12-17 00:00:00-05:00,0.089049,0.089479,0.089049,0.089049,86441600,0.0,0.0
1980-12-18 00:00:00-05:00,0.09163,0.092061,0.09163,0.09163,73449600,0.0,0.0


### 2. Fama-French 5-Factor Dataset

In [7]:
# Zipped CSV of the daily Fama-French 5-factor data
url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_5_Factors_2x3_daily_CSV.zip"

# Read the archive straight from the URL, skipping the first three lines,
# then give the columns explicit names
ff_factors = pd.read_csv(url, skiprows=3)
ff_factors.columns = ['Date', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

# Clean in a single chain: drop rows with missing values, parse the YYYYMMDD
# dates, index by date, and keep observations dated 1963-01-01 or later
ff_factors = (
    ff_factors
    .dropna()
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y%m%d'))
    .set_index('Date')
    .loc[lambda frame: frame.index >= '1963-01-01']
)

# Show the cleaned factors
print(ff_factors)


            Mkt-RF   SMB   HML   RMW   CMA     RF
Date                                             
1963-07-01   -0.67  0.02 -0.35  0.03  0.13  0.012
1963-07-02    0.79 -0.28  0.28 -0.08 -0.21  0.012
1963-07-03    0.63 -0.18 -0.10  0.13 -0.25  0.012
1963-07-05    0.40  0.09 -0.28  0.07 -0.30  0.012
1963-07-08   -0.63  0.07 -0.20 -0.27  0.06  0.012
...            ...   ...   ...   ...   ...    ...
2024-08-26   -0.34  0.33  0.16  0.13 -0.06  0.022
2024-08-27    0.06 -0.90  0.02  0.27  0.23  0.022
2024-08-28   -0.67 -0.22  1.14  0.55 -0.16  0.022
2024-08-29    0.08  0.67  0.28 -0.15 -1.22  0.022
2024-08-30    0.98 -0.56  0.04 -0.20 -0.14  0.022

[15397 rows x 6 columns]


### 3. Aruoba-Diebold-Scotti (ADS) Business Conditions Index Data

In [28]:
# Read the ADS index CSV into a DataFrame.
# NOTE(review): this is an absolute path on one developer's machine; the cell
# fails elsewhere unless the file exists at the same location.
ads_index = pd.read_csv(
    filepath_or_buffer="/Users/omkarvilasnarkar/Downloads/MLOps 7374/Project/StockPricePrediction/data/ADS_Index.csv",
)

In [34]:
# Relabel the 'Unnamed: 0' column as 'Date' (modifies ads_index in place)
ads_index.rename({'Unnamed: 0': 'Date'}, axis='columns', inplace=True)

In [38]:
# Show ads_index as the cell output to confirm the column is now labelled 'Date'
ads_index

Unnamed: 0,Date,ADS_Index
0,1960:03:01,-0.572620
1,1960:03:02,-0.619638
2,1960:03:03,-0.663706
3,1960:03:04,-0.704837
4,1960:03:05,-0.743043
...,...,...
22645,2022:03:01,0.983695
22646,2022:03:02,0.964968
22647,2022:03:03,0.946977
22648,2022:03:04,0.929715
