In [2]:
# Install packages from requirements.txt
!pip install -r requirements.txt




In [1]:
import os
import yfinance as yf
import pandas as pd

# Working directory for the notebook, so relative paths such as "data/..." resolve.
# Set the STOCK_PROJECT_DIR environment variable to point at your own checkout
# instead of editing this cell; the original author's path is kept as the fallback.
new_directory = os.environ.get(
    "STOCK_PROJECT_DIR",
    "/home/manormanore/Documents/Git Hub/StockPricePrediction",
)
os.chdir(new_directory)
print("Current Directory:", os.getcwd())


Current Directory: /home/manormanore/Documents/Git Hub/StockPricePrediction


### 1. Loading yfinance data

In [3]:
# Fetch Data function
def fetch_stock_data(company_ticker):
    """Fetch the maximum available price history for a ticker via yfinance.

    Parameters
    ----------
    company_ticker : str
        Ticker symbol handed to ``yf.Ticker`` (for example ``"AAPL"``).

    Returns
    -------
    object or None
        Whatever ``Ticker.history(period="max")`` returns (a DataFrame in the
        notebook's recorded output) when it is non-empty. ``None`` when the
        result is empty or when any exception is raised while fetching.

    Notes
    -----
    This function never raises: failures are reported on stdout and signalled
    with ``None``, so callers must check the return value.
    """

    try:
        ticker = yf.Ticker(company_ticker)
        stock_data = ticker.history(period="max")  # Fetching available data

        # An empty result is treated as "unknown or delisted ticker".
        if stock_data.empty:
            print(f"The company '{company_ticker}' is not currently listed or may have been delisted.")
            return None

        print(f"Fetching stock data for {company_ticker}")
        return stock_data

    except Exception as e:
        # Best-effort by design: keep the notebook running, but surface the
        # underlying cause so network/API failures are not mistaken for a bad ticker.
        print(f"Error: Unable to fetch data for '{company_ticker}'. Please check the ticker and try again.")
        print(f"Details: {type(e).__name__}: {e}")
        return None

if __name__ == "__main__":
    # Example run: fetch history for one ticker. Both names are assigned at
    # module level, so they remain available after this block finishes.
    company_ticker = "AAPL"  # Apple Inc.
    stock_data = fetch_stock_data(company_ticker)
    
    # fetch_stock_data returns None on an empty result or an error, so only
    # print when real data came back.
    if stock_data is not None:
        print(stock_data)


Fetching stock data for AAPL
                                 Open        High         Low       Close  \
Date                                                                        
1980-12-12 00:00:00-05:00    0.098943    0.099373    0.098943    0.098943   
1980-12-15 00:00:00-05:00    0.094211    0.094211    0.093781    0.093781   
1980-12-16 00:00:00-05:00    0.087328    0.087328    0.086898    0.086898   
1980-12-17 00:00:00-05:00    0.089049    0.089479    0.089049    0.089049   
1980-12-18 00:00:00-05:00    0.091630    0.092061    0.091630    0.091630   
...                               ...         ...         ...         ...   
2024-10-23 00:00:00-04:00  234.080002  235.139999  227.759995  230.759995   
2024-10-24 00:00:00-04:00  229.979996  230.820007  228.410004  230.570007   
2024-10-25 00:00:00-04:00  229.740005  233.220001  229.570007  231.410004   
2024-10-28 00:00:00-04:00  233.320007  234.729996  232.550003  233.399994   
2024-10-29 00:00:00-04:00  233.195007  234.3200

In [4]:
# Preview the first five rows (head() defaults to n=5).
stock_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1980-12-12 00:00:00-05:00,0.098943,0.099373,0.098943,0.098943,469033600,0.0,0.0
1980-12-15 00:00:00-05:00,0.094211,0.094211,0.093781,0.093781,175884800,0.0,0.0
1980-12-16 00:00:00-05:00,0.087328,0.087328,0.086898,0.086898,105728000,0.0,0.0
1980-12-17 00:00:00-05:00,0.089049,0.089479,0.089049,0.089049,86441600,0.0,0.0
1980-12-18 00:00:00-05:00,0.09163,0.092061,0.09163,0.09163,73449600,0.0,0.0


### 2. Fama-French 5-Factor Dataset

In [5]:
# Download the daily Fama-French 5-factor file (pandas reads the zipped CSV directly).
url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_5_Factors_2x3_daily_CSV.zip"

# Skip the first three lines of the file, then give the columns explicit names.
ff_factors = pd.read_csv(url, skiprows=3)
ff_factors.columns = ['Date', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

# Drop rows with missing values, parse the YYYYMMDD dates and index by them.
ff_factors = ff_factors.dropna()
ff_factors = ff_factors.assign(
    Date=pd.to_datetime(ff_factors['Date'], format='%Y%m%d')
).set_index('Date')

# Keep observations dated 1963-01-01 or later.
ff_factors = ff_factors.loc[ff_factors.index >= '1963-01-01']
print(ff_factors)

            Mkt-RF   SMB   HML   RMW   CMA     RF
Date                                             
1963-07-01   -0.67  0.02 -0.35  0.03  0.13  0.012
1963-07-02    0.79 -0.28  0.28 -0.08 -0.21  0.012
1963-07-03    0.63 -0.18 -0.10  0.13 -0.25  0.012
1963-07-05    0.40  0.09 -0.28  0.07 -0.30  0.012
1963-07-08   -0.63  0.07 -0.20 -0.27  0.06  0.012
...            ...   ...   ...   ...   ...    ...
2024-08-26   -0.34  0.33  0.16  0.13 -0.06  0.022
2024-08-27    0.06 -0.90  0.02  0.27  0.23  0.022
2024-08-28   -0.67 -0.22  1.14  0.55 -0.16  0.022
2024-08-29    0.08  0.67  0.28 -0.15 -1.22  0.022
2024-08-30    0.98 -0.56  0.04 -0.20 -0.14  0.022

[15397 rows x 6 columns]


### 3. Aruoba-Diebold-Scotti (ADS) Business Conditions Index Data

In [6]:
# Load the ADS index CSV and relabel its unnamed first column as "Date".
ads_index = pd.read_csv("data/ADS_Index.csv").rename(columns={'Unnamed: 0': 'Date'})
ads_index

Unnamed: 0,Date,ADS_Index
0,1960:03:01,-0.578899
1,1960:03:02,-0.627759
2,1960:03:03,-0.673502
3,1960:03:04,-0.716147
4,1960:03:05,-0.755708
...,...,...
23597,2024:10:08,-0.222386
23598,2024:10:09,-0.214301
23599,2024:10:10,-0.207277
23600,2024:10:11,-0.201311
