In [43]:
import pandas as pd
def read_text_file(file_path,printflag=0):
    try:
        with open(file_path, 'r') as file:
            # Read the entire file contents into a string
            file_contents = file.read()
        if printflag:
            print("File contents:")
            print(file_contents)
        return file_contents
    except FileNotFoundError:
        return f"File not found: {file_path}"
    except Exception as e:
        return f"An error occurred: {str(e)}"

  


In [32]:
# stock data
import yfinance as yf
import os

def download_stock_data(company_name, start_date, end_date, data_dir):
    # Create a directory to store the data if it doesn't exist
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
    
    # Define the data file path
    data_file_path = f"{data_dir}/{company_name}_stock_data.csv"
    
    if os.path.isfile(data_file_path):
        print(f"Data for {company_name} already downloaded. Loading from {data_file_path}")
        stock_data = yf.download(company_name, start=start_date, end=end_date)
        return stock_data
    else:
        print(f"Downloading data for {company_name} from Yahoo Finance...")
        stock_data = yf.download(company_name, start=start_date, end=end_date)
        stock_data.to_csv(data_file_path)
        print(f"Data downloaded and saved to {data_file_path}")
        return stock_data

# Define the parameters
company_name = "AAPL"
start_date = "2022-10-31"
end_date = "2023-10-31"
data_directory = "../../data/stock_data"

# Call the function to download or load the data
stock_data = download_stock_data(company_name, start_date, end_date, data_directory)

# Now you have the data in the `aapl_data` DataFrame
print(stock_data)



Data for AAPL already downloaded. Loading from ../../data/stock_data/AAPL_stock_data.csv


[*********************100%%**********************]  1 of 1 completed
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2022-10-31  153.160004  154.240005  151.919998  153.339996  152.435684   
2022-11-01  155.080002  155.449997  149.130005  150.649994  149.761536   
2022-11-02  148.949997  152.169998  145.000000  145.029999  144.174698   
2022-11-03  142.059998  142.800003  138.750000  138.880005  138.060974   
2022-11-04  142.089996  142.669998  134.380005  138.380005  137.792114   
...                ...         ...         ...         ...         ...   
2023-10-24  173.050003  173.669998  171.449997  173.440002  173.440002   
2023-10-25  171.880005  173.059998  170.649994  171.100006  171.100006   
2023-10-26  170.369995  171.380005  165.669998  166.889999  166.889999   
2023-10-27  166.910004  168.960007  166.830002  168.220001  168.220001   
2023-10-30  169.020004  171.169998  168.869

In [33]:
# exploring the stock data
stock_data
type(stock_data) #pandas dataframe
stock_data.shape #(251,6)
# stock_data.head()
stock_data.describe()

stock_data.columns #['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
stock_data.index #['2022-10-31', ..., '2023-10-30']

# Accessing a specific row by label (using .loc)
stock_data.loc['2023-10-30']

# Accessing a specific row by integer index (using .iloc)
stock_data.iloc[2]  # Access the third row (0-based index)

# --------------------------
# Accessing a Specific Column:

# Using square brackets
stock_data['Volume']

# Using dot notation
stock_data.Open
# ----------------------




Date
2022-10-31    153.160004
2022-11-01    155.080002
2022-11-02    148.949997
2022-11-03    142.059998
2022-11-04    142.089996
                 ...    
2023-10-24    173.050003
2023-10-25    171.880005
2023-10-26    170.369995
2023-10-27    166.910004
2023-10-30    169.020004
Name: Open, Length: 251, dtype: float64

In [66]:
# news data
# Usage example:
from newsapi import NewsApiClient
api_path = '../../data/newsapi_key.txt'
api_key= read_text_file(api_path,1).strip()
newsapi = NewsApiClient(api_key=api_key)


File contents:
0f2340d896c54adfbf54f91b9875a6d9



In [92]:

# print(api_key)
start_date = "2023-10-05"
end_date = "2023-10-30"
news_articles = newsapi.get_everything(
    q=company_name,
    from_param=start_date,
    to=end_date,
    language="en",
    sort_by="publishedAt",
    page_size=10
)

# Access the list of articles
articles = news_articles['articles']
news_data=pd.DataFrame(articles)

In [97]:
# for i in news_articles:
    # print(i)
news_data.content

0    To gain an edge, this is what you need to know...
1    Shares of Snap Inc. \r\n SNAP,\r\n +4.30%\r\nr...
2    LONDON Facebook and Instagram users in Europe ...
3    Apple CEO Tim Cook and a team of Apple executi...
4    Apple CEO Tim Cook\r\nApple's fourth fiscal qu...
5    US stocks rallied on Monday after a bruising s...
6    Shares of Walt Disney Co. \r\n DIS,\r\n +1.70%...
7    Shares of Verizon Communications Inc. \r\n VZ,...
8    The "Magnificent Seven" mega-cap tech names th...
9    Testifying in the biggest U.S. antitrust case ...
Name: content, dtype: object

In [95]:
print(len(news_articles['articles']))
# print(articles)

10
[{'source': {'id': None, 'name': 'Biztoc.com'}, 'author': 'benzinga.com', 'title': 'Some Chinese Abandoning Apple, Stock Buying On No Response From Iran, All Eyes On Bank Of Japan', 'description': 'To gain an edge, this is what you need to know today. Please click here for a chart of Apple Inc AAPL. • The Morning Capsule is about the big picture, not an individual stock. The chart of AAPL stock is being used to illustrate the point. • This morning inves…', 'url': 'https://biztoc.com/x/d9a3355a2a89ff80', 'urlToImage': 'https://c.biztoc.com/p/d9a3355a2a89ff80/s.webp', 'publishedAt': '2023-10-30T21:52:09Z', 'content': 'To gain an edge, this is what you need to know today.Please click here for a chart of Apple Inc AAPL.The Morning Capsule is about the big picture, not an individual stock. The chart of AAPL stock is … [+273 chars]'}, {'source': {'id': None, 'name': 'MarketWatch'}, 'author': 'MarketWatch Automation', 'title': 'Company Close Updates: Snap Inc. stock outperforms competitors

In [80]:
type(news_data) #pandas dataframe
news_data.shape #(10,8)
news_data.head()
# news_data.describe()

news_data.columns #['source', 'author', 'title', 'description', 'url', 'urlToImage', 'publishedAt', 'content']
# text1=news_data.iloc[0]

Index(['source', 'author', 'title', 'description', 'url', 'urlToImage',
       'publishedAt', 'content'],
      dtype='object')

In [79]:
news_data.shape #(10,8)


(10, 8)

In [81]:
news_data.to_csv('../../data/news_data.csv', index=False)  # Use your desired file path
