# Automatically fetching data from the last fetched point every 5 days

In [120]:
# !pip install schedule

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import datetime
import schedule
import time
import os

# Function to fetch news data from the API
def fetch_news_data(function, ticker, topics, api_key, limit, time_from, sort):
    """Query the Alpha Vantage ``query`` endpoint and return the decoded JSON.

    Args:
        function: Value for the ``function`` query parameter.
        ticker: Value for the ``tickers`` query parameter.
        topics: Value for the ``topics`` query parameter.
        api_key: Value for the ``apikey`` query parameter.
        limit: Value for the ``limit`` query parameter.
        time_from: Lower time bound, either a timestamp string or a
            ``datetime.datetime``. A 15-character string (with seconds, as
            stored in ``time_published``) has its last two characters removed
            before it is sent.
        sort: Value for the ``sort`` query parameter.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # Callers may hand over a datetime; format it to minute precision, the
    # same shape the trimmed string form below ends up with.
    if isinstance(time_from, datetime.datetime):
        time_from = time_from.strftime("%Y%m%dT%H%M")

    # Check if the input string has a length of 15 characters
    if len(time_from) == 15:
        time_from = time_from[:-2]  # Remove the last two characters (seconds)

    # Let requests build and escape the query string instead of concatenating
    # raw values into the URL.
    params = {
        'function': function,
        'tickers': ticker,
        'topics': topics,
        'apikey': api_key,
        'limit': limit,
        'time_from': time_from,
        'sort': sort,
    }
    # Without a timeout a stalled connection would hang the caller forever.
    r = requests.get('https://www.alphavantage.co/query', params=params, timeout=30)
    r.raise_for_status()
    return r.json()


# Function to update the CSV file with new news data
def update_news_data_for_ticker(ticker, start_datetime, end_datetime):
    """Fetch news newer than the last stored article and add it to the ticker's CSV.

    The CSV is ``<ticker>_News_Content.csv`` in the working directory. If it
    exists, fetching resumes one minute after its latest ``time_published``
    value; otherwise fetching starts six days before now and the file is
    created.

    Args:
        ticker: Stock symbol to update.
        start_datetime: Unused; kept so existing callers keep working.
        end_datetime: Unused; kept so existing callers keep working.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Define parameters
    function = "NEWS_SENTIMENT"
    topics = "technology"
    # The key can be supplied through the environment; the fallback is the
    # empty string that was hard-coded here before.
    api_key = os.environ.get("ALPHAVANTAGE_API_KEY", "")
    limit = "1000"
    sort = "EARLIEST"

    csv_path = ticker + '_News_Content.csv'
    timestamp_format = "%Y%m%dT%H%M%S"
    csv_exists = os.path.exists(csv_path)

    # Resume from the newest stored article when there is one.
    latest_fetched_datetime = None
    if csv_exists:
        existing_df = pd.read_csv(csv_path)
        published = existing_df['time_published'].dropna()
        # A header-only file has no timestamp to resume from.
        if not published.empty:
            timestamp_dt = datetime.datetime.strptime(str(published.max()), timestamp_format)
            # Add one minute so the newest stored article is not fetched again
            # (the request is sent with minute precision).
            updated_timestamp_dt = timestamp_dt + datetime.timedelta(minutes=1)
            latest_fetched_datetime = updated_timestamp_dt.strftime(timestamp_format)

    if latest_fetched_datetime is None:
        # First run: start six days back. Format as a string, because the
        # fetch helper works on timestamp strings, not datetime objects.
        six_days_ago = datetime.datetime.now() - datetime.timedelta(days=6)
        latest_fetched_datetime = six_days_ago.strftime(timestamp_format)

    # Fetch new news data starting from the latest fetched date-time
    new_data = fetch_news_data(function, ticker, topics, api_key, limit, latest_fetched_datetime, sort)

    # Error and rate-limit responses carry no 'feed' key; show what came back
    # instead of failing with a KeyError.
    if not isinstance(new_data, dict) or 'feed' not in new_data:
        print(f"No news feed returned for {ticker}: {new_data}")
        return

    feed = new_data['feed']
    if not feed:
        print(f"No new records to add for {ticker}.")
        return

    new_df = cleaning_fetched_data(pd.DataFrame(feed), ticker)
    if new_df.empty:
        print(f"No new records to add for {ticker}.")
        return

    if csv_exists:
        # Save the new data to the CSV file in "append" mode
        new_df.to_csv(csv_path, mode='a', index=False, header=False)
    else:
        # Save the DataFrame to a CSV file if the file doesn't exist
        new_df.to_csv(csv_path, index=False)
    print(f"{len(new_df)} new records added to the {ticker}_News_Content.csv file.")

def cleaning_fetched_data(new_df, ticker):
    """Reduce a raw news-feed frame to the rows and columns kept for ``ticker``.

    Removes the unwanted metadata columns, flattens the first entry of each
    article's ``ticker_sentiment`` list into the columns ``ticker``,
    ``relevance_score``, ``ticker_sentiment_score`` and
    ``ticker_sentiment_label``, and keeps only the articles whose first entry
    is for ``ticker``.

    Args:
        new_df: DataFrame built from the API's ``feed`` list. It is not
            modified; a new frame is returned.
        ticker: Symbol to keep.

    Returns:
        A new DataFrame with a fresh 0..n-1 index. The remaining original
        columns come first, followed by the four derived columns. An empty
        input yields an empty frame with those columns.
    """
    # removing unwanted columns
    columns_to_remove = ['url', 'authors', 'banner_image', 'source',
                         'category_within_source', 'source_domain', 'topics']
    derived_columns = ['ticker', 'relevance_score',
                       'ticker_sentiment_score', 'ticker_sentiment_label']

    # errors='ignore' tolerates a feed that lacks some of these columns.
    cleaned = new_df.drop(columns=columns_to_remove, errors='ignore')

    if cleaned.empty:
        # Nothing to flatten; return the output layout with no rows.
        kept_columns = [col for col in cleaned.columns if col != 'ticker_sentiment']
        return pd.DataFrame(columns=kept_columns + derived_columns)

    def first_entry(entries):
        # Articles may arrive with an empty sentiment list; treat as no match.
        if isinstance(entries, (list, tuple)) and entries:
            return entries[0]
        return None

    first_entries = cleaned['ticker_sentiment'].apply(first_entry)

    # Keep only rows whose first sentiment entry is for the requested ticker.
    # One boolean mask replaces the row-by-row drop inside iterrows().
    keep = first_entries.apply(
        lambda entry: entry is not None and str(entry['ticker']) == ticker
    ).astype(bool)
    matched = first_entries[keep]

    cleaned = cleaned.loc[keep].drop(columns='ticker_sentiment').assign(
        ticker=matched.apply(lambda entry: entry['ticker']),
        relevance_score=matched.apply(
            lambda entry: float(entry['relevance_score'])).astype(float),
        ticker_sentiment_score=matched.apply(
            lambda entry: float(entry['ticker_sentiment_score'])).astype(float),
        ticker_sentiment_label=matched.apply(
            lambda entry: entry['ticker_sentiment_label']),
    )

    # resetting indexes after dropping certain rows
    return cleaned.reset_index(drop=True)
    
    
# Stock symbols whose news is tracked; extend this list as needed
tickers = [
    "AAPL",
    "MSFT",
    "ORCL",
]

# Function to perform the initial data update for all tickers
def initial_data_update():
    """Run one update pass over every symbol in ``tickers``."""
    for symbol in tickers:
        # The two date arguments are passed as empty strings.
        update_news_data_for_ticker(symbol, "", "")

# Function to run one pass of the update tasks; the scheduler repeats it daily
def run_daily_tasks():
    """Update the news CSV of every symbol in ``tickers`` once, then return.

    This is registered as a ``schedule`` job, so it must return after a
    single pass. Looping and sleeping in here would keep the job from ever
    handing control back to ``schedule.run_pending()``, leaving the scheduler
    stuck after its first trigger and letting the run time drift by the
    duration of each pass.
    """
    # Perform the tasks for each ticker
    for ticker in tickers:
        update_news_data_for_ticker(ticker, "", "")

# Bring every ticker's CSV up to date once, right away
initial_data_update()

# Register the recurring job with the scheduler: every day at 00:10
(
    schedule
    .every()
    .day
    .at("00:10")
    .do(run_daily_tasks)
)

# Poll the scheduler once per second and run whatever job is due
while True:
    schedule.run_pending()
    time.sleep(1)

35 new records added to the AAPL_News_Content.csv file.
245 new records added to the MSFT_News_Content.csv file.
6 new records added to the ORCL_News_Content.csv file.


KeyboardInterrupt: 

# Reading the CSV

In [4]:
import pandas as pd
# Load the stored ORCL news records from disk into a DataFrame
df = pd.read_csv(filepath_or_buffer='ORCL_News_Content.csv')

# Preview the top ten rows
df.head(n=10)

Unnamed: 0,title,time_published,summary,overall_sentiment_score,overall_sentiment_label,ticker,relevance_score,ticker_sentiment_score,ticker_sentiment_label
0,Oracle Reports After The Close On 3/10 - Optio...,20220308T182025,"According to NextEarningsDate.com, the Oracle ...",0.069867,Neutral,ORCL,0.925567,0.0066,Neutral
1,Oracle Whale Trades For March 08,20220308T203358,A whale with a lot of money to spend has taken...,0.187695,Somewhat-Bullish,ORCL,0.432734,-0.107121,Neutral
2,Here's How Much You Would Have Made Owning Ora...,20220310T180433,Oracle ( NYSE:ORCL ) has outperformed the ma...,0.054314,Neutral,ORCL,0.972797,0.054314,Neutral
3,"Oracle Stock Slides After Q3 EPS Miss, Falling...",20220310T211913,Oracle Corporation ( NYSE: ORCL ) reported t...,-0.034086,Neutral,ORCL,0.724687,-0.00118,Neutral
4,Oracle stock drops after earnings fall short o...,20220310T212500,Oracle Corp. shares fell in the extended sessi...,-0.080954,Neutral,ORCL,0.853521,-0.080954,Neutral
5,Oracle Shares Slide as Earnings Match Estimates,20220310T215200,Oracle shares are heading lower after the ente...,-0.296718,Somewhat-Bearish,ORCL,0.928707,-0.296718,Somewhat-Bearish
6,Oracle ( ORCL ) Q3 Earnings Miss Estimates,20220310T222508,Oracle (ORCL) delivered earnings and revenue s...,0.013136,Neutral,ORCL,0.377998,0.001136,Neutral
7,Uncertainty Prevails as Indexes Turn South Again,20220310T224800,We are mostly exhausted of most economic reads...,-0.26052,Somewhat-Bearish,ORCL,0.276596,-0.189404,Somewhat-Bearish
8,Oracle Stock Little Changed as Earnings Match ...,20220310T225300,Oracle Stock Is Little Changed. Earnings Match...,-0.090848,Neutral,ORCL,0.900719,-0.090848,Neutral
9,Human Capital Management Market to Reach USD 4...,20220311T120055,"Pune, India, March 11, 2022 ( GLOBE NEWSWIR...",0.056842,Neutral,ORCL,0.028599,0.000453,Neutral


# Changing the position of a column

In [19]:
# import pandas as pd

# # Read the CSV file
# df = pd.read_csv('AAPL_News.csv')

# # Store the column to be moved
# column_to_move = df['time_published']

# # Delete the column from its current position
# df.drop('time_published', axis=1, inplace=True)

# # Insert the column at the desired position (2nd column)
# df.insert(1, 'time_published', column_to_move)

# # Save the modified DataFrame back to the CSV file
# df.to_csv('AAPL_News.csv', index=False)

In [20]:
# import pandas as pd

# df = pd.read_csv('AAPL_News.csv')

# df.tail()

Unnamed: 0,title,time_published,summary,overall_sentiment_score,overall_sentiment_label,ticker,relevance_score,ticker_sentiment_score,ticker_sentiment_label
3896,Apple's iPhone 15 To Have Biggest Upgrade In 3...,20230730T163411,The countdown to the launch of the next iterat...,0.239554,Somewhat-Bullish,AAPL,0.998692,0.549215,Bullish
3897,Morning Bid: Asian markets face tough act to f...,20230730T214800,"NEW YORK, July 31 ( Reuters ) - A look at the ...",0.138551,Neutral,AAPL,0.085936,0.0,Neutral
3898,Asia shares extend gains; wary eye on Japan yi...,20230731T003106,Asia shares extend gains. wary eye on Japan yi...,0.139685,Neutral,AAPL,0.059636,0.090999,Neutral
3899,Asia shares extend gains; wary eye on Japan yi...,20230731T005524,The early impetus for shares was positive foll...,0.139744,Neutral,AAPL,0.061159,0.095007,Neutral
3900,Monitor Your iPhone Battery Like A Pro: Add Ba...,20230731T060834,Have you ever found yourself anxiously glancin...,0.095952,Neutral,AAPL,0.310843,0.315044,Somewhat-Bullish


# Combining two csv files

In [27]:
# import pandas as pd

# # Read the first CSV file
# df1 = pd.read_csv('AAPL_News.csv')

# # Read the second CSV file
# df2 = pd.read_csv('AAPL_News_Content.csv')

# # Concatenate the DataFrames vertically
# combined_df = pd.concat([df1, df2], ignore_index=True)

# # Save the combined DataFrame to a new CSV file
# combined_df.to_csv('Combined_AAPL_News_data.csv', index=False)


In [28]:
# import pandas as pd

# df = pd.read_csv('Combined_AAPL_News_data.csv')

# df.tail()

Unnamed: 0,title,time_published,summary,overall_sentiment_score,overall_sentiment_label,ticker,relevance_score,ticker_sentiment_score,ticker_sentiment_label
3960,IPhone maker Foxconn cuts 2023 sales forecast ...,20230814T095302,Hon Hai Precision Industry Co. now expects 202...,-0.137477,Neutral,AAPL,0.059785,-0.185027,Somewhat-Bearish
3961,"Trump Son-In-Law's Saudi Deal, Cybertruck's Br...",20230814T112352,From political controversies surrounding Donal...,-0.062705,Neutral,AAPL,0.275867,0.016546,Neutral
3962,Apple's 2023 Holiday Season Looks Cloudy: Key ...,20230814T124641,Apple Inc. AAPL could be headed for a weak hol...,-0.156551,Somewhat-Bearish,AAPL,0.592742,-0.021551,Neutral
3963,Ford Appoints Former Apple Executive Peter Ste...,20230814T164848,Ford Motor Company F hired former Apple Inc. A...,0.126641,Neutral,AAPL,0.885652,0.017029,Neutral
3964,Foxconn spends more to accelerate global migra...,20230815T041008,Foxconn Technology Group has finally joined ti...,0.0573,Neutral,AAPL,0.042122,-0.131207,Neutral


# Deleting the last few rows of a dataframe

In [6]:
import pandas as pd

# Load the CSV into a DataFrame
csv_file = 'AAPL_News_Content.csv'
df = pd.read_csv(csv_file)

# Number of rows to delete
num_rows_to_delete = 1

# Delete the last N rows by slicing up to an explicit end position.
# `df.iloc[:-N]` would empty the whole frame for N == 0 (since -0 == 0);
# the max() guard also covers N larger than the number of rows.
df = df.iloc[:max(len(df) - num_rows_to_delete, 0)]

# Save the modified DataFrame back to the CSV file
df.to_csv(csv_file, index=False)

print("Last", num_rows_to_delete, "rows deleted from the "+ csv_file +" CSV.")

Last 1 rows deleted from the AAPL_News_Content.csv CSV.


# Deleting duplicate rows

In [25]:
import pandas as pd

# Read the ORCL news CSV from disk
csv_file = 'ORCL_News_Content.csv'
df = pd.read_csv(csv_file)

# Keep only the first occurrence of each fully identical row
df = df.drop_duplicates(keep='first')

# Overwrite the CSV with the de-duplicated rows (no index column)
df.to_csv(csv_file, index=False)

print(f"Removed duplicating rows from {csv_file} file")

Removed duplicating rows from ORCL_News_Content.csv file


In [26]:
import pandas as pd

# Re-read the ORCL news CSV from disk
csv_file = 'ORCL_News_Content.csv'
df = pd.read_csv(filepath_or_buffer=csv_file)

# Show the bottom ten rows
df.tail(n=10)

Unnamed: 0,title,time_published,summary,overall_sentiment_score,overall_sentiment_label,ticker,relevance_score,ticker_sentiment_score,ticker_sentiment_label
576,Oracle ( ORCL ) Gains As Market Dips: What Y...,20230815T214513,Oracle (ORCL) closed the most recent trading d...,0.12525,Neutral,ORCL,0.677342,0.238844,Somewhat-Bullish
577,5 Bullish Trades You Can Make in the Next Month,20230817T230240,It's amazing what can happen in just a few wee...,0.205157,Somewhat-Bullish,ORCL,0.331765,0.206804,Somewhat-Bullish
578,Here is What to Know Beyond Why Oracle Corpora...,20230818T130010,Oracle (ORCL) has been one of the stocks most ...,0.228772,Somewhat-Bullish,ORCL,0.521151,0.15322,Somewhat-Bullish
579,Looking At Oracle's Recent Unusual Options Act...,20230818T170106,A whale with a lot of money to spend has taken...,0.083157,Neutral,ORCL,0.79415,0.228118,Somewhat-Bullish
580,Oracle Unusual Options Activity - Oracle ( NY...,20230821T161545,A whale with a lot of money to spend has taken...,0.183738,Somewhat-Bullish,ORCL,0.701875,0.303711,Somewhat-Bullish
581,Oracle ( ORCL ) Stock Moves -0.04%: What You...,20230822T214515,Oracle (ORCL) closed at $116.54 in the latest ...,0.175957,Somewhat-Bullish,ORCL,0.631282,0.308694,Somewhat-Bullish
582,How Is The Market Feeling About Oracle? - Orac...,20230825T143018,Oracle's ORCL short percent of float has falle...,0.241618,Somewhat-Bullish,ORCL,0.224903,0.085754,Neutral
583,Open Source Services Market is Anticipated to ...,20230828T120936,"New York, USA, Aug. 28, 2023 ( GLOBE NEWSWIRE ...",0.218068,Somewhat-Bullish,ORCL,0.033609,0.152613,Somewhat-Bullish
584,Oracle ( ORCL ) Outpaces Stock Market Gains:...,20230828T214512,Oracle (ORCL) closed at $116.84 in the latest ...,0.233317,Somewhat-Bullish,ORCL,0.617188,0.375819,Bullish
585,Why Oracle Stock Trounced the Market Today,20230829T211946,"The veteran tech company's stock is now a buy,...",0.324043,Somewhat-Bullish,ORCL,0.722944,0.586185,Bullish
