# Fetching data from the last point every 5 days auto

In [120]:
# !pip install schedule

In [32]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import datetime
import schedule
import time
import os

# Function to fetch news data from the API
def fetch_news_data(function, ticker, topics, api_key, limit, time_from, sort):
    
    # Check if the input string has a length of 15 characters
    if len(time_from) == 15:
        time_from = time_from[:-2]  # Remove the last two characters

    url = 'https://www.alphavantage.co/query?function='+function+'&tickers='+ticker+'&topics='+topics+'&apikey='+api_key+'&limit='+limit+'&time_from='+time_from+'&sort='+sort
    r = requests.get(url)
    data = r.json()
    return data


# Function to update the CSV file with new news data
def update_news_data_for_ticker(ticker, start_datetime, end_datetime):
    
    # Define parameters
    function = "NEWS_SENTIMENT"
    topics = "technology"
    api_key = ""
    limit = "1000"
    sort = "EARLIEST"

    # Read the existing CSV file or create a new one
    if os.path.exists(ticker + '_News_Content.csv'):
        existing_df = pd.read_csv(ticker + '_News_Content.csv')
        latest_fetched_datetime = existing_df['time_published'].max()
    else:
        latest_fetched_datetime = (datetime.datetime.now() - datetime.timedelta(days=6))
                
    print(latest_fetched_datetime)
        
    # Fetch new news data starting from the latest fetched date-time
    new_data = fetch_news_data(function, ticker, topics, api_key, limit, latest_fetched_datetime, sort)
    new_df = pd.DataFrame(new_data['feed'])

    new_df = cleaning_fetched_data(new_df, ticker)
    
    if not os.path.exists(ticker + '_News_Content.csv'):
        # Save the DataFrame to a CSV file if the file doesn't exist
        new_df.to_csv(ticker + '_News_Content.csv', index=False)
        print(f"{len(new_df)} new records added to the {ticker}_News_Content.csv file.")
    elif os.path.exists(ticker + '_News_Content.csv'):
        # Save the new data to the CSV file in "append" mode
        new_df.to_csv(ticker + '_News_Content.csv', mode='a', index=False, header=False)
        print(f"{len(new_df)} new records added to the {ticker}_News_Content.csv file.")
    else:
        print(f"No new records to add for {ticker}.")

def cleaning_fetched_data(new_df, ticker):

    #removing unwanted columns
    columns_to_remove = ['url', 'authors','banner_image','source', 'category_within_source', 'source_domain', 'topics']
    new_df.drop(columns_to_remove, axis=1, inplace=True)

    # Extract 'ticker' and 'relevance_score' using apply() and lambda function
    new_df['ticker'] = new_df['ticker_sentiment'].apply(lambda x: x[0]['ticker'])
    new_df['relevance_score'] = new_df['ticker_sentiment'].apply(lambda x: float(x[0]['relevance_score']))
    new_df['ticker_sentiment_score'] = new_df['ticker_sentiment'].apply(lambda x: float(x[0]['ticker_sentiment_score']))
    new_df['ticker_sentiment_label'] = new_df['ticker_sentiment'].apply(lambda x: x[0]['ticker_sentiment_label'])

    new_df.drop('ticker_sentiment', axis=1, inplace=True)

    # dropping rows if they are not equal to the ticker
    for index, row in new_df.iterrows():
        if str(new_df['ticker'][index]) != ticker:
            new_df.drop([index], axis=0, inplace=True)

    #resetting indexes after dropping certain rows        
    new_df.reset_index(drop=True, inplace=True)

    return new_df
    
    
# List of stock tickers
tickers = ["AAPL", "MSFT", "ORCL"]  # Add more tickers as needed

# Function to perform the initial data update for all tickers
def initial_data_update():
    for ticker in tickers:
        update_news_data_for_ticker(ticker, "", "")


# Schedule the automatic update every 5 days
for ticker in tickers:
    schedule.every(5).days.do(update_news_data_for_ticker, ticker, "", "")

# Perform the initial data update
initial_data_update()

# Run the scheduled tasks
while True:
    schedule.run_pending()
    time.sleep(1)

20230815T041008
1 new records added to the AAPL_News_Content.csv file.
20230815T050000
2 new records added to the MSFT_News_Content.csv file.
20230814T160110
1 new records added to the ORCL_News_Content.csv file.


KeyboardInterrupt: 

In [30]:
import pandas as pd
# Read the CSV file and store the data in a DataFrame
df = pd.read_csv('MSFT_News_Content.csv')

# Display the first few rows of the DataFrame
df[-2:]

Unnamed: 0,title,time_published,summary,overall_sentiment_score,overall_sentiment_label,ticker,relevance_score,ticker_sentiment_score,ticker_sentiment_label
21587,The Savings Banks Group will invest more than ...,20230815T050000,"This autumn, the Savings Banks Group will laun...",0.361509,Bullish,MSFT,0.143843,0.122369,Neutral
21588,The Savings Banks Group will invest more than ...,20230815T050000,"This autumn, the Savings Banks Group will laun...",0.365676,Bullish,MSFT,0.144275,0.122579,Neutral


# Changing the position of a column

In [19]:
import pandas as pd

# Read the CSV file
df = pd.read_csv('AAPL_News.csv')

# Store the column to be moved
column_to_move = df['time_published']

# Delete the column from its current position
df.drop('time_published', axis=1, inplace=True)

# Insert the column at the desired position (2nd column)
df.insert(1, 'time_published', column_to_move)

# Save the modified DataFrame back to the CSV file
df.to_csv('AAPL_News.csv', index=False)

In [20]:
import pandas as pd

df = pd.read_csv('AAPL_News.csv')

df.tail()

Unnamed: 0,title,time_published,summary,overall_sentiment_score,overall_sentiment_label,ticker,relevance_score,ticker_sentiment_score,ticker_sentiment_label
3896,Apple's iPhone 15 To Have Biggest Upgrade In 3...,20230730T163411,The countdown to the launch of the next iterat...,0.239554,Somewhat-Bullish,AAPL,0.998692,0.549215,Bullish
3897,Morning Bid: Asian markets face tough act to f...,20230730T214800,"NEW YORK, July 31 ( Reuters ) - A look at the ...",0.138551,Neutral,AAPL,0.085936,0.0,Neutral
3898,Asia shares extend gains; wary eye on Japan yi...,20230731T003106,Asia shares extend gains. wary eye on Japan yi...,0.139685,Neutral,AAPL,0.059636,0.090999,Neutral
3899,Asia shares extend gains; wary eye on Japan yi...,20230731T005524,The early impetus for shares was positive foll...,0.139744,Neutral,AAPL,0.061159,0.095007,Neutral
3900,Monitor Your iPhone Battery Like A Pro: Add Ba...,20230731T060834,Have you ever found yourself anxiously glancin...,0.095952,Neutral,AAPL,0.310843,0.315044,Somewhat-Bullish


# Combining two csv files

In [27]:
import pandas as pd

# Read the first CSV file
df1 = pd.read_csv('AAPL_News.csv')

# Read the second CSV file
df2 = pd.read_csv('AAPL_News_Content.csv')

# Concatenate the DataFrames vertically
combined_df = pd.concat([df1, df2], ignore_index=True)

# Save the combined DataFrame to a new CSV file
combined_df.to_csv('Combined_AAPL_News_data.csv', index=False)


In [28]:
import pandas as pd

df = pd.read_csv('Combined_AAPL_News_data.csv')

df.tail()

Unnamed: 0,title,time_published,summary,overall_sentiment_score,overall_sentiment_label,ticker,relevance_score,ticker_sentiment_score,ticker_sentiment_label
3960,IPhone maker Foxconn cuts 2023 sales forecast ...,20230814T095302,Hon Hai Precision Industry Co. now expects 202...,-0.137477,Neutral,AAPL,0.059785,-0.185027,Somewhat-Bearish
3961,"Trump Son-In-Law's Saudi Deal, Cybertruck's Br...",20230814T112352,From political controversies surrounding Donal...,-0.062705,Neutral,AAPL,0.275867,0.016546,Neutral
3962,Apple's 2023 Holiday Season Looks Cloudy: Key ...,20230814T124641,Apple Inc. AAPL could be headed for a weak hol...,-0.156551,Somewhat-Bearish,AAPL,0.592742,-0.021551,Neutral
3963,Ford Appoints Former Apple Executive Peter Ste...,20230814T164848,Ford Motor Company F hired former Apple Inc. A...,0.126641,Neutral,AAPL,0.885652,0.017029,Neutral
3964,Foxconn spends more to accelerate global migra...,20230815T041008,Foxconn Technology Group has finally joined ti...,0.0573,Neutral,AAPL,0.042122,-0.131207,Neutral
