Fetch data from yfinance and save it to a CSV file

In [None]:
import yfinance as yf
import pandas as pd

# Company name -> ticker symbol; each ticker is handed to yf.Ticker below.
companies = {
    "COMPANY NAME": "TICKER",
    # add more company names and tickers
}

def fetch_stock_data(tickers):
    """Download the full price history for every company in *tickers*.

    Args:
        tickers: Mapping of company name to ticker symbol.

    Returns:
        One DataFrame with all histories stacked row-wise. Every row carries
        the ``Company`` and ``Ticker`` it belongs to, and the index returned
        by ``history`` is turned into a regular column. If nothing could be
        collected (empty mapping, every download failing or coming back
        empty) an empty DataFrame is returned instead of raising.
    """
    all_data = []

    for company, ticker in tickers.items():
        print(f"Fetching data for {company} ({ticker})...")
        try:
            hist = yf.Ticker(ticker).history(period="max")

            if hist.empty:
                # NOTE(review): yfinance appears to return an empty frame
                # (rather than raise) for unknown symbols - confirm. Either
                # way an empty frame adds no rows, so it is skipped.
                print(f"No data returned for {company} ({ticker}); skipping.")
                continue

            hist['Company'] = company
            hist['Ticker'] = ticker
            hist.reset_index(inplace=True)

            all_data.append(hist)
        except Exception as e:
            # Best effort: one failing ticker must not abort the whole run.
            print(f"Error fetching data for {company} ({ticker}): {e}")

    if not all_data:
        # pd.concat raises ValueError on an empty list, so bail out early.
        print("No stock data could be fetched.")
        return pd.DataFrame()

    return pd.concat(all_data, ignore_index=True)

def save_data_to_csv(data, filename='stock_data.csv'):
    """Write *data* to *filename* as CSV, leaving out the index column.

    Args:
        data: DataFrame to persist.
        filename: Destination path; defaults to ``stock_data.csv``.
    """
    data.to_csv(path_or_buf=filename, index=False)
    print(f"Stock data saved to {filename}")

def main():
    """Fetch the history of every configured company and store it as CSV."""
    save_data_to_csv(fetch_stock_data(companies))

# Run the download only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Merge Stock dataset and News dataset

In [None]:
# The code may need changes to match your dataset's field names. The datetime conversion may not be necessary for your dataset.

import pandas as pd

def _calendar_dates(values):
    """Return the calendar date written in each timestamp string of *values*.

    A trailing UTC offset (``Z`` or ``+HH:MM`` / ``-HH:MM``) is removed before
    parsing, so the result is the date as written in the file. Converting to
    UTC first would move midnight timestamps from zones ahead of UTC onto the
    previous day. Entries that cannot be parsed become ``NaT``.
    """
    text = values.astype(str).str.strip()
    text = text.str.replace(r'(?:Z|[+-]\d{2}:\d{2})$', '', regex=True)
    # utc=True only guarantees a uniform datetime dtype here; the offsets
    # written by pandas are already stripped, so no date shift takes place.
    return pd.to_datetime(text, errors='coerce', utc=True).dt.date


def _fill_from_next_trading_day(merged_df, stock_df):
    """Fill rows of *merged_df* lacking prices from the next later stock row.

    For every row whose ``Open`` is missing, the earliest row of the same
    company in *stock_df* with a strictly later ``Date`` supplies ``Open``,
    ``High``, ``Low`` and ``Close``. *merged_df* is modified in place; rows
    without a later stock row stay empty.
    """
    price_cols = ['Open', 'High', 'Low', 'Close']

    # Sort once and group once so each lookup only scans one company's rows
    # and "first later row" really is the earliest later date.
    dated = stock_df.dropna(subset=['Date']).sort_values('Date', kind='mergesort')
    rows_by_company = dict(iter(dated.groupby('Company', sort=False)))

    for index, row in merged_df[merged_df['Open'].isna()].iterrows():
        if pd.isna(row['Date']):
            continue  # an unparseable news date has no "next day"
        candidates = rows_by_company.get(row['COMPANY NAME'])
        if candidates is None:
            continue
        later = candidates[candidates['Date'] > row['Date']]
        if later.empty:
            continue
        next_day_row = later.iloc[0]
        merged_df.loc[index, price_cols] = [next_day_row[col] for col in price_cols]


def merge_datasets(stock_filename='stock_data.csv', news_filename='Balanced_Sentiment_Dataset.csv', output_filename='merged_data.csv'):
    """Join news rows with the stock prices of the same company and day.

    The stock file must provide the columns ``Date``, ``Company``, ``Ticker``,
    ``Open``, ``High``, ``Low`` and ``Close``; the news file ``DATE``,
    ``COMPANY NAME``, ``DETAILS``, ``sentiment_label`` and ``sentiment_score``.
    Company names are lower-cased on both sides before matching. News rows
    without a stock row on the same date take the prices of the next later
    date available for that company. The result is written to
    *output_filename*.

    Args:
        stock_filename: CSV with the stock price history.
        news_filename: CSV with the news items and their sentiment.
        output_filename: Path of the CSV to write.
    """
    stock_df = pd.read_csv(stock_filename)
    news_df = pd.read_csv(news_filename)

    # Keep the date as written in the stock file instead of the UTC date.
    stock_df['Date'] = _calendar_dates(stock_df['Date'])
    news_df['DATE'] = pd.to_datetime(news_df['DATE'], errors='coerce').dt.date

    news_df = news_df.rename(columns={'DATE': 'Date'})

    stock_df['Company'] = stock_df['Company'].str.lower()
    news_df['COMPANY NAME'] = news_df['COMPANY NAME'].str.lower()

    merged_df = pd.merge(
        news_df,
        stock_df,
        left_on=['Date', 'COMPANY NAME'],
        right_on=['Date', 'Company'],
        how='left',
    )

    print(f"Rows with missing stock data before filling: {merged_df['Open'].isna().sum()}")

    # Rows without a stock match have no ticker either; recover it by name.
    ticker_mapping = stock_df[['Company', 'Ticker']].drop_duplicates().set_index('Company')['Ticker'].to_dict()
    merged_df['Ticker'] = merged_df['Ticker'].fillna(merged_df['COMPANY NAME'].map(ticker_mapping))

    _fill_from_next_trading_day(merged_df, stock_df)

    print(f"Rows with missing stock data after filling: {merged_df['Open'].isna().sum()}")

    merged_df = merged_df[['COMPANY NAME', 'Ticker', 'Date', 'DETAILS', 'Open', 'High', 'Low', 'Close', 'sentiment_label', 'sentiment_score']]

    merged_df.to_csv(output_filename, index=False)
    print(f"Merged data saved to {output_filename}")

# Run the merge with the default input and output file names.
merge_datasets()