In [1]:
# Cell 1: Imports and Database Connection
import pandas as pd
import numpy as np
from pathlib import Path
import warnings
import math
from sqlalchemy import create_engine
import pymysql
import tqdm

warnings.filterwarnings('ignore')
print("Libraries imported and connection configured.")

# Define Base Directory
base_dir = Path.cwd().parent 

# Database Configuration
db_config = {
    'host': '127.0.0.1',
    'user': 'root',
    'password': '',  
    'database': 'trading_system'
}
db_url = f"mysql+pymysql://{db_config['user']}:{db_config['password']}@{db_config['host']}/{db_config['database']}"
engine = create_engine(db_url)

print(f"Project Base Directory: {base_dir}")

Libraries imported and connection configured.
Project Base Directory: c:\Users\18kyu\Desktop\Unishit\IR


In [2]:
# Cell 2: Load and Write Headlines
print("Loading and Writing Headlines")
try:
    final_headlines_path = base_dir / "1_IR_Data" / "2_Cleaned_Data" / "Cleaned_Sentiment_Final.csv"
    df_headlines_final = pd.read_csv(final_headlines_path)
    print(f"Loaded {len(df_headlines_final)} headlines from CSV.")
    
    # Prepare for database
    df_headlines_to_db = df_headlines_final[['index', 'headline', 'date', 'stock']]
    df_headlines_to_db.columns = ['original_index', 'headline', 'date', 'stock'] 
    
    print(f"Writing {len(df_headlines_to_db)} headlines to 'headlines' table.")
    df_headlines_to_db.to_sql('headlines', con=engine, if_exists='replace', index=False, chunksize=1000)
    print("Headlines table successfully written.")
    
except Exception as e:
    print(f"Error writing headlines: {e}")

Loading and Writing Headlines
Loaded 1147268 headlines from CSV.
Writing 1147268 headlines to 'headlines' table.
Headlines table successfully written.


In [5]:
# Cell 3: Load and Write Stock Prices
print("Loading and Writing Stock Prices")
try:
    price_file_path = base_dir / "1_IR_Data" / "3_Price_Data" / "Stock_Price_20250727_150046.csv"
    
    print(f"Loading prices from {price_file_path}...")
    
    chunk_size = 50000
    with open(price_file_path, 'r', encoding='utf-8') as f:
        total_rows = sum(1 for line in f) - 1 
    total_chunks = math.ceil(total_rows / chunk_size)
    print(f"Total rows to write: {total_rows:,}. Total chunks: {total_chunks}.")
    
    print(f"\nWriting stock prices to 'stock_prices' table (in chunks)...")
    chunk_iter = pd.read_csv(price_file_path, chunksize=chunk_size, on_bad_lines='skip') 
    
    first_chunk = True
    chunk_count = 0 
    rows_written = 0

    for chunk in chunk_iter:
        chunk_count += 1 
        
        chunk.replace([np.inf, -np.inf], np.nan, inplace=True) 
        chunk.rename(columns={'Date': 'date', 'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'volume'}, inplace=True)
        final_chunk = chunk[['date', 'ticker', 'open', 'high', 'low', 'close', 'volume']]
        
        if first_chunk:
            final_chunk.to_sql('stock_prices', con=engine, if_exists='replace', index=False, chunksize=1000)
            print(f"Saved chunk {chunk_count} of {total_chunks} (Table Replaced).")
            first_chunk = False
        else:
            final_chunk.to_sql('stock_prices', con=engine, if_exists='append', index=False, chunksize=1000)
            print(f"Saved chunk {chunk_count} of {total_chunks}.")
            
        rows_written += len(chunk)

    print(f"\nStock prices table successfully written. Total rows: {rows_written:,}")
    
except Exception as e:
    print(f"Error writing stock prices to database: {e}")

Loading and Writing Stock Prices
Loading prices from c:\Users\18kyu\Desktop\Unishit\IR\1_IR_Data\3_Price_Data\Stock_Price_20250727_150046.csv...
Total rows to write: 8,986,458. Total chunks: 180.

Writing stock prices to 'stock_prices' table (in chunks)...
Saved chunk 1 of 180 (Table Replaced).
Saved chunk 2 of 180.
Saved chunk 3 of 180.
Saved chunk 4 of 180.
Saved chunk 5 of 180.
Saved chunk 6 of 180.
Saved chunk 7 of 180.
Saved chunk 8 of 180.
Saved chunk 9 of 180.
Saved chunk 10 of 180.
Saved chunk 11 of 180.
Saved chunk 12 of 180.
Saved chunk 13 of 180.
Saved chunk 14 of 180.
Saved chunk 15 of 180.
Saved chunk 16 of 180.
Saved chunk 17 of 180.
Saved chunk 18 of 180.
Saved chunk 19 of 180.
Saved chunk 20 of 180.
Saved chunk 21 of 180.
Saved chunk 22 of 180.
Saved chunk 23 of 180.
Saved chunk 24 of 180.
Saved chunk 25 of 180.
Saved chunk 26 of 180.
Saved chunk 27 of 180.
Saved chunk 28 of 180.
Saved chunk 29 of 180.
Saved chunk 30 of 180.
Saved chunk 31 of 180.
Saved chunk 32 of 180.

In [7]:
# Cell 4: Load and Write Sentiment Scores
print("Loading and Writing Sentiment Scores")
try:
    sentiment_file_path = base_dir / "1_IR_Data" / "4_Sentiment_Scores" / "Sentiment_Scores_Complete.csv"
    df_scores = pd.read_csv(sentiment_file_path)
    print(f"Loaded {len(df_scores)} scores from CSV.")
    
    mapping_df = pd.read_sql("SELECT id, original_index FROM headlines", con=engine)
    mapping_dict = pd.Series(mapping_df.id.values, index=mapping_df.original_index).to_dict()
    
    df_scores['headline_id'] = df_scores['index'].map(mapping_dict)
    
    final_scores = df_scores[[
        'headline_id', 'textblob_polarity', 'textblob_subjectivity', 
        'vader_compound', 'vader_positive', 'vader_negative', 'vader_neutral', 
        'finbert_compound', 'finbert_positive', 'finbert_negative', 'finbert_neutral', 'finbert_label'
    ]]
    final_scores = final_scores.dropna(subset=['headline_id']) 
    
    print(f"Writing {len(final_scores)} scores to 'sentiment_scores' table.")
    final_scores.to_sql('sentiment_scores', con=engine, if_exists='replace', index=False, chunksize=1000)
    print("Sentiment scores table successfully written.")

except Exception as e:
    print(f"Error writing sentiment scores: {e}")

print("\n Database loading complete.")

Loading and Writing Sentiment Scores
Loaded 1274576 scores from CSV.
Writing 1141860 scores to 'sentiment_scores' table.
Sentiment scores table successfully written.

 Database loading complete.
