In [2]:
# --- Imports for Task 2 ---
import pandas as pd
import talib
import matplotlib.pyplot as plt
import seaborn as sns
import os



In [3]:
# Listing the stock symbols
stock_symbols = ['TSLA', "NVDA", "META", "AMZN", "GOOG", "AAPL","MSFT"]

# data directory for the data
Data_Dir = '../data/'

# The directory where the plots will be placed
Plots_Dir = '../Plots-task2'
os.makedirs(Plots_Dir, exist_ok=True)

#settinng ploting style
sns.set_theme(style="whitegrid")

# setup_logging('task2_analysis.log') # If you use your custom logging
print(f"--- Starting Task 2: Technical Analysis for configured stock symbols ---")
print(f"Will analyze symbols: {', '.join(stock_symbols)}")
print(f"Data will be loaded from: {os.path.abspath(Data_Dir)}") # Shows the full path being used
print(f"Plots will be saved to: {os.path.abspath(Plots_Dir)}")


--- Starting Task 2: Technical Analysis for configured stock symbols ---
Will analyze symbols: TSLA, NVDA, META, AMZN, GOOG, AAPL, MSFT
Data will be loaded from: d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\data
Plots will be saved to: d:\Documents\Projects\10 Academy\Stock Market\financial-news-analysis\Plots-task2


In [5]:
# (Keep your imports and initial configuration as they are)

# --- Function to load and prepare data for a single stock ---
def load_data(symbol, data_dir): # Renamed to match your function name
    csv_filename = f"{symbol}_historical_data.csv"
    csv_filepath = os.path.join(data_dir, csv_filename)
    print(f"\nLoading: {symbol} from {csv_filepath}")

    try:
        df = pd.read_csv(csv_filepath)
        if df.empty:
            print(f"  No data in {csv_filename}. Skipping.")
            return None

        # --- Date Column Processing ---
        possible_date_columns = ['Date', 'date', 'Datetime', 'timestamp', 'Timestamp']
        date_col_name = None
        for col_name_iter in possible_date_columns: # Renamed loop variable
            if col_name_iter in df.columns:
                date_col_name = col_name_iter
                break
        
        if date_col_name is None:
            print(f"  Error: Date column not found in {csv_filename}. Available: {df.columns.tolist()}. Skipping.")
            return None
        
        try:
            df[date_col_name] = pd.to_datetime(df[date_col_name])
            df.set_index(date_col_name, inplace=True)
            print(f"  Set '{date_col_name}' as DatetimeIndex for {symbol}.")
        except Exception as e_date:
            print(f"  Error processing date column '{date_col_name}' for {symbol}: {e_date}. Skipping.")
            return None

        # --- OHLCV Column Renaming and Selection ---
        print(f"  Initial columns for {symbol}: {df.columns.tolist()}") # DEBUG: See original columns

        # ** YOU MUST ADJUST THIS MAP BASED ON YOUR CSVs **
        # This map should take your *actual current column names* and map them to the *desired standard names*.
        column_rename_map = {
            # 'Original Open Name': 'Open',
            # 'Original High Name': 'High',
            # 'Original Low Name': 'Low',
            # 'Adj Close': 'Close', # Only if you want 'Adj Close' to *become* 'Close'
            # 'Original Volume Name': 'Volume',
        }

        # Decide which 'Close' to use if multiple exist (e.g., 'Close' and 'Adj Close')
        # Preference: Use 'Adj Close' as 'Close' if available. If not, use 'Close' if available.
        if 'Adj Close' in df.columns:
            if 'Close' in df.columns and 'Adj Close' != 'Close': # If 'Close' also exists and is different
                print(f"  Both 'Close' and 'Adj Close' found for {symbol}. Prioritizing 'Adj Close' and renaming it to 'Close'. Dropping original 'Close'.")
                df.drop(columns=['Close'], inplace=True, errors='ignore') # Drop original 'Close' if it exists
            column_rename_map['Adj Close'] = 'Close'
        elif 'Close' in df.columns:
            print(f"  'Close' column found for {symbol}. Will use it as is.")
            # No rename needed if 'Close' is already the target name and it exists
        else:
            print(f"  Error: Neither 'Close' nor 'Adj Close' found for {symbol}. Cannot determine closing price. Skipping.")
            return None
            
        # Rename other columns if necessary (Open, High, Low, Volume)
        # Example: if your open price is 'Open Price'
        # if 'Open Price' in df.columns: column_rename_map['Open Price'] = 'Open'
        # Add similar logic for High, Low, Volume based on your CSVs.

        df.rename(columns=column_rename_map, inplace=True)
        print(f"  Columns after rename for {symbol}: {df.columns.tolist()}") # DEBUG: See columns after rename

        required_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
        
        # Ensure standard columns exist after renaming, potentially renaming lowercase versions
        for req_col in required_cols:
            if req_col not in df.columns and req_col.lower() in df.columns:
                df.rename(columns={req_col.lower(): req_col}, inplace=True)
                print(f"  Renamed lowercase '{req_col.lower()}' to '{req_col}' for {symbol}.")
        
        print(f"  Columns before final selection for {symbol}: {df.columns.tolist()}") # DEBUG

        # Check for duplicate column names *before* selection, especially for 'Close'
        if isinstance(df.columns, pd.MultiIndex):
            print(f"  Warning: DataFrame has a MultiIndex for columns for {symbol}. This is unexpected.")
        elif df.columns.duplicated().any():
            print(f"  Warning: Duplicate column names found for {symbol} BEFORE final selection: {df.columns[df.columns.duplicated()].tolist()}")
            # Attempt to resolve: keep first occurrence if duplicates are an issue for selection
            # This is a basic fix; ideally, the renaming logic should prevent duplicates of required_cols
            df = df.loc[:, ~df.columns.duplicated(keep='first')]
            print(f"    Attempted to remove duplicate columns, keeping first. New columns: {df.columns.tolist()}")


        missing_final_cols = [col for col in required_cols if col not in df.columns]
        if missing_final_cols:
            print(f"  Error: Missing required OHLCV columns for {symbol} after all processing: {missing_final_cols}. Available: {df.columns.tolist()}. Skipping.")
            return None
        
        # Select only the standardized columns to ensure order and uniqueness for these 5
        df_selected = df[required_cols].copy() # Use a new variable for clarity

        df_selected.sort_index(inplace=True) # Sort by date

        # --- Missing Value Handling & Type Conversion ---
        for col in required_cols: # Iterate over the known unique required_cols
            # df_selected[col] should now always be a Series
            if col not in df_selected.columns: # Should not happen if previous checks are good
                print(f"  Critical error: Column {col} missing before to_numeric for {symbol}. This indicates a logic flaw.")
                return None

            df_selected[col] = pd.to_numeric(df_selected[col], errors='coerce') # This is where the error occurred
            
            if df_selected[col].isnull().any():
                print(f"  Column '{col}' in {symbol} has NaNs. Filling...")
                df_selected[col].fillna(method='ffill', inplace=True)
                df_selected[col].fillna(method='bfill', inplace=True)

        df_selected.dropna(subset=required_cols, inplace=True)

        if df_selected.empty:
            print(f"  No valid data remaining for {symbol} after cleaning. Skipping.")
            return None

        print(f"  Successfully prepared data for {symbol}. Shape: {df_selected.shape}")
        return df_selected # Return the selected and cleaned DataFrame

    except FileNotFoundError:
        print(f"  Error: File not found: {csv_filepath}. Skipping.")
        return None
    except Exception as e:
        print(f"  Outer Error loading or processing {symbol}: {e}. Skipping.")
        import traceback
        traceback.print_exc()
        return None



In [6]:
# === BLOCK 3: LOAD DATA FOR ALL SYMBOLS ===
all_stocks_data = {}
for sym_iter in stock_symbols: # Changed loop variable
    # Call load_data without date filter arguments
    stock_df_loaded = load_data(sym_iter, Data_Dir) # Changed variable name
    if stock_df_loaded is not None and not stock_df_loaded.empty:
        all_stocks_data[sym_iter] = stock_df_loaded

if not all_stocks_data:
    print("\nNo stock data loaded successfully. Exiting script.")
    # exit() # Comment out exit() during debugging to see full output for all files
else:
    print(f"\nData loaded for {len(all_stocks_data)} stocks: {list(all_stocks_data.keys())}")



Loading: TSLA from ../data/TSLA_historical_data.csv
  Set 'Date' as DatetimeIndex for TSLA.
  Initial columns for TSLA: ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Dividends', 'Stock Splits']
  Both 'Close' and 'Adj Close' found for TSLA. Prioritizing 'Adj Close' and renaming it to 'Close'. Dropping original 'Close'.
  Columns after rename for TSLA: ['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits']
  Columns before final selection for TSLA: ['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits']
  Successfully prepared data for TSLA. Shape: (3545, 5)

Loading: NVDA from ../data/NVDA_historical_data.csv
  Set 'Date' as DatetimeIndex for NVDA.
  Initial columns for NVDA: ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Dividends', 'Stock Splits']
  Both 'Close' and 'Adj Close' found for NVDA. Prioritizing 'Adj Close' and renaming it to 'Close'. Dropping original 'Close'.
  Columns after rename for NVDA: ['Open', 'High', 'Low', '

In [7]:
# === BLOCK 4: CALCULATE INDICATORS AND PLOT FOR EACH STOCK ===
# (This block should work if load_data returns correctly structured DataFrames)
# Make sure to use the correct DataFrame variable from the loop (e.g., df_stock from your previous version)

for symbol_loop_var, df_stock_loop_var in all_stocks_data.items(): # Changed loop variables
    print(f"\n\n--- Processing: {symbol_loop_var} ---")

    # --- Calculate TA-Lib Indicators ---
    try:
        print(f"  Calculating TA indicators for {symbol_loop_var}...")
        df_stock_loop_var['SMA_20'] = talib.SMA(df_stock_loop_var['Close'], timeperiod=20)
        df_stock_loop_var['SMA_50'] = talib.SMA(df_stock_loop_var['Close'], timeperiod=50)
        df_stock_loop_var['SMA_200'] = talib.SMA(df_stock_loop_var['Close'], timeperiod=200) if len(df_stock_loop_var) >= 200 else pd.NA
        
        df_stock_loop_var['RSI_14'] = talib.RSI(df_stock_loop_var['Close'], timeperiod=14)
        
        macd, macdsignal, macdhist = talib.MACD(df_stock_loop_var['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
        df_stock_loop_var['MACD'] = macd
        df_stock_loop_var['MACD_Signal'] = macdsignal
        df_stock_loop_var['MACD_Hist'] = macdhist
        print(f"  Indicators calculated for {symbol_loop_var}.")
    except Exception as e:
        print(f"  Error calculating TA indicators for {symbol_loop_var}: {e}. Skipping plots for this stock.")
        indicator_cols = ['SMA_20', 'SMA_50', 'SMA_200', 'RSI_14', 'MACD', 'MACD_Signal', 'MACD_Hist']
        for ind_col in indicator_cols: 
            if ind_col not in df_stock_loop_var.columns: df_stock_loop_var[ind_col] = pd.NA
        
    # --- Plotting (ensure df_stock_loop_var is used) ---
    # Plot 1: Price and SMAs
    print(f"  Plotting Price & SMAs for {symbol_loop_var}...")
    plt.figure(figsize=(14, 7))
    plt.plot(df_stock_loop_var.index, df_stock_loop_var['Close'], label='Close Price', color='blue', alpha=0.7)
    if 'SMA_20' in df_stock_loop_var.columns and not df_stock_loop_var['SMA_20'].isnull().all():
        plt.plot(df_stock_loop_var.index, df_stock_loop_var['SMA_20'], label='SMA 20', color='orange', ls='--')
    if 'SMA_50' in df_stock_loop_var.columns and not df_stock_loop_var['SMA_50'].isnull().all():
        plt.plot(df_stock_loop_var.index, df_stock_loop_var['SMA_50'], label='SMA 50', color='green', ls='--')
    if 'SMA_200' in df_stock_loop_var.columns and not df_stock_loop_var['SMA_200'].isnull().all():
        plt.plot(df_stock_loop_var.index, df_stock_loop_var['SMA_200'], label='SMA 200', color='red', ls='--')
    plt.title(f'{symbol_loop_var} Closing Price & Moving Averages')
    plt.xlabel('Date'); plt.ylabel('Price')
    plt.legend(); plt.grid(True); plt.tight_layout()
    plt.savefig(os.path.join(Plots_Dir, f'{symbol_loop_var}_price_sma.png')); plt.close()

    # Plot 2: RSI
    if 'RSI_14' in df_stock_loop_var.columns and not df_stock_loop_var['RSI_14'].isnull().all():
        print(f"  Plotting RSI for {symbol_loop_var}...")
        plt.figure(figsize=(14, 5))
        plt.plot(df_stock_loop_var.index, df_stock_loop_var['RSI_14'], label='RSI 14', color='purple')
        plt.axhline(70, color='red', ls='--', alpha=0.5, label='Overbought (70)')
        plt.axhline(30, color='green', ls='--', alpha=0.5, label='Oversold (30)')
        plt.fill_between(df_stock_loop_var.index, 70, 100, color='red', alpha=0.1)
        plt.fill_between(df_stock_loop_var.index, 0, 30, color='green', alpha=0.1)
        plt.title(f'{symbol_loop_var} Relative Strength Index (RSI)')
        plt.xlabel('Date'); plt.ylabel('RSI Value (0-100)')
        plt.ylim(0, 100); plt.legend(); plt.grid(True); plt.tight_layout()
        plt.savefig(os.path.join(Plots_Dir, f'{symbol_loop_var}_rsi.png')); plt.close()
    else:
        print(f"  RSI data for {symbol_loop_var} not plotted (column missing or all NA).")

    # Plot 3: MACD
    if 'MACD' in df_stock_loop_var.columns and not df_stock_loop_var['MACD'].isnull().all():
        print(f"  Plotting MACD for {symbol_loop_var}...")
        fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True, gridspec_kw={'height_ratios': [2, 1]})
        
        axes[0].plot(df_stock_loop_var.index, df_stock_loop_var['MACD'], label='MACD', color='blue')
        axes[0].plot(df_stock_loop_var.index, df_stock_loop_var['MACD_Signal'], label='Signal Line', color='red', ls='--')
        axes[0].set_ylabel('MACD Value'); axes[0].legend(loc='upper left'); axes[0].grid(True)

        hist_colors = ['green' if val >= 0 else 'red' for val in df_stock_loop_var['MACD_Hist'].fillna(0)]
        axes[1].bar(df_stock_loop_var.index, df_stock_loop_var['MACD_Hist'], label='MACD Histogram', color=hist_colors, alpha=0.7, width=1.0)
        axes[1].axhline(0, color='grey', ls='--', lw=0.8)
        axes[1].set_xlabel('Date'); axes[1].set_ylabel('MACD Histogram'); axes[1].legend(loc='upper left'); axes[1].grid(True)

        fig.suptitle(f'{symbol_loop_var} MACD Analysis', fontsize=16)
        plt.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.savefig(os.path.join(Plots_Dir, f'{symbol_loop_var}_macd.png')); plt.close()
    else:
        print(f"  MACD data for {symbol_loop_var} not plotted (column missing or all NA).")

print(f"\n--- Task 2 Finished. Plots saved in {os.path.abspath(Plots_Dir)} ---")



--- Processing: TSLA ---
  Calculating TA indicators for TSLA...
  Indicators calculated for TSLA.
  Plotting Price & SMAs for TSLA...
  Plotting RSI for TSLA...
  Plotting MACD for TSLA...


--- Processing: NVDA ---
  Calculating TA indicators for NVDA...
  Indicators calculated for NVDA.
  Plotting Price & SMAs for NVDA...
  Plotting RSI for NVDA...
  Plotting MACD for NVDA...


--- Processing: META ---
  Calculating TA indicators for META...
  Indicators calculated for META.
  Plotting Price & SMAs for META...
  Plotting RSI for META...
  Plotting MACD for META...


--- Processing: AMZN ---
  Calculating TA indicators for AMZN...
  Indicators calculated for AMZN.
  Plotting Price & SMAs for AMZN...
  Plotting RSI for AMZN...
  Plotting MACD for AMZN...


--- Processing: GOOG ---
  Calculating TA indicators for GOOG...
  Indicators calculated for GOOG.
  Plotting Price & SMAs for GOOG...
  Plotting RSI for GOOG...
  Plotting MACD for GOOG...


--- Processing: AAPL ---
  Calculating