#### CLeaning training data( removing inconvertible rows of data and converting rest to date time )

In [16]:
import pandas as pd

# Read the data
df = pd.read_csv("stock_prices_training.csv")

# Strip spaces and convert Date Fetched column to datetime (Handles both date and timestamp formats)
df["Date Fetched"] = pd.to_datetime(df["Date Fetched"].str.strip(), errors='coerce')

# Check for any rows where Date Fetched couldn't be converted
if df["Date Fetched"].isna().sum() > 0:
    print("⚠️ Warning: Some dates could not be parsed. Check for inconsistencies.")

# Save cleaned data (optional)
df.to_csv("stock_prices_training_cleaned.csv", index=False)

# Display the first few rows
print(df.head())

  Stock Date Fetched  Last Price ($)
0  AAPL   2024-10-02          221.25
1  AAPL   2024-10-09          222.64
2  AAPL   2024-10-16          223.23
3  AAPL   2024-10-23          221.17
4  AAPL   2024-10-30          219.10


## SARIMA Model 

In [17]:
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX
from datetime import datetime, timedelta

# Read the Stock Prices Data
df = pd.read_csv("stock_prices_training_cleaned.csv", dtype={"Date Fetched": str})  # Read as string first
df["Date Fetched"] = df["Date Fetched"].str.strip()  # Remove any extra spaces
df["Date Fetched"] = pd.to_datetime(df["Date Fetched"], format="%Y-%m-%d")  # Convert to datetime

# Sort by Date (to ensure proper time series order)
df = df.sort_values(by=["Stock", "Date Fetched"]).reset_index(drop=True)

# Get Unique Stocks
unique_stocks = df["Stock"].unique()

# Store Predictions
predictions = []

today = datetime.today().strftime("%Y-%m-%d")  # Get today's date

# Iterate Over Each Stock to Apply SARIMA Model
for stock in unique_stocks:
    stock_data = df[df["Stock"] == stock]
    stock_data = stock_data.set_index("Date Fetched")["Last Price ($)"].dropna()
    
    # Check if there is enough data
    if len(stock_data) < 10:
        print(f"⚠️ Not enough data for {stock}, skipping...")
        continue
    
    # Fit SARIMA Model (Seasonal Order assumes weekly seasonality)
    try:
        model = SARIMAX(stock_data, order=(1,1,1), seasonal_order=(1,1,1,52), enforce_stationarity=False, enforce_invertibility=False)
        results = model.fit(disp=False)
        
        # Forecast Today's Price
        forecast = results.forecast(steps=1)
        predicted_price = forecast.iloc[0]
        predictions.append({"Stock": stock, "Predicted Price ($)": round(predicted_price, 2), "Date Predicted": today})
        
        print(f" Predicted Price for {stock} on {today}: ${predicted_price:.2f}")
    except Exception as e:
        print(f" Error processing {stock}: {e}")

# Convert Predictions to DataFrame
pred_df = pd.DataFrame(predictions)
print(pred_df)

# Save Predictions to CSV (Optional)
pred_df.to_csv("stock_price_predictions.csv", index=False)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for AAPL on 2025-04-05: $202.72


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for AMD on 2025-04-05: $95.17


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for AMZN on 2025-04-05: $179.24


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for AVGO on 2025-04-05: $156.33


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for BA on 2025-04-05: $152.57


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for BAC on 2025-04-05: $37.34


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for C on 2025-04-05: $63.45


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for CMCSA on 2025-04-05: $36.06


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for COP on 2025-04-05: $96.67


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for COST on 2025-04-05: $970.77


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for CVX on 2025-04-05: $157.34


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for DIS on 2025-04-05: $89.93


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for F on 2025-04-05: $9.66


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for GD on 2025-04-05: $270.94


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for GM on 2025-04-05: $46.14


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for GOOGL on 2025-04-05: $151.40


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for GS on 2025-04-05: $515.76


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for HD on 2025-04-05: $356.72


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for INTC on 2025-04-05: $23.47


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for JNJ on 2025-04-05: $160.32


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for JPM on 2025-04-05: $229.78


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for KO on 2025-04-05: $73.28


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for LCID on 2025-04-05: $2.34


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for LLY on 2025-04-05: $794.68


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for LMT on 2025-04-05: $456.57


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for META on 2025-04-05: $538.70


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for MNST on 2025-04-05: $60.28


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for MO on 2025-04-05: $57.99


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for MRNA on 2025-04-05: $26.14


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for MSFT on 2025-04-05: $375.52


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for NFLX on 2025-04-05: $927.02


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for NOC on 2025-04-05: $517.98


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for NVDA on 2025-04-05: $103.39


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for PEP on 2025-04-05: $151.91


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for PFE on 2025-04-05: $24.48


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for PG on 2025-04-05: $172.93


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for PSX on 2025-04-05: $108.11


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for QCOM on 2025-04-05: $140.65


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for RIVN on 2025-04-05: $11.68


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for RTX on 2025-04-05: $130.98


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for SLB on 2025-04-05: $39.60


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for T on 2025-04-05: $28.64


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for TGT on 2025-04-05: $94.18


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for TSLA on 2025-04-05: $268.38


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for TSM on 2025-04-05: $159.08


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for UNH on 2025-04-05: $547.35


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for VZ on 2025-04-05: $45.64


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for WFC on 2025-04-05: $66.17


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


 Predicted Price for WMT on 2025-04-05: $87.51
 Predicted Price for XOM on 2025-04-05: $113.43
    Stock  Predicted Price ($) Date Predicted
0    AAPL               202.72     2025-04-05
1     AMD                95.17     2025-04-05
2    AMZN               179.24     2025-04-05
3    AVGO               156.33     2025-04-05
4      BA               152.57     2025-04-05
5     BAC                37.34     2025-04-05
6       C                63.45     2025-04-05
7   CMCSA                36.06     2025-04-05
8     COP                96.67     2025-04-05
9    COST               970.77     2025-04-05
10    CVX               157.34     2025-04-05
11    DIS                89.93     2025-04-05
12      F                 9.66     2025-04-05
13     GD               270.94     2025-04-05
14     GM                46.14     2025-04-05
15  GOOGL               151.40     2025-04-05
16     GS               515.76     2025-04-05
17     HD               356.72     2025-04-05
18   INTC                23.47 

  return get_prediction_index(
  return get_prediction_index(


#### Arranging output in same order as stock universe

In [18]:
# Define the stock order from us_stock_universe
us_stock_universe = {
    "Technology": ["AAPL", "MSFT", "GOOGL", "NVDA", "META"],
    "Banking & Financials": ["JPM", "BAC", "WFC", "C", "GS"],
    "Consumer Goods": ["PG", "KO", "PEP", "MO", "MNST"],
    "Pharma & Healthcare": ["JNJ", "PFE", "MRNA", "LLY", "UNH"],
    "Energy": ["XOM", "CVX", "COP", "PSX", "SLB"],
    "Automobile": ["TSLA", "F", "GM", "RIVN", "LCID"],
    "Retail & E-commerce": ["AMZN", "WMT", "COST", "TGT", "HD"],
    "Aerospace & Defense": ["BA", "LMT", "NOC", "GD", "RTX"],
    "Telecom & Media": ["VZ", "T", "DIS", "NFLX", "CMCSA"],
    "Semiconductors": ["AMD", "TSM", "INTC", "AVGO", "QCOM"]
}

# Flatten the stock order list
stock_order = [stock for sector in us_stock_universe.values() for stock in sector]

# Load the CSV file
df = pd.read_csv("stock_price_predictions.csv")

# Ensure the stock column is treated as a category with the correct order
df["Stock"] = pd.Categorical(df["Stock"], categories=stock_order, ordered=True)

# Sort the dataframe by the defined stock order
df_sorted = df.sort_values("Stock")

# Save the sorted dataframe
df_sorted.to_csv("stock_price_predictions.csv", index=False)

print(" Data successfully saved in stock_price_predictions.csv in the correct sector and stock order.")


 Data successfully saved in stock_price_predictions.csv in the correct sector and stock order.


## Stock weight Assigning Model

In [19]:
import csv
import os

In [20]:
training_data=pd.read_csv("training_data.csv")

In [21]:
def fetch_row(file_name, row_index):
    with open(file_name, mode='r', newline='') as file:
        reader = csv.reader(file)
        next(reader)  # Skip the header row
        for idx, row in enumerate(reader):
            if idx == row_index:
                return [float(x) if x.strip() else 0.0 for x in row[2:10]] 
    return None

In [22]:
def Descent_w(x, y, num_rows, w, b, k):
    if num_rows == 0:
        return 0  # Avoid division by zero

    dex_w = 0
    nan_detected = False
    nan_index = -1  # Store the first index where NaN occurs

    for j in range(num_rows):
        term = (np.dot(x[j], w) + b - y[j]) * x[j][k]
        if np.isnan(term):
            nan_detected = True
            nan_index = j  # Store the index of the first NaN value
        dex_w += term

    dex_w /= num_rows  # Average gradient

    # Debugging NaN values
    if nan_detected:  # If any NaN was found inside sum
        print(f"⚠ NaN detected in Descent_w at index {k} (j={nan_index})!")
        print(f"x[{nan_index}]: {x[nan_index]}, y[{nan_index}]: {y[nan_index]}, w: {w}, b: {b}")
        dex_w = 0.0  # Prevent NaN propagation
    return dex_w


In [23]:
def Descent_b(x, y, num_rows, w, b):
    if num_rows == 0:
        return 0  # Avoid division by zero

    dex_b = sum(np.dot(x[j], w) + b - y[j] for j in range(num_rows))
    dex_b /= num_rows  # Average gradient

    # Debugging NaN values
    if np.isnan(dex_b):
        print("NaN detected in Descent_b!")
        dex_b = 0.0  # Prevent NaN propagation

    return dex_b

In [24]:
def Descent(x, y, num_rows, w, b):
    if num_rows == 0:
        return 0  # Avoid division by zero

    dex = (sum(np.dot(x[j], w) + b - y[j] for j in range(num_rows)))**2
    dex /= (num_rows*2)  # Average gradient

    # Debugging NaN values
    if np.isnan(dex):
        print("NaN detected in Descent_b!")
        dex = 0.0  # Prevent NaN propagation

    return dex

In [25]:
b = np.zeros((1))
w = np.zeros(8)  # All weights start from 0
learn_rate=0.001

temp_b=0
temp_w=np.zeros(8)

num_rows = training_data.shape[0]
num_columns=8
x_train=np.zeros((num_rows,num_columns))
y_train=np.zeros(num_rows)

for i in range(num_rows):
    x_train[i] = fetch_row("training_data.csv", i)
y_train = pd.read_csv("training_data.csv")["Weight"]

mean = np.mean(x_train, axis=0)
std = np.std(x_train, axis=0)
std[std == 0] = 1  # Prevent division by zero
x_train = (x_train - mean) / std

for iter in range(3):
    for i in range(num_rows):
        learn_rate = 0.01 / (1 + 0.001 * i)
        for j in range(8):
            des=Descent_w(x_train,y_train,num_rows,w,b,j)
            temp_w[j]=w[j]-learn_rate*des
        des_b=learn_rate*Descent_b(x_train,y_train,num_rows,w,b)
        temp_b=b-des_b
        for j in range(8):
            w[j]=temp_w[j]
        b=temp_b
        print(f"descent = {Descent(x_train,y_train,num_rows,w,b)}")

  temp_w[j]=w[j]-learn_rate*des


descent = [0.00049005]
descent = [0.00048031]
descent = [0.00047077]
descent = [0.00046143]
descent = [0.00045228]
descent = [0.00044333]
descent = [0.00043456]
descent = [0.00042597]
descent = [0.00041756]
descent = [0.00040932]
descent = [0.00040126]
descent = [0.00039336]
descent = [0.00038562]
descent = [0.00037805]
descent = [0.00037063]
descent = [0.00036336]
descent = [0.00035624]
descent = [0.00034927]
descent = [0.00034244]
descent = [0.00033576]
descent = [0.0003292]
descent = [0.00032279]
descent = [0.0003165]
descent = [0.00031034]
descent = [0.00030431]
descent = [0.0002984]
descent = [0.00029261]
descent = [0.00028694]
descent = [0.00028139]
descent = [0.00027595]
descent = [0.00027061]
descent = [0.00026539]
descent = [0.00026027]
descent = [0.00025526]
descent = [0.00025034]
descent = [0.00024553]
descent = [0.00024081]
descent = [0.00023619]
descent = [0.00023166]
descent = [0.00022722]
descent = [0.00022287]
descent = [0.00021861]
descent = [0.00021444]
descent = [0.0

In [26]:
x_test=np.zeros((50,8))
ratio=np.zeros(50)

for i in range(50):
    x_test[i]=fetch_row("testing_data.csv",i)
    ratio[i]=np.dot(x_test[i],w)+b

mean = np.mean(x_test, axis=0)
std = np.std(x_test, axis=0)
std[std == 0] = 1  # Prevent division by zero
x_test = (x_test - mean) / std

final_result={
    "Stock": pd.read_csv("testing_data.csv")["Stock"],
    "Price":pd.read_csv("testing_data.csv")["Price"],
    "Weights":ratio
}
df=pd.DataFrame(final_result)
df.to_csv("results.csv",index=False)
w

  ratio[i]=np.dot(x_test[i],w)+b


array([-1.21047815e-11, -2.10051376e-11,  6.10272970e-11, -1.53588784e-11,
        5.73769570e-11, -6.01759802e-04,  4.16935844e-04, -1.21164876e-02])

In [27]:
def create_csv(filename="training_data.csv"):
    columns = ["Sector", "Stock", "Price", "P/E", "Sector Lowest", "52 Week High", "200d-EMA", "Alpha", "Beta", "Worth", "Weight"]
    if not os.path.exists(filename):
        df = pd.DataFrame(columns=columns)
        df.to_csv(filename, index=False)
        print(f"{filename} created successfully.")
    else:
        print(f"{filename} already exists.")

def update_csv(sector, stock, price, pe, sector_lowest, high_52, dema, alpha, beta, worth, weight, filename="training_data.csv"):
    data = {
        "Sector": sector,
        "Stock": stock,
        "Price": price,
        "P/E": pe,
        "Sector Lowest": sector_lowest,
        "52 Week High": high_52,
        "200d-EMA": dema,
        "Alpha": alpha,
        "Beta": beta,
        "Worth": worth,
        "Weight": weight
    }
    
    df = pd.DataFrame(data)
    df.to_csv(filename, mode='a', header=not os.path.exists(filename), index=False)
    print(f"Data appended to {filename} successfully.")

# Example Usage:
# Create the CSV file if not exists
create_csv()

# Example data to update the CSV file
update_csv(
    sector=pd.read_csv("testing_data.csv")["Sector"],
    stock=pd.read_csv("testing_data.csv")["Stock"],
    price=pd.read_csv("testing_data.csv")["Price"],
    pe=pd.read_csv("testing_data.csv")["P/E"],
    sector_lowest=pd.read_csv("testing_data.csv")["Sector Lowest"],
    high_52=pd.read_csv("testing_data.csv")["52 Week High"],
    dema=pd.read_csv("testing_data.csv")["200d-EMA"],
    alpha=pd.read_csv("testing_data.csv")["Alpha"],
    beta=pd.read_csv("testing_data.csv")["Beta"],
    worth=pd.read_csv("testing_data.csv")["Worth"],
    weight=ratio
)

training_data.csv already exists.
Data appended to training_data.csv successfully.


## Picking the 10 Best Stocks

In [28]:
results=pd.read_csv("results.csv")
chosen_index=[-1 for i in range(10)]
liquidity=pd.read_csv("liquidity.csv")["Amihud's Illiquidity Ratio"]
stock_data=pd.read_csv("stock_prices.csv")
stock_sector=stock_data["Sector"]
stock_name=stock_data["Stock"]
stock_price=stock_data["Last Price ($)"]

In [29]:
weights=results["Weights"]
num_row=len(weights)
weight_sum=0

In [30]:
for j in range(10):
    max=0
    max_index=-1
    for i in range(num_row):
        if not(i in chosen_index):
            if max<weights[i]:
                max=weights[i]
                max_index=i
    chosen_index[j]=max_index
    weight_sum+=weights[max_index]

In [31]:
data={
    "Sector":[stock_sector[i] for i in chosen_index],
    "Stock": [stock_name[i] for i in chosen_index],
    "Price": [stock_price[i] for i in chosen_index],
    "Liquidity": [liquidity[i] for i in chosen_index],
    "Weights": [weights[i]*100/weight_sum for i in chosen_index]
}
df = pd.DataFrame(data)
df.to_csv("Stock_final_investment.csv",index=False)