In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

In [3]:
df = pd.read_csv("S&P 1 year Data.csv", parse_dates = ["Date"])
df.set_index("Date" , inplace = True)
df.rename(columns={"Close/Last": "Close"}, inplace=True)

print(df.head())

              Close     Open     High      Low
Date                                          
2025-03-17  5675.12  5635.60  5703.52  5631.12
2025-03-14  5638.94  5563.85  5645.27  5563.85
2025-03-13  5521.52  5594.45  5597.78  5504.65
2025-03-12  5599.30  5624.84  5642.19  5546.09
2025-03-11  5572.07  5603.65  5636.30  5528.41


In [4]:
print(df.isnull().sum())

Close    0
Open     0
High     0
Low      0
dtype: int64


In [5]:
#Reverse the order of data
df = df.iloc[::-1]

In [6]:
#SMA = Pt/n where N = window size, or number of days, 
#Simple Moving Average (SMA) is a technical indicator used in financial markets to analyze trends 
#in stock prices by smoothing out short-term fluctuations.
def calculate_sma(prices, period):
    sma_values = []
    for i in range(len(prices)):
        if i< period - 1:
            sma_values.append(None)
        else:
            sma = np.mean(prices[i-period + 1 : i +1])
            sma_values.append(sma)
    return sma_values
df["SMA_5"] = df["Close"].rolling(window=5, min_periods=5).mean()
df["SMA_10"] = df["Close"].rolling(window=10, min_periods=10).mean()


In [7]:
df.head(10)

Unnamed: 0_level_0,Close,Open,High,Low,SMA_5,SMA_10
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-03-18,5149.42,5154.77,5175.6,5145.47,,
2024-03-19,5178.51,5139.09,5180.31,5131.59,,
2024-03-20,5224.62,5181.69,5226.19,5171.55,,
2024-03-21,5241.53,5253.43,5261.1,5240.66,,
2024-03-22,5234.18,5242.48,5246.09,5229.87,5205.652,
2024-03-25,5218.19,5219.52,5229.09,5216.09,5219.406,
2024-03-26,5203.58,5228.85,5235.16,5203.42,5224.42,
2024-03-27,5248.49,5226.31,5249.26,5213.92,5229.194,
2024-03-28,5254.35,5248.03,5264.85,5245.82,5231.758,
2024-04-01,5243.77,5257.97,5263.95,5229.2,5233.676,5219.664


In [8]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy="mean")  # Using mean to fill missing values
df[["SMA_5", "SMA_10"]] = imputer.fit_transform(df[["SMA_5", "SMA_10"]])
df.head(15)

Unnamed: 0_level_0,Close,Open,High,Low,SMA_5,SMA_10
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-03-18,5149.42,5154.77,5175.6,5145.47,5632.652626,5636.891988
2024-03-19,5178.51,5139.09,5180.31,5131.59,5632.652626,5636.891988
2024-03-20,5224.62,5181.69,5226.19,5171.55,5632.652626,5636.891988
2024-03-21,5241.53,5253.43,5261.1,5240.66,5632.652626,5636.891988
2024-03-22,5234.18,5242.48,5246.09,5229.87,5205.652,5636.891988
2024-03-25,5218.19,5219.52,5229.09,5216.09,5219.406,5636.891988
2024-03-26,5203.58,5228.85,5235.16,5203.42,5224.42,5636.891988
2024-03-27,5248.49,5226.31,5249.26,5213.92,5229.194,5636.891988
2024-03-28,5254.35,5248.03,5264.85,5245.82,5231.758,5636.891988
2024-04-01,5243.77,5257.97,5263.95,5229.2,5233.676,5219.664


In [9]:
def calculate_ema(prices, period):
    ema_values = []
    alpha = 2/ (period+1)
    ema_prev = None
    for i in range(len(prices)):
        price = prices[i]
        if i< period -1:
            ema_values.append(None) # EMA 9 needs 9 data points. So datasets 0~8 will not be counted
        elif i == period - 1:
            sma = np.mean(prices[:period])
            ema_values.append(sma)
            ema_prev = sma
        else:
            ema = alpha *price +(1-alpha)*ema_prev
            ema_values.append(ema)
            ema_prev = ema
    return ema_values
df["EMA_9"] = calculate_ema(df["Close"].tolist(), 9)
df["EMA_12"] = calculate_ema(df["Close"].tolist(), 20)
#Now we have EMA. The most common EMA used in financial area are 9 and 20


In [59]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy="mean")  # Using mean to fill missing values
df[["EMA_9", "EMA_12"]] = imputer.fit_transform(df[["EMA_9", "EMA_12"]])
df.head(10)


Unnamed: 0_level_0,Close,Open,High,Low,SMA_5,SMA_10,EMA_9,EMA_12
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-03-18,5149.42,5154.77,5175.6,5145.47,5149.42,5149.42,5149.42,5149.42
2024-03-19,5178.51,5139.09,5180.31,5131.59,5163.965,5163.965,5163.965,5163.965
2024-03-20,5224.62,5181.69,5226.19,5171.55,5184.183333,5184.183333,5184.183333,5184.183333
2024-03-21,5241.53,5253.43,5261.1,5240.66,5198.52,5198.52,5198.52,5198.52
2024-03-22,5234.18,5242.48,5246.09,5229.87,5205.652,5205.652,5205.652,5205.652
2024-03-25,5218.19,5219.52,5229.09,5216.09,5219.406,5207.741667,5219.406,5207.741667
2024-03-26,5203.58,5228.85,5235.16,5203.42,5224.42,5207.147143,5224.42,5207.147143
2024-03-27,5248.49,5226.31,5249.26,5213.92,5229.194,5212.315,5229.194,5212.315
2024-03-28,5254.35,5248.03,5264.85,5245.82,5231.758,5216.985556,5231.758,5216.985556
2024-04-01,5243.77,5257.97,5263.95,5229.2,5233.676,5219.664,5233.676,5219.664


In [72]:
#RSI, Relative Strong Index is an Indicator of historical strength or weakness of stock price on closing Period. Range is 0 to 100.
# Most commonly used time is 14 days.
def calculate_rsi_no_rolling(prices, period=14):
    deltas = prices.diff().tolist()
    rsi = []
    for i in range(len(deltas)):
        if i < period
            rsi.append(None)
        else
            gain = 0
            loss = 0
        for j in range(len(deltas)):
    rsi_values = []
    avg_gain = pd.Series(gain).rolling(window = period).mean()
    avg_loss = pd.Series(loss).rolling(window = period).mean()
    rs = avg_gain/avg_loss
    rsi = 100- (100/rs)
    return rsi