In [1]:
from datetime import datetime
from pyspark.sql import SparkSession

In [2]:
spark=SparkSession.builder.appName('Stock-Data-Analysis').getOrCreate()

23/03/26 08:58:46 WARN Utils: Your hostname, PJ-Ubuntu resolves to a loopback address: 127.0.1.1; using 10.0.0.251 instead (on interface wlo1)
23/03/26 08:58:46 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


23/03/26 08:58:48 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [3]:
spark

In [7]:
# Read the dataset 
df = spark.read.option('header', 'true').csv("./Data/FS_sp500_Value.csv").drop("_c0")
zts_df = df.select("*").where("ticker == 'ZTS'")
zts_df.show()

                                                                                

root
 |-- summary: string (nullable = true)
 |-- Ticker: string (nullable = true)
 |-- Date: string (nullable = true)
 |-- High: string (nullable = true)
 |-- Low: string (nullable = true)
 |-- Open: string (nullable = true)
 |-- Close: string (nullable = true)
 |-- Volume: string (nullable = true)
 |-- Adj Close: string (nullable = true)



                                                                                

+------+----------+------------------+------------------+------------------+------------------+--------+------------------+
|Ticker|      Date|              High|               Low|              Open|             Close|  Volume|         Adj Close|
+------+----------+------------------+------------------+------------------+------------------+--------+------------------+
|   ZTS|2013-02-01|31.739999771118164|30.469999313354492|              31.5|31.010000228881836|66789100|29.048494338989258|
|   ZTS|2013-02-04|31.989999771118164|30.760000228881836| 31.09000015258789|31.020000457763672| 7695400|29.057863235473633|
|   ZTS|2013-02-05|31.979999542236328|30.850000381469727|             31.25|31.040000915527344| 5013200| 29.07659339904785|
|   ZTS|2013-02-06| 31.43000030517578|             30.75|30.979999542236328|31.030000686645508| 2126100|29.067224502563477|
|   ZTS|2013-02-07| 32.72999954223633|              31.0|              31.0|              32.0| 3800800|29.975875854492188|
|   ZTS|

In [92]:
# Analysis functions 
from pyspark.sql import functions as F
from pyspark.sql.window import Window

def calcMovingAvg(df, col):
  # 14 day periods
  window = Window.partitionBy("Ticker").orderBy("Date").rowsBetween(-14, 0)
  df = df.withColumn("moving_avg", F.avg(col).over(window))
  return df.rdd.map(lambda x: x.moving_avg).collect()

def calcDailyPercentChange(df):
  window = Window.partitionBy("Ticker").orderBy("Ticker")
  df = df.withColumn("prev_value", F.lag(df.Close).over(window))
  df = df.withColumn("change", F.when(F.isnull( (df.Close - df.prev_value)/df.prev_value ), 0).otherwise(F.round(F.abs((df.Close - df.prev_value)/df.prev_value) *100, 3)) )
  return df.rdd.map(lambda x: x.change).collect()


def calcATR(df):
  window = Window.partitionBy("Ticker").orderBy("Ticker")
  df = df.withColumn("prev_value", F.lag(df.Close).over(window))
  df = df.withColumn("h-l", df.High-df.Low)
  df = df.withColumn("h-p", F.when(F.isnull( F.abs(df.High-df.prev_value)), 0).otherwise( F.abs(df.High-df.prev_value))) 
  df = df.withColumn("l-p", F.when(F.isnull( F.abs(df.Low-df.prev_value)), 0).otherwise( F.abs(df.Low-df.prev_value))) 
  df = df.withColumn("true_range", F.greatest("h-l", "h-p", "l-p"))
  return calcMovingAvg(df, "true_range")

In [94]:
print("Moving Average closing of ZTS:", calcMovingAvg(zts_df, 'Close'))
print("Daily Percent Change of ZTS: ", calcDailyPercentChange(zts_df))
print("ATR of ZTS: ", calcATR(zts_df))

Moving Average closing of ZTS: [31.010000228881836, 31.015000343322754, 31.02333386739095, 31.02500057220459, 31.22000045776367, 31.525000254313152, 31.772857121058873, 32.01875019073486, 32.18888897365994, 32.297000122070315, 32.45000006935813, 32.56583340962728, 32.57769247201773, 32.576428822108674, 32.57733357747396, 32.648000208536786, 32.71600023905436, 32.80133349100749, 32.962666829427086, 33.136000061035155, 33.22266667683919, 33.291333516438804, 33.298666636149086, 33.36600011189778, 33.420666758219404, 33.434000142415364, 33.43266677856445, 33.51800003051758, 33.6566665649414, 33.71333312988281, 33.76466649373372, 33.8153330485026, 33.894666290283205, 33.895999654134116, 33.797333017985025, 33.67133305867513, 33.55266621907552, 33.47866643269857, 33.40133310953776, 33.30133310953776, 33.192666371663414, 33.106666310628256, 32.96266632080078, 32.783999633789065, 32.749332936604816, 32.78933283487956, 32.81266632080078, 32.78933308919271, 32.77333297729492, 32.71466649373372, 