In [119]:
import os
import requests
from pyspark.sql.functions import lit
from pyspark.sql import SparkSession, functions as f

In [120]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment, then read the
# API key that the Alpha Vantage request URLs below interpolate as `apikey`.
# `key` is None when the KEY variable is not set.
load_dotenv()
key = os.getenv("KEY")

# Reuse the active SparkSession if there is one, otherwise create it.
spark = SparkSession.builder.getOrCreate()

Amazon.com Inc (AMZN)

In [121]:
# Daily open/high/low/close for Amazon on one trading day, taken from Alpha Vantage's
# TIME_SERIES_DAILY_ADJUSTED endpoint and shaped into a one-row DataFrame.
symbol = 'AMZN'
company_name = 'Amazon.com Inc'
# NOTE(review): hard-coded trading day. The select below fails as soon as the response
# no longer carries this date -- consider promoting it to a notebook-level setting.
trading_day = '2022-12-22'

url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
if trading_day not in payload.get("Time Series (Daily)", {}):
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"No {trading_day} daily bar for {symbol} in the response; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

bar = f"Time Series (Daily).{trading_day}"
amzn_com_df = df.select(
    f.col(f"{bar}.`1. open`").alias("open"),
    f.col(f"{bar}.`2. high`").alias("high"),
    f.col(f"{bar}.`3. low`").alias("low"),
    f.col(f"{bar}.`4. close`").alias("close"),
    lit(company_name).alias("company_name"),
)
amzn_com_df.show()

+-----+-----+-----+-----+--------------+
| open| high|  low|close|  company_name|
+-----+-----+-----+-----+--------------+
|85.52|85.68|82.25|83.79|Amazon.com Inc|
+-----+-----+-----+-----+--------------+



In [122]:
# PERatio and GrossProfitTTM for Amazon from Alpha Vantage's OVERVIEW endpoint,
# shaped into a one-row DataFrame.
symbol = 'AMZN'
company_name = 'Amazon.com Inc'

url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
missing = {"PERatio", "GrossProfitTTM"} - set(payload)
if missing:
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"OVERVIEW response for {symbol} is missing {sorted(missing)}; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

# The label column is spelled "companyname" (no underscore): the join cell matches it
# against "company_name" on the quote frame and then selects "company_name".
amzn_com_df1 = df.select(
    "PERatio",
    "GrossProfitTTM",
    lit(company_name).alias("companyname"),
)
amzn_com_df1.show()

+-------+--------------+--------------+
|PERatio|GrossProfitTTM|   companyname|
+-------+--------------+--------------+
|  76.12|  197478000000|Amazon.com Inc|
+-------+--------------+--------------+



In [123]:
# Operating cash flow, profit/loss and net income for Amazon, read from one entry of the
# `annualReports` array returned by Alpha Vantage's CASH_FLOW endpoint.
symbol = 'AMZN'
company_name = 'Amazon.com Inc'
# NOTE(review): index 1 is the *second* entry of annualReports, not the first --
# confirm that skipping entry 0 is intended.
report_idx = 1

url = f'https://www.alphavantage.co/query?function=CASH_FLOW&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
if "annualReports" not in payload:
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"CASH_FLOW response for {symbol} has no annualReports; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

# Selecting a field through the array gives one array per field; getItem picks the report.
amzn_com_cash_flow_f = df.select(
    f.col("annualReports.operatingCashflow").getItem(report_idx).alias("operatingCashflow"),
    f.col("annualReports.profitLoss").getItem(report_idx).alias("profitLoss"),
    f.col("annualReports.netIncome").getItem(report_idx).alias("netIncome"),
    lit(company_name).alias("companyname"),
)
amzn_com_cash_flow_f.show()

+-----------------+-----------+-----------+--------------+
|operatingCashflow| profitLoss|  netIncome|   companyname|
+-----------------+-----------+-----------+--------------+
|      66064000000|21331000000|21331000000|Amazon.com Inc|
+-----------------+-----------+-----------+--------------+



In [124]:
# Combine Amazon's daily quote with its OVERVIEW fundamentals by matching the company label.
# NOTE(review): GrossProfitTTM is exposed as "profit_margin" although the value looks like
# an amount rather than a margin; later cells select it under this name.
same_company = amzn_com_df.company_name == amzn_com_df1.companyname
amzn_com = (
    amzn_com_df.join(amzn_com_df1, same_company, "inner")
    .withColumnRenamed("GrossProfitTTM", "profit_margin")
    .select("open", "high", "low", "close", "PERatio", "profit_margin", "company_name")
)
amzn_com.show(truncate=False)

+-----+-----+-----+-----+-------+-------------+--------------+
|open |high |low  |close|PERatio|profit_margin|company_name  |
+-----+-----+-----+-----+-------+-------------+--------------+
|85.52|85.68|82.25|83.79|76.12  |197478000000 |Amazon.com Inc|
+-----+-----+-----+-----+-------+-------------+--------------+



                                                                                

In [125]:
# Append the cash-flow figures to Amazon's combined quote/fundamentals row.
final_columns = [
    "open", "high", "low", "close", "PERatio", "profit_margin",
    "operatingCashflow", "profitLoss", "netIncome", "company_name",
]
name_match = amzn_com.company_name == amzn_com_cash_flow_f.companyname
amzn_with_cash_flow = (
    amzn_com.join(amzn_com_cash_flow_f, name_match, "inner")
    .select(*final_columns)
)
amzn_with_cash_flow.show()



+-----+-----+-----+-----+-------+-------------+-----------------+-----------+-----------+--------------+
| open| high|  low|close|PERatio|profit_margin|operatingCashflow| profitLoss|  netIncome|  company_name|
+-----+-----+-----+-----+-------+-------------+-----------------+-----------+-----------+--------------+
|85.52|85.68|82.25|83.79|  76.12| 197478000000|      66064000000|21331000000|21331000000|Amazon.com Inc|
+-----+-----+-----+-----+-------+-------------+-----------------+-----------+-----------+--------------+



                                                                                

Intel Corporation (INTC)

In [126]:
# Daily open/high/low/close for Intel on one trading day, taken from Alpha Vantage's
# TIME_SERIES_DAILY_ADJUSTED endpoint and shaped into a one-row DataFrame.
symbol = 'INTC'
company_name = 'Intel Corporation'
# NOTE(review): hard-coded trading day. The select below fails as soon as the response
# no longer carries this date -- consider promoting it to a notebook-level setting.
trading_day = '2022-12-22'

url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
if trading_day not in payload.get("Time Series (Daily)", {}):
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"No {trading_day} daily bar for {symbol} in the response; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

bar = f"Time Series (Daily).{trading_day}"
intel_com_df = df.select(
    f.col(f"{bar}.`1. open`").alias("open"),
    f.col(f"{bar}.`2. high`").alias("high"),
    f.col(f"{bar}.`3. low`").alias("low"),
    f.col(f"{bar}.`4. close`").alias("close"),
    lit(company_name).alias("company_name"),
)
intel_com_df.show()

+-----+------+-----+-----+-----------------+
| open|  high|  low|close|     company_name|
+-----+------+-----+-----+-----------------+
|26.45|26.452|25.35|25.97|Intel Corporation|
+-----+------+-----+-----+-----------------+



In [127]:
# PERatio and GrossProfitTTM for Intel from Alpha Vantage's OVERVIEW endpoint,
# shaped into a one-row DataFrame.
symbol = 'INTC'
company_name = 'Intel Corporation'

url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
missing = {"PERatio", "GrossProfitTTM"} - set(payload)
if missing:
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"OVERVIEW response for {symbol} is missing {sorted(missing)}; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

# The label column is spelled "companyname" (no underscore): the join cell matches it
# against "company_name" on the quote frame and then selects "company_name".
intel_com_df1 = df.select(
    "PERatio",
    "GrossProfitTTM",
    lit(company_name).alias("companyname"),
)
intel_com_df1.show()

+-------+--------------+-----------------+
|PERatio|GrossProfitTTM|      companyname|
+-------+--------------+-----------------+
|   8.08|   43815000000|Intel Corporation|
+-------+--------------+-----------------+



In [128]:
# Operating cash flow, profit/loss and net income for Intel, read from one entry of the
# `annualReports` array returned by Alpha Vantage's CASH_FLOW endpoint.
symbol = 'INTC'
company_name = 'Intel Corporation'
# NOTE(review): index 1 is the *second* entry of annualReports, not the first --
# confirm that skipping entry 0 is intended.
report_idx = 1

url = f'https://www.alphavantage.co/query?function=CASH_FLOW&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
if "annualReports" not in payload:
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"CASH_FLOW response for {symbol} has no annualReports; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

# Selecting a field through the array gives one array per field; getItem picks the report.
intel_com_cash_flow_f = df.select(
    f.col("annualReports.operatingCashflow").getItem(report_idx).alias("operatingCashflow"),
    f.col("annualReports.profitLoss").getItem(report_idx).alias("profitLoss"),
    f.col("annualReports.netIncome").getItem(report_idx).alias("netIncome"),
    lit(company_name).alias("companyname"),
)
intel_com_cash_flow_f.show()

+-----------------+-----------+-----------+-----------------+
|operatingCashflow| profitLoss|  netIncome|      companyname|
+-----------------+-----------+-----------+-----------------+
|      35384000000|20899000000|20899000000|Intel Corporation|
+-----------------+-----------+-----------+-----------------+



In [129]:
# Combine Intel's daily quote with its OVERVIEW fundamentals by matching the company label.
# NOTE(review): GrossProfitTTM is exposed as "profit_margin" although the value looks like
# an amount rather than a margin; later cells select it under this name.
same_company = intel_com_df.company_name == intel_com_df1.companyname
intel_com = (
    intel_com_df.join(intel_com_df1, same_company, "inner")
    .withColumnRenamed("GrossProfitTTM", "profit_margin")
    .select("open", "high", "low", "close", "PERatio", "profit_margin", "company_name")
)
intel_com.show(truncate=False)

+-----+------+-----+-----+-------+-------------+-----------------+
|open |high  |low  |close|PERatio|profit_margin|company_name     |
+-----+------+-----+-----+-------+-------------+-----------------+
|26.45|26.452|25.35|25.97|8.08   |43815000000  |Intel Corporation|
+-----+------+-----+-----+-------+-------------+-----------------+



                                                                                

In [130]:
# Append the cash-flow figures to Intel's combined quote/fundamentals row.
final_columns = [
    "open", "high", "low", "close", "PERatio", "profit_margin",
    "operatingCashflow", "profitLoss", "netIncome", "company_name",
]
name_match = intel_com.company_name == intel_com_cash_flow_f.companyname
intel_with_cash_flow = (
    intel_com.join(intel_com_cash_flow_f, name_match, "inner")
    .select(*final_columns)
)
intel_with_cash_flow.show()



+-----+------+-----+-----+-------+-------------+-----------------+-----------+-----------+-----------------+
| open|  high|  low|close|PERatio|profit_margin|operatingCashflow| profitLoss|  netIncome|     company_name|
+-----+------+-----+-----+-------+-------------+-----------------+-----------+-----------+-----------------+
|26.45|26.452|25.35|25.97|   8.08|  43815000000|      35384000000|20899000000|20899000000|Intel Corporation|
+-----+------+-----+-----+-------+-------------+-----------------+-----------+-----------+-----------------+



                                                                                

JPMorgan Chase & Co (JPM)

In [131]:
# Daily open/high/low/close for JPMorgan Chase on one trading day, taken from Alpha
# Vantage's TIME_SERIES_DAILY_ADJUSTED endpoint and shaped into a one-row DataFrame.
symbol = 'JPM'
company_name = 'JPMorgan Chase & Co'
# NOTE(review): hard-coded trading day. The select below fails as soon as the response
# no longer carries this date -- consider promoting it to a notebook-level setting.
trading_day = '2022-12-22'

url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
if trading_day not in payload.get("Time Series (Daily)", {}):
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"No {trading_day} daily bar for {symbol} in the response; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

bar = f"Time Series (Daily).{trading_day}"
jp_com_df = df.select(
    f.col(f"{bar}.`1. open`").alias("open"),
    f.col(f"{bar}.`2. high`").alias("high"),
    f.col(f"{bar}.`3. low`").alias("low"),
    f.col(f"{bar}.`4. close`").alias("close"),
    lit(company_name).alias("company_name"),
)
jp_com_df.show()

+-----+-----+------+------+-------------------+
| open| high|   low| close|       company_name|
+-----+-----+------+------+-------------------+
|131.1|131.3|128.41|130.66|JPMorgan Chase & Co|
+-----+-----+------+------+-------------------+



In [132]:
# PERatio and GrossProfitTTM for JPMorgan Chase from Alpha Vantage's OVERVIEW endpoint,
# shaped into a one-row DataFrame.
symbol = 'JPM'
company_name = 'JPMorgan Chase & Co'

url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
missing = {"PERatio", "GrossProfitTTM"} - set(payload)
if missing:
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"OVERVIEW response for {symbol} is missing {sorted(missing)}; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

# The label column is spelled "companyname" (no underscore): the join cell matches it
# against "company_name" on the quote frame and then selects "company_name".
jp_com_df1 = df.select(
    "PERatio",
    "GrossProfitTTM",
    lit(company_name).alias("companyname"),
)
jp_com_df1.show()

+-------+--------------+-------------------+
|PERatio|GrossProfitTTM|        companyname|
+-------+--------------+-------------------+
|  11.08|  130898000000|JPMorgan Chase & Co|
+-------+--------------+-------------------+



In [133]:
# Operating cash flow, profit/loss and net income for JPMorgan Chase, read from one entry
# of the `annualReports` array returned by Alpha Vantage's CASH_FLOW endpoint.
symbol = 'JPM'
company_name = 'JPMorgan Chase & Co'
# NOTE(review): index 1 is the *second* entry of annualReports, not the first --
# confirm that skipping entry 0 is intended.
report_idx = 1

url = f'https://www.alphavantage.co/query?function=CASH_FLOW&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
if "annualReports" not in payload:
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"CASH_FLOW response for {symbol} has no annualReports; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

# Selecting a field through the array gives one array per field; getItem picks the report.
jpm_com_cash_flow_f = df.select(
    f.col("annualReports.operatingCashflow").getItem(report_idx).alias("operatingCashflow"),
    f.col("annualReports.profitLoss").getItem(report_idx).alias("profitLoss"),
    f.col("annualReports.netIncome").getItem(report_idx).alias("netIncome"),
    lit(company_name).alias("companyname"),
)
jpm_com_cash_flow_f.show()

+-----------------+-----------+-----------+-------------------+
|operatingCashflow| profitLoss|  netIncome|        companyname|
+-----------------+-----------+-----------+-------------------+
|     -79910000000|29131000000|29131000000|JPMorgan Chase & Co|
+-----------------+-----------+-----------+-------------------+



In [134]:
# Combine JPMorgan Chase's daily quote with its OVERVIEW fundamentals by matching the
# company label.
# NOTE(review): GrossProfitTTM is exposed as "profit_margin" although the value looks like
# an amount rather than a margin; later cells select it under this name.
same_company = jp_com_df.company_name == jp_com_df1.companyname
jp_com = (
    jp_com_df.join(jp_com_df1, same_company, "inner")
    .withColumnRenamed("GrossProfitTTM", "profit_margin")
    .select("open", "high", "low", "close", "PERatio", "profit_margin", "company_name")
)
jp_com.show(truncate=False)

+-----+-----+------+------+-------+-------------+-------------------+
|open |high |low   |close |PERatio|profit_margin|company_name       |
+-----+-----+------+------+-------+-------------+-------------------+
|131.1|131.3|128.41|130.66|11.08  |130898000000 |JPMorgan Chase & Co|
+-----+-----+------+------+-------+-------------+-------------------+



                                                                                

In [135]:
# Append the cash-flow figures to JPMorgan Chase's combined quote/fundamentals row.
final_columns = [
    "open", "high", "low", "close", "PERatio", "profit_margin",
    "operatingCashflow", "profitLoss", "netIncome", "company_name",
]
name_match = jp_com.company_name == jpm_com_cash_flow_f.companyname
jpm_with_cash_flow = (
    jp_com.join(jpm_com_cash_flow_f, name_match, "inner")
    .select(*final_columns)
)
jpm_with_cash_flow.show()



+-----+-----+------+------+-------+-------------+-----------------+-----------+-----------+-------------------+
| open| high|   low| close|PERatio|profit_margin|operatingCashflow| profitLoss|  netIncome|       company_name|
+-----+-----+------+------+-------+-------------+-----------------+-----------+-----------+-------------------+
|131.1|131.3|128.41|130.66|  11.08| 130898000000|     -79910000000|29131000000|29131000000|JPMorgan Chase & Co|
+-----+-----+------+------+-------+-------------+-----------------+-----------+-----------+-------------------+



                                                                                

Coca-Cola Co (KO)

In [139]:
# Daily open/high/low/close for Coca-Cola on one trading day, taken from Alpha Vantage's
# TIME_SERIES_DAILY_ADJUSTED endpoint and shaped into a one-row DataFrame.
symbol = 'KO'
company_name = 'Coca-Cola Co'
# NOTE(review): hard-coded trading day. The select below fails as soon as the response
# no longer carries this date -- consider promoting it to a notebook-level setting.
trading_day = '2022-12-22'

url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
if trading_day not in payload.get("Time Series (Daily)", {}):
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"No {trading_day} daily bar for {symbol} in the response; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

bar = f"Time Series (Daily).{trading_day}"
coca_com_df = df.select(
    f.col(f"{bar}.`1. open`").alias("open"),
    f.col(f"{bar}.`2. high`").alias("high"),
    f.col(f"{bar}.`3. low`").alias("low"),
    f.col(f"{bar}.`4. close`").alias("close"),
    lit(company_name).alias("company_name"),
)
coca_com_df.show()

+-----+-----+------+-----+------------+
| open| high|   low|close|company_name|
+-----+-----+------+-----+------------+
|63.42|63.59|62.645|63.34|Coca-Cola Co|
+-----+-----+------+-----+------------+



In [140]:
# PERatio and GrossProfitTTM for Coca-Cola from Alpha Vantage's OVERVIEW endpoint,
# shaped into a one-row DataFrame.
symbol = 'KO'
company_name = 'Coca-Cola Co'

url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
missing = {"PERatio", "GrossProfitTTM"} - set(payload)
if missing:
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"OVERVIEW response for {symbol} is missing {sorted(missing)}; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

# The label column is spelled "companyname" (no underscore): the join cell matches it
# against "company_name" on the quote frame and then selects "company_name".
coca_com_df1 = df.select(
    "PERatio",
    "GrossProfitTTM",
    lit(company_name).alias("companyname"),
)
coca_com_df1.show()

+-------+--------------+------------+
|PERatio|GrossProfitTTM| companyname|
+-------+--------------+------------+
|  27.99|   23298000000|Coca-Cola Co|
+-------+--------------+------------+



In [141]:
# Operating cash flow, profit/loss and net income for Coca-Cola, read from one entry of the
# `annualReports` array returned by Alpha Vantage's CASH_FLOW endpoint.
symbol = 'KO'
company_name = 'Coca-Cola Co'
# NOTE(review): index 1 is the *second* entry of annualReports, not the first --
# confirm that skipping entry 0 is intended.
report_idx = 1

url = f'https://www.alphavantage.co/query?function=CASH_FLOW&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
if "annualReports" not in payload:
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"CASH_FLOW response for {symbol} has no annualReports; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

# Selecting a field through the array gives one array per field; getItem picks the report.
coca_com_cash_flow_f = df.select(
    f.col("annualReports.operatingCashflow").getItem(report_idx).alias("operatingCashflow"),
    f.col("annualReports.profitLoss").getItem(report_idx).alias("profitLoss"),
    f.col("annualReports.netIncome").getItem(report_idx).alias("netIncome"),
    lit(company_name).alias("companyname"),
)
coca_com_cash_flow_f.show()

+-----------------+----------+----------+------------+
|operatingCashflow|profitLoss| netIncome| companyname|
+-----------------+----------+----------+------------+
|       9844000000|7768000000|7747000000|Coca-Cola Co|
+-----------------+----------+----------+------------+



In [142]:
# Combine Coca-Cola's daily quote with its OVERVIEW fundamentals by matching the company label.
# NOTE(review): GrossProfitTTM is exposed as "profit_margin" although the value looks like
# an amount rather than a margin; later cells select it under this name.
same_company = coca_com_df.company_name == coca_com_df1.companyname
coca_cola_com = (
    coca_com_df.join(coca_com_df1, same_company, "inner")
    .withColumnRenamed("GrossProfitTTM", "profit_margin")
    .select("open", "high", "low", "close", "PERatio", "profit_margin", "company_name")
)
coca_cola_com.show(truncate=False)

+-----+-----+------+-----+-------+-------------+------------+
|open |high |low   |close|PERatio|profit_margin|company_name|
+-----+-----+------+-----+-------+-------------+------------+
|63.42|63.59|62.645|63.34|27.99  |23298000000  |Coca-Cola Co|
+-----+-----+------+-----+-------+-------------+------------+



                                                                                

In [143]:
# Append the cash-flow figures to Coca-Cola's combined quote/fundamentals row.
final_columns = [
    "open", "high", "low", "close", "PERatio", "profit_margin",
    "operatingCashflow", "profitLoss", "netIncome", "company_name",
]
name_match = coca_cola_com.company_name == coca_com_cash_flow_f.companyname
coca_with_cash_flow = (
    coca_cola_com.join(coca_com_cash_flow_f, name_match, "inner")
    .select(*final_columns)
)
coca_with_cash_flow.show()



+-----+-----+------+-----+-------+-------------+-----------------+----------+----------+------------+
| open| high|   low|close|PERatio|profit_margin|operatingCashflow|profitLoss| netIncome|company_name|
+-----+-----+------+-----+-------+-------------+-----------------+----------+----------+------------+
|63.42|63.59|62.645|63.34|  27.99|  23298000000|       9844000000|7768000000|7747000000|Coca-Cola Co|
+-----+-----+------+-----+-------+-------------+-----------------+----------+----------+------------+



                                                                                

Walt Disney Company (DIS)

In [144]:
# Daily open/high/low/close for Walt Disney on one trading day, taken from Alpha Vantage's
# TIME_SERIES_DAILY_ADJUSTED endpoint and shaped into a one-row DataFrame.
symbol = 'DIS'
company_name = 'Walt Disney Company'
# NOTE(review): hard-coded trading day. The select below fails as soon as the response
# no longer carries this date -- consider promoting it to a notebook-level setting.
trading_day = '2022-12-22'

url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
if trading_day not in payload.get("Time Series (Daily)", {}):
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"No {trading_day} daily bar for {symbol} in the response; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

bar = f"Time Series (Daily).{trading_day}"
disney_com_df = df.select(
    f.col(f"{bar}.`1. open`").alias("open"),
    f.col(f"{bar}.`2. high`").alias("high"),
    f.col(f"{bar}.`3. low`").alias("low"),
    f.col(f"{bar}.`4. close`").alias("close"),
    lit(company_name).alias("company_name"),
)
disney_com_df.show()

+-----+-----+-----+-----+-------------------+
| open| high|  low|close|       company_name|
+-----+-----+-----+-----+-------------------+
|86.03|86.73|84.69|86.67|Walt Disney Company|
+-----+-----+-----+-----+-------------------+



In [145]:
# PERatio and GrossProfitTTM for Walt Disney from Alpha Vantage's OVERVIEW endpoint,
# shaped into a one-row DataFrame.
symbol = 'DIS'
company_name = 'Walt Disney Company'

url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
missing = {"PERatio", "GrossProfitTTM"} - set(payload)
if missing:
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"OVERVIEW response for {symbol} is missing {sorted(missing)}; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

# The label column is spelled "companyname" (no underscore): the join cell matches it
# against "company_name" on the quote frame and then selects "company_name".
disney_com_df1 = df.select(
    "PERatio",
    "GrossProfitTTM",
    lit(company_name).alias("companyname"),
)
disney_com_df1.show()

+-------+--------------+-------------------+
|PERatio|GrossProfitTTM|        companyname|
+-------+--------------+-------------------+
|  49.44|   28321000000|Walt Disney Company|
+-------+--------------+-------------------+



In [148]:
# Operating cash flow, profit/loss and net income for Walt Disney, read from one entry of
# the `annualReports` array returned by Alpha Vantage's CASH_FLOW endpoint.
symbol = 'DIS'
company_name = 'Walt Disney Company'
# NOTE(review): index 1 is the *second* entry of annualReports, not the first --
# confirm that skipping entry 0 is intended.
report_idx = 1

url = f'https://www.alphavantage.co/query?function=CASH_FLOW&symbol={symbol}&apikey={key}'
# requests applies no timeout unless one is given, so a stalled call would hang the kernel.
r = requests.get(url, timeout=30)
r.raise_for_status()
payload = r.json()
if "annualReports" not in payload:
    # Fail with a readable message instead of an opaque Spark schema error.
    raise RuntimeError(
        f"CASH_FLOW response for {symbol} has no annualReports; top-level keys: {sorted(payload)}"
    )

# Hand Spark the raw JSON text. An RDD of dicts is converted with str() by the reader,
# which yields a Python repr rather than JSON and only parses while no value needs escaping.
rdd = spark.sparkContext.parallelize([r.text])
df = spark.read.json(rdd)

# Selecting a field through the array gives one array per field; getItem picks the report.
dis_com_cash_flow_f = df.select(
    f.col("annualReports.operatingCashflow").getItem(report_idx).alias("operatingCashflow"),
    f.col("annualReports.profitLoss").getItem(report_idx).alias("profitLoss"),
    f.col("annualReports.netIncome").getItem(report_idx).alias("netIncome"),
    lit(company_name).alias("companyname"),
)
dis_com_cash_flow_f.show()

+-----------------+----------+----------+-------------------+
|operatingCashflow|profitLoss| netIncome|        companyname|
+-----------------+----------+----------+-------------------+
|      12035000000|2507000000|1995000000|Walt Disney Company|
+-----------------+----------+----------+-------------------+



In [149]:
# Combine Walt Disney's daily quote with its OVERVIEW fundamentals by matching the company label.
# NOTE(review): GrossProfitTTM is exposed as "profit_margin" although the value looks like
# an amount rather than a margin; later cells select it under this name.
same_company = disney_com_df.company_name == disney_com_df1.companyname
disney_com = (
    disney_com_df.join(disney_com_df1, same_company, "inner")
    .withColumnRenamed("GrossProfitTTM", "profit_margin")
    .select("open", "high", "low", "close", "PERatio", "profit_margin", "company_name")
)
disney_com.show(truncate=False)

+-----+-----+-----+-----+-------+-------------+-------------------+
|open |high |low  |close|PERatio|profit_margin|company_name       |
+-----+-----+-----+-----+-------+-------------+-------------------+
|86.03|86.73|84.69|86.67|49.44  |28321000000  |Walt Disney Company|
+-----+-----+-----+-----+-------+-------------+-------------------+



In [150]:
# Append the cash-flow figures to Walt Disney's combined quote/fundamentals row.
final_columns = [
    "open", "high", "low", "close", "PERatio", "profit_margin",
    "operatingCashflow", "profitLoss", "netIncome", "company_name",
]
name_match = disney_com.company_name == dis_com_cash_flow_f.companyname
dis_with_cash_flow = (
    disney_com.join(dis_com_cash_flow_f, name_match, "inner")
    .select(*final_columns)
)
dis_with_cash_flow.show()



+-----+-----+-----+-----+-------+-------------+-----------------+----------+----------+-------------------+
| open| high|  low|close|PERatio|profit_margin|operatingCashflow|profitLoss| netIncome|       company_name|
+-----+-----+-----+-----+-------+-------------+-----------------+----------+----------+-------------------+
|86.03|86.73|84.69|86.67|  49.44|  28321000000|      12035000000|2507000000|1995000000|Walt Disney Company|
+-----+-----+-----+-----+-------+-------------+-----------------+----------+----------+-------------------+



                                                                                

In [155]:
# Stack the five per-company rows into a single comparison table, `final_df`.
try:
    company_frames = [
        amzn_with_cash_flow,
        intel_with_cash_flow,
        jpm_with_cash_flow,
        coca_with_cash_flow,
        dis_with_cash_flow,
    ]
    final_df = company_frames[0]
    for frame in company_frames[1:]:
        # unionByName lines columns up by name; plain union() is positional and would
        # silently mix columns if one frame's select order ever drifted.
        final_df = final_df.unionByName(frame)
    final_df.show()
except Exception as e:
    # Keep the short message, but re-raise: swallowing the error would hide the
    # traceback and leave `final_df` undefined (or stale from an earlier run).
    print(e)
    raise



+-----+------+------+------+-------+-------------+-----------------+-----------+-----------+-------------------+
| open|  high|   low| close|PERatio|profit_margin|operatingCashflow| profitLoss|  netIncome|       company_name|
+-----+------+------+------+-------+-------------+-----------------+-----------+-----------+-------------------+
|85.52| 85.68| 82.25| 83.79|  76.12| 197478000000|      66064000000|21331000000|21331000000|     Amazon.com Inc|
|26.45|26.452| 25.35| 25.97|   8.08|  43815000000|      35384000000|20899000000|20899000000|  Intel Corporation|
|131.1| 131.3|128.41|130.66|  11.08| 130898000000|     -79910000000|29131000000|29131000000|JPMorgan Chase & Co|
|63.42| 63.59|62.645| 63.34|  27.99|  23298000000|       9844000000| 7768000000| 7747000000|       Coca-Cola Co|
|86.03| 86.73| 84.69| 86.67|  49.44|  28321000000|      12035000000| 2507000000| 1995000000|Walt Disney Company|
+-----+------+------+------+-------+-------------+-----------------+-----------+-----------+-------------------+

                                                                                

In [156]:
# Stop the SparkSession obtained via SparkSession.builder.getOrCreate() earlier in the notebook.
spark.stop()