# 1. Preparation

In [0]:
# Libs
from pyspark.sql import SparkSession
from pyspark.sql.functions import explode, col
from pyspark.sql.types import DateType

# Datalake connection
# The storage account host is defined once so the account-key setting and every
# path below always point at the same account.
storage_account = "datalakeetlproject"
storage_host = f"{storage_account}.dfs.core.windows.net"

# The access key is fetched from the "myscope" secret scope instead of being
# hardcoded in the notebook.
blobAccessKey = dbutils.secrets.get(scope="myscope", key="accesskey")
spark.conf.set(f"fs.azure.account.key.{storage_host}", blobAccessKey)

# Paths
# Bronze paths use a glob over the JSON files; silver paths are directories.
bronze_root = f"abfss://bronze@{storage_host}"
silver_root = f"abfss://silver@{storage_host}"

path_bronze_profile = f"{bronze_root}/profile/*.json"
path_bronze_historical = f"{bronze_root}/historical/*.json"
path_silver_profile = f"{silver_root}/profile/"
path_silver_historical = f"{silver_root}/historical/"

# 2. Explore tables

## 2.1 Profile data

In [0]:
# Load every bronze 'profile' JSON file and print the inferred schema
df_bronze_profile = spark.read.format("json").load(path_bronze_profile)
df_bronze_profile.printSchema()

root
 |-- address: string (nullable = true)
 |-- beta: double (nullable = true)
 |-- ceo: string (nullable = true)
 |-- changes: double (nullable = true)
 |-- cik: string (nullable = true)
 |-- city: string (nullable = true)
 |-- companyName: string (nullable = true)
 |-- country: string (nullable = true)
 |-- currency: string (nullable = true)
 |-- cusip: string (nullable = true)
 |-- dcf: double (nullable = true)
 |-- dcfDiff: double (nullable = true)
 |-- defaultImage: boolean (nullable = true)
 |-- description: string (nullable = true)
 |-- exchange: string (nullable = true)
 |-- exchangeShortName: string (nullable = true)
 |-- fullTimeEmployees: string (nullable = true)
 |-- image: string (nullable = true)
 |-- industry: string (nullable = true)
 |-- ipoDate: string (nullable = true)
 |-- isActivelyTrading: boolean (nullable = true)
 |-- isAdr: boolean (nullable = true)
 |-- isEtf: boolean (nullable = true)
 |-- isFund: boolean (nullable = true)
 |-- isin: string (nullable = true)

In [0]:
# Preview the first 5 'profile' rows
profile_preview = df_bronze_profile.limit(5)
display(profile_preview)

address,beta,ceo,changes,cik,city,companyName,country,currency,cusip,dcf,dcfDiff,defaultImage,description,exchange,exchangeShortName,fullTimeEmployees,image,industry,ipoDate,isActivelyTrading,isAdr,isEtf,isFund,isin,lastDiv,mktCap,phone,price,range,sector,state,symbol,volAvg,website,zip
One Microsoft Way,1.055,Satya Nadella,11.53,789019,Redmond,Microsoft Corporation,US,USD,594918104,381.2641396727202,154.37586,False,"Microsoft Corporation develops, licenses, and supports software, services, devices, and solutions worldwide. The company operates in three segments: Productivity and Business Processes, Intelligent Cloud, and More Personal Computing. The Productivity and Business Processes segment offers Office, Exchange, SharePoint, Microsoft Teams, Office 365 Security and Compliance, Microsoft Viva, and Skype for Business; Skype, Outlook.com, OneDrive, and LinkedIn; and Dynamics 365, a set of cloud-based and on-premises business solutions for organizations and enterprise divisions. The Intelligent Cloud segment licenses SQL, Windows Servers, Visual Studio, System Center, and related Client Access Licenses; GitHub that provides a collaboration platform and code hosting service for developers; Nuance provides healthcare and enterprise AI solutions; and Azure, a cloud platform. It also offers enterprise support, Microsoft consulting, and nuance professional services to assist customers in developing, deploying, and managing Microsoft server and desktop solutions; and training and certification on Microsoft products. The More Personal Computing segment provides Windows original equipment manufacturer (OEM) licensing and other non-volume licensing of the Windows operating system; Windows Commercial, such as volume licensing of the Windows operating system, Windows cloud services, and other Windows commercial offerings; patent licensing; and Windows Internet of Things. It also offers Surface, PC accessories, PCs, tablets, gaming and entertainment consoles, and other devices; Gaming, including Xbox hardware, and Xbox content and services; video games and third-party video game royalties; and Search, including Bing and Microsoft advertising. 
The company sells its products through OEMs, distributors, and resellers; and directly through digital marketplaces, online stores, and retail stores. Microsoft Corporation was founded in 1975 and is headquartered in Redmond, Washington.",NASDAQ Global Select,NASDAQ,228000,https://images.financialmodelingprep.com/symbol/MSFT.png,Software - Infrastructure,1986-03-13,True,False,False,False,US5949181045,3.24,3981503178800,425 882 8080,535.64,344.79-555.45,Technology,WA,MSFT,19670220,https://www.microsoft.com,98052-6399
2788 San Tomas Expressway,2.145,Jen-Hsun Huang,6.28,1045810,Santa Clara,NVIDIA Corporation,US,USD,67066G104,147.674725605359,32.32527,False,"NVIDIA Corporation provides graphics, and compute and networking solutions in the United States, Taiwan, China, and internationally. The company's Graphics segment offers GeForce GPUs for gaming and PCs, the GeForce NOW game streaming service and related infrastructure, and solutions for gaming platforms; Quadro/NVIDIA RTX GPUs for enterprise workstation graphics; vGPU software for cloud-based visual and virtual computing; automotive platforms for infotainment systems; and Omniverse software for building 3D designs and virtual worlds. Its Compute & Networking segment provides Data Center platforms and systems for AI, HPC, and accelerated computing; Mellanox networking and interconnect solutions; automotive AI Cockpit, autonomous driving development agreements, and autonomous vehicle solutions; cryptocurrency mining processors; Jetson for robotics and other embedded platforms; and NVIDIA AI Enterprise and other software. The company's products are used in gaming, professional visualization, datacenter, and automotive markets. NVIDIA Corporation sells its products to original equipment manufacturers, original device manufacturers, system builders, add-in board manufacturers, retailers/distributors, independent software vendors, Internet and cloud service providers, automotive manufacturers and tier-1 automotive suppliers, mapping companies, start-ups, and other ecosystem participants. It has a strategic collaboration with Kroger Co. NVIDIA Corporation was incorporated in 1993 and is headquartered in Santa Clara, California.",NASDAQ Global Select,NASDAQ,36000,https://images.financialmodelingprep.com/symbol/NVDA.png,Semiconductors,1999-01-22,True,False,False,False,US67066G1040,0.04,4392000000000,408 486 2000,180.0,86.62-183.3,Technology,CA,NVDA,193026266,https://www.nvidia.com,95051
One Apple Park Way,1.165,Timothy D. Cook,0.97,320193,Cupertino,Apple Inc.,US,USD,037833100,177.13249175983253,26.21751,False,"Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and podcasts, as well as advertising services include third-party licensing arrangements and its own advertising platforms. In addition, the company offers various subscription-based services, such as Apple Arcade, a game subscription service; Apple Fitness+, a personalized fitness service; Apple Music, which offers users a curated listening experience with on-demand radio stations; Apple News+, a subscription news and magazine service; Apple TV+, which offers exclusive original content; Apple Card, a co-branded credit card; and Apple Pay, a cashless payment service, as well as licenses its intellectual property. The company serves consumers, and small and mid-sized businesses; and the education, enterprise, and government markets. It distributes third-party applications for its products through the App Store. The company also sells its products through its retail and online stores, and direct sales force; and third-party cellular network carriers, wholesalers, retailers, and resellers. Apple Inc. 
was founded in 1976 and is headquartered in Cupertino, California.",NASDAQ Global Select,NASDAQ,164000,https://images.financialmodelingprep.com/symbol/AAPL.png,Consumer Electronics,1980-12-12,True,False,False,False,US0378331005,1.01,3037194930000,(408) 996-1010,203.35,169.21-260.1,Technology,CA,AAPL,53679570,https://www.apple.com,95014
410 Terry Avenue North,1.314,Andrew R. Jassy,-3.1,1018724,Seattle,"Amazon.com, Inc.",US,USD,023135106,31.72927488988084,179.92073,False,"Amazon.com, Inc. engages in the retail sale of consumer products and subscriptions through online and physical stores in North America and internationally. The company operates through three segments: North America, International, and Amazon Web Services (AWS). Its products offered through its stores include merchandise and content purchased for resale; and products offered by third-party sellers The company also manufactures and sells electronic devices, including Kindle, Fire tablets, Fire TVs, Rings, Blink, eero, and Echo; and develops and produces media content. In addition, it offers programs that enable sellers to sell their products in its stores; and programs that allow authors, musicians, filmmakers, Twitch streamers, skill and app developers, and others to publish and sell content. Further, the company provides compute, storage, database, analytics, machine learning, and other services, as well as fulfillment, advertising, and digital content subscriptions. Additionally, it offers Amazon Prime, a membership program. The company serves consumers, sellers, developers, enterprises, content creators, and advertisers. Amazon.com, Inc. was incorporated in 1994 and is headquartered in Seattle, Washington.",NASDAQ Global Select,NASDAQ,1560000,https://images.financialmodelingprep.com/symbol/AMZN.png,Specialty Retail,1997-05-15,True,False,False,False,US0231351067,0.0,2246961060000,206 266 1000,211.65,158.54-242.52,Consumer Cyclical,WA,AMZN,43170150,https://www.amazon.com,98109-5210
1600 Amphitheatre Parkway,1.014,Sundar Pichai,5.91,1652044,Mountain View,Alphabet Inc.,US,USD,02079K305,274.94092927961213,-79.90093,False,"Alphabet Inc. provides various products and platforms in the United States, Europe, the Middle East, Africa, the Asia-Pacific, Canada, and Latin America. It operates through Google Services, Google Cloud, and Other Bets segments. The Google Services segment offers products and services, including ads, Android, Chrome, hardware, Gmail, Google Drive, Google Maps, Google Photos, Google Play, Search, and YouTube. It is also involved in the sale of apps and in-app purchases and digital content in the Google Play store; and Fitbit wearable devices, Google Nest home products, Pixel phones, and other devices, as well as in the provision of YouTube non-advertising services. The Google Cloud segment offers infrastructure, platform, and other services; Google Workspace that include cloud-based collaboration tools for enterprises, such as Gmail, Docs, Drive, Calendar, and Meet; and other services for enterprise customers. The Other Bets segment sells health technology and internet services. The company was founded in 1998 and is headquartered in Mountain View, California.",NASDAQ Global Select,NASDAQ,185719,https://images.financialmodelingprep.com/symbol/GOOGL.png,Internet Content & Information,2004-08-19,True,False,False,False,US02079K3059,0.81,2363795592800,650 253 0000,195.04,140.53-207.05,Communication Services,CA,GOOGL,42565590,https://www.abc.xyz,94043


## 2.2 Historical data

In [0]:
# Load every bronze 'historical' JSON file and print the inferred schema
df_bronze_historical = spark.read.format("json").load(path_bronze_historical)
df_bronze_historical.printSchema()

root
 |-- historicalStockList: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- historical: array (nullable = true)
 |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |-- adjClose: double (nullable = true)
 |    |    |    |    |-- change: double (nullable = true)
 |    |    |    |    |-- changeOverTime: double (nullable = true)
 |    |    |    |    |-- changePercent: double (nullable = true)
 |    |    |    |    |-- close: double (nullable = true)
 |    |    |    |    |-- date: string (nullable = true)
 |    |    |    |    |-- high: double (nullable = true)
 |    |    |    |    |-- label: string (nullable = true)
 |    |    |    |    |-- low: double (nullable = true)
 |    |    |    |    |-- open: double (nullable = true)
 |    |    |    |    |-- unadjustedVolume: long (nullable = true)
 |    |    |    |    |-- volume: long (nullable = true)
 |    |    |    |    |-- vwap: double (nullable = true)
 |    |    |-- symbol: s

In [0]:
# Flatten the nested JSON into one row per (symbol, trading day).
# NOTE: this cell reassigns df_bronze_historical to the flattened result, so it
# can only be re-run after the read cell above has been run again.

# First level: one row per element of 'historicalStockList'
df_level1 = df_bronze_historical.withColumn("stock", explode("historicalStockList"))

# Second level: one row per element of each stock's 'historical' array
df_level2 = df_level1.withColumn("data", explode("stock.historical"))

# Fields taken from each daily record, in output order
daily_fields = [
    "date",
    "open",
    "high",
    "low",
    "close",
    "adjClose",
    "volume",
    "change",
    "changePercent",
    "vwap",
]

# Keep the symbol plus the selected daily fields
df_bronze_historical = df_level2.select(
    "stock.symbol",
    *[f"data.{field}" for field in daily_fields],
)

# Preview the first 20 flattened rows
display(df_bronze_historical.limit(20))

symbol,date,open,high,low,close,adjClose,volume,change,changePercent,vwap
MSFT,2025-08-04,528.27,538.25,528.13,535.64,535.64,25349004,7.37,1.4,532.5725
MSFT,2025-08-01,535.0,535.8,520.86,524.11,524.11,28977628,-10.89,-2.04,528.9425
MSFT,2025-07-31,555.23,555.45,531.9,533.5,533.5,51617326,-21.73,-3.91,544.02
MSFT,2025-07-30,515.17,515.95,509.44,513.24,513.24,26380434,-1.93,-0.37463,513.45
MSFT,2025-07-29,515.53,517.62,511.56,512.57,512.57,16469235,-2.96,-0.57417,514.32
MSFT,2025-07-28,514.08,515.0,510.12,512.5,512.5,14308027,-1.58,-0.30735,512.925
MSFT,2025-07-25,512.47,518.29,510.36,513.71,513.71,19125700,1.25,0.24197,513.7075
MSFT,2025-07-24,508.77,513.67,507.3,510.88,510.88,16107000,2.11,0.41473,510.155
MSFT,2025-07-23,506.75,506.79,500.7,505.87,505.87,16396600,-0.88,-0.17366,505.0275
MSFT,2025-07-22,510.97,511.2,505.27,505.27,505.27,13868644,-5.7,-1.12,508.1775


# 3. Data Transformation

## 3.1 Profile

In [0]:
# Start the silver 'profile' table from the bronze DataFrame
df_silver_profile = df_bronze_profile

# Source columns to keep, in output order
profile_fields = [
    "symbol",
    "companyName",
    "sector",
    "industry",
    "country",
    "ceo",
    "exchange",
    "mktCap",
    "price",
    "website",
    "description",
]

# Source column -> silver column name (columns not listed keep their name)
profile_renames = {
    "symbol": "ticker",
    "companyName": "company_name",
    "ceo": "ceo_name",
    "mktCap": "market_cap",
}

profile_columns = [
    col(name).alias(profile_renames[name]) if name in profile_renames else col(name)
    for name in profile_fields
]

# Project the columns and keep a single row per ticker
df_silver_profile_clean = df_silver_profile.select(*profile_columns).dropDuplicates(["ticker"])



In [0]:
# Preview the first 20 rows of the transformed 'profile' data
silver_profile_preview = df_silver_profile_clean.limit(20)
display(silver_profile_preview)

ticker,company_name,sector,industry,country,ceo_name,exchange,market_cap,price,website,description
AAPL,Apple Inc.,Technology,Consumer Electronics,US,Timothy D. Cook,NASDAQ Global Select,3037194930000,203.35,https://www.apple.com,"Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and podcasts, as well as advertising services include third-party licensing arrangements and its own advertising platforms. In addition, the company offers various subscription-based services, such as Apple Arcade, a game subscription service; Apple Fitness+, a personalized fitness service; Apple Music, which offers users a curated listening experience with on-demand radio stations; Apple News+, a subscription news and magazine service; Apple TV+, which offers exclusive original content; Apple Card, a co-branded credit card; and Apple Pay, a cashless payment service, as well as licenses its intellectual property. The company serves consumers, and small and mid-sized businesses; and the education, enterprise, and government markets. It distributes third-party applications for its products through the App Store. The company also sells its products through its retail and online stores, and direct sales force; and third-party cellular network carriers, wholesalers, retailers, and resellers. Apple Inc. was founded in 1976 and is headquartered in Cupertino, California."
ADBE,Adobe Inc.,Technology,Software - Infrastructure,US,Shantanu Narayen,NASDAQ Global Select,143740170000,338.85,https://www.adobe.com,"Adobe Inc. operates as a diversified software company worldwide. It operates through three segments: Digital Media, Digital Experience, and Publishing and Advertising. The Digital Media segment offers products, services, and solutions that enable individuals, teams, and enterprises to create, publish, and promote content; and Document Cloud, a unified cloud-based document services platform. Its flagship product is Creative Cloud, a subscription service that allows members to access its creative products. This segment serves content creators, workers, marketers, educators, enthusiasts, communicators, and consumers. The Digital Experience segment provides an integrated platform and set of applications and services that enable brands and businesses to create, manage, execute, measure, monetize, and optimize customer experiences from analytics to commerce. This segment serves marketers, advertisers, agencies, publishers, merchandisers, merchants, web analysts, data scientists, developers, and executives across the C-suite. The Publishing and Advertising segment offers products and services, such as e-learning solutions, technical document publishing, web conferencing, document and forms platform, web application development, and high-end printing, as well as Advertising Cloud offerings. The company offers its products and services directly to enterprise customers through its sales force and local field offices, as well as to end users through app stores and through its website at adobe.com. It also distributes products and services through a network of distributors, value-added resellers, systems integrators, software vendors and developers, retailers, and original equipment manufacturers. The company was formerly known as Adobe Systems Incorporated and changed its name to Adobe Inc. in October 2018. Adobe Inc. 
was founded in 1982 and is headquartered in San Jose, California."
ADP,"Automatic Data Processing, Inc.",Industrials,Staffing & Employment Services,US,Maria Black,NASDAQ Global Select,122694285980,302.26,https://www.adp.com,"Automatic Data Processing, Inc. provides cloud-based human capital management solutions worldwide. It operates in two segments, Employer Services and Professional Employer Organization (PEO). The Employer Services segment offers strategic, cloud-based platforms, and human resources (HR) outsourcing solutions. Its offerings include payroll, benefits administration, talent management, HR management, workforce management, insurance, retirement, and compliance services, as well as integrated HCM solutions. The PEO Services segment provides HR outsourcing solutions to small and mid-sized businesses through a co-employment model. This segment offers benefits package, protection and compliance, talent engagement, expertise, comprehensive outsourcing, and recruitment process outsourcing services. The company was founded in 1949 and is headquartered in Roseland, New Jersey."
AMGN,Amgen Inc.,Healthcare,Drug Manufacturers - General,US,Robert A. Bradway,NASDAQ Global Select,162354949640,301.94,https://www.amgen.com,"Amgen Inc. discovers, develops, manufactures, and delivers human therapeutics worldwide. It focuses on inflammation, oncology/hematology, bone health, cardiovascular disease, nephrology, and neuroscience areas. The company's products include Enbrel to treat plaque psoriasis, rheumatoid arthritis, and psoriatic arthritis; Neulasta that reduces the chance of infection due a low white blood cell count in patients cancer; Prolia to treat postmenopausal women with osteoporosis; Xgeva for skeletal-related events prevention; Otezla for the treatment of adult patients with plaque psoriasis, psoriatic arthritis, and oral ulcers associated with Behçet's disease; Aranesp to treat a lower-than-normal number of red blood cells and anemia; KYPROLIS to treat patients with relapsed or refractory multiple myeloma; and Repatha, which reduces the risks of myocardial infarction, stroke, and coronary revascularization. It also markets Nplate, Vectibix, MVASI, Parsabiv, EPOGEN, KANJINTI, BLINCYTO, Aimovig, EVENITY, AMGEVITATM, Sensipar/Mimpara, NEUPOGEN, IMLYGIC, Corlanor, and AVSOLA. Amgen Inc. serves healthcare providers, including physicians or their clinics, dialysis centers, hospitals, and pharmacies. It distributes its products through pharmaceutical wholesale distributors, as well as direct-to-consumer channels. It has collaboration agreements with Novartis Pharma AG; UCB; Bayer HealthCare LLC; BeiGene, Ltd.; Eli Lilly and Company; Datos Health; and Verastem, Inc. to evaluate VS-6766 in combination with lumakrastm (Sotorasib) in patients with KRAS G12C-mutant non-small cell lung cancer. It has an agreement with Kyowa Kirin Co., Ltd. 
to jointly develop and commercialize KHK4083, a Phase 3-ready anti-OX40 fully human monoclonal antibody for the treatment of atopic dermatitis and other autoimmune diseases; and research and development collaboration with Neumora Therapeutics, Inc. and Plexium, Inc. Amgen Inc. was incorporated in 1980 and is headquartered in Thousand Oaks, California."
AMZN,"Amazon.com, Inc.",Consumer Cyclical,Specialty Retail,US,Andrew R. Jassy,NASDAQ Global Select,2246961060000,211.65,https://www.amazon.com,"Amazon.com, Inc. engages in the retail sale of consumer products and subscriptions through online and physical stores in North America and internationally. The company operates through three segments: North America, International, and Amazon Web Services (AWS). Its products offered through its stores include merchandise and content purchased for resale; and products offered by third-party sellers The company also manufactures and sells electronic devices, including Kindle, Fire tablets, Fire TVs, Rings, Blink, eero, and Echo; and develops and produces media content. In addition, it offers programs that enable sellers to sell their products in its stores; and programs that allow authors, musicians, filmmakers, Twitch streamers, skill and app developers, and others to publish and sell content. Further, the company provides compute, storage, database, analytics, machine learning, and other services, as well as fulfillment, advertising, and digital content subscriptions. Additionally, it offers Amazon Prime, a membership program. The company serves consumers, sellers, developers, enterprises, content creators, and advertisers. Amazon.com, Inc. was incorporated in 1994 and is headquartered in Seattle, Washington."
AVGO,Broadcom Inc.,Technology,Semiconductors,US,Hock E. Tan,NASDAQ Global Select,1400317088400,297.72,https://www.broadcom.com,"Broadcom Inc. designs, develops, and supplies various semiconductor devices with a focus on complex digital and mixed signal complementary metal oxide semiconductor based devices and analog III-V based products worldwide. The company operates in two segments, Semiconductor Solutions and Infrastructure Software. It provides set-top box system-on-chips (SoCs); cable, digital subscriber line, and passive optical networking central office/consumer premise equipment SoCs; wireless local area network access point SoCs; Ethernet switching and routing merchant silicon products; embedded processors and controllers; serializer/deserializer application specific integrated circuits; optical and copper, and physical layers; and fiber optic transmitter and receiver components. The company also offers RF front end modules, filters, and power amplifiers; Wi-Fi, Bluetooth, and global positioning system/global navigation satellite system SoCs; custom touch controllers; serial attached small computer system interface, and redundant array of independent disks controllers and adapters; peripheral component interconnect express switches; fiber channel host bus adapters; read channel based SoCs; custom flash controllers; preamplifiers; and optocouplers, industrial fiber optics, and motion control encoders and subsystems. Its products are used in various applications, including enterprise and data center networking, home connectivity, set-top boxes, broadband access, telecommunication equipment, smartphones and base stations, data center servers and storage systems, factory automation, power generation and alternative energy systems, and electronic displays. Broadcom Inc. was incorporated in 2018 and is headquartered in San Jose, California."
BLK,"BlackRock, Inc.",Financial Services,Asset Management,US,Laurence Douglas Fink,New York Stock Exchange,172906711560,1116.06,https://www.blackrock.com,"BlackRock, Inc. is a publicly owned investment manager. The firm primarily provides its services to institutional, intermediary, and individual investors including corporate, public, union, and industry pension plans, insurance companies, third-party mutual funds, endowments, public institutions, governments, foundations, charities, sovereign wealth funds, corporations, official institutions, and banks. It also provides global risk management and advisory services. The firm manages separate client-focused equity, fixed income, and balanced portfolios. It also launches and manages open-end and closed-end mutual funds, offshore funds, unit trusts, and alternative investment vehicles including structured funds. The firm launches equity, fixed income, balanced, and real estate mutual funds. It also launches equity, fixed income, balanced, currency, commodity, and multi-asset exchange traded funds. The firm also launches and manages hedge funds. It invests in the public equity, fixed income, real estate, currency, commodity, and alternative markets across the globe. The firm primarily invests in growth and value stocks of small-cap, mid-cap, SMID-cap, large-cap, and multi-cap companies. It also invests in dividend-paying equity securities. The firm invests in investment grade municipal securities, government securities including securities issued or guaranteed by a government or a government agency or instrumentality, corporate bonds, and asset-backed and mortgage-backed securities. It employs fundamental and quantitative analysis with a focus on bottom-up and top-down approach to make its investments. The firm employs liquidity, asset allocation, balanced, real estate, and alternative strategies to make its investments. In real estate sector, it seeks to invest in Poland and Germany. 
The firm benchmarks the performance of its portfolios against various S&P, Russell, Barclays, MSCI, Citigroup, and Merrill Lynch indices. BlackRock, Inc. was founded in 1988 and is based in New York City with additional offices in Boston, Massachusetts; London, United Kingdom; Gurgaon, India; Hong Kong; Greenwich, Connecticut; Princeton, New Jersey; Edinburgh, United Kingdom; Sydney, Australia; Taipei, Taiwan; Singapore; Sao Paulo, Brazil; Philadelphia, Pennsylvania; Washington, District of Columbia; Toronto, Canada; Wilmington, Delaware; and San Francisco, California."
BRK-B,Berkshire Hathaway Inc.,Financial Services,Insurance - Diversified,US,Warren E. Buffett,New York Stock Exchange,992590851052,459.11,https://www.berkshirehathaway.com,"Berkshire Hathaway Inc., through its subsidiaries, engages in the insurance, freight rail transportation, and utility businesses worldwide. The company provides property, casualty, life, accident, and health insurance and reinsurance; and operates railroad systems in North America. It also generates, transmits, stores, and distributes electricity from natural gas, coal, wind, solar, hydroelectric, nuclear, and geothermal sources; operates natural gas distribution and storage facilities, interstate pipelines, liquefied natural gas facilities, and compressor and meter stations; and holds interest in coal mining assets. In addition, the company manufactures boxed chocolates and other confectionery products; specialty chemicals, metal cutting tools, and components for aerospace and power generation applications; flooring products; insulation, roofing, and engineered products; building and engineered components; paints and coatings; and bricks and masonry products, as well as offers manufactured and site-built home construction, and related lending and financial services. Further, it provides recreational vehicles, apparel and footwear products, jewelry, and custom picture framing products, as well as alkaline batteries; castings, forgings, fasteners/fastener systems, aerostructures, and precision components; and cobalt, nickel, and titanium alloys. Additionally, the company distributes televisions and information; franchises and services quick service restaurants; distributes electronic components; and offers logistics services, grocery and foodservice distribution services, and professional aviation training and shared aircraft ownership programs. 
It also retails automobiles; furniture, bedding, and accessories; household appliances, electronics, and computers; jewelry, watches, crystal, china, stemware, flatware, gifts, and collectibles; kitchenware; and motorcycle clothing and equipment. The company was incorporated in 1998 and is headquartered in Omaha, Nebraska."
C,Citigroup Inc.,Financial Services,Banks - Diversified,US,Jane Nind Fraser,New York Stock Exchange,171980578400,92.08,https://www.citigroup.com,"Citigroup Inc., a diversified financial services holding company, provides various financial products and services to consumers, corporations, governments, and institutions in North America, Latin America, Asia, Europe, the Middle East, and Africa. The company operates in two segments, Global Consumer Banking (GCB) and Institutional Clients Group (ICG). The GCB segment offers traditional banking services to retail customers through retail banking, Citi-branded cards, and Citi retail services. It also provides various banking, credit card, lending, and investment services through a network of local branches, offices, and electronic delivery systems. The ICG segment offers wholesale banking products and services, including fixed income and equity sales and trading, foreign exchange, prime brokerage, derivative, equity and fixed income research, corporate lending, investment banking and advisory, private banking, cash management, trade finance, and securities services to corporate, institutional, public sector, and high-net-worth clients. As of December 31, 2020, it operated 2,303 branches primarily in the United States, Mexico, and Asia. Citigroup Inc. was founded in 1812 and is headquartered in New York, New York."
CAT,Caterpillar Inc.,Industrials,Agricultural - Machinery,US,Joseph E. Creed CPA,New York Stock Exchange,203975615500,433.7,https://www.caterpillar.com,"Caterpillar Inc. manufactures and sells construction and mining equipment, diesel and natural gas engines, and industrial gas turbines worldwide. Its Construction Industries segment offers asphalt pavers, backhoe loaders, compactors, cold planers, compact track and multi-terrain loaders, excavators, motorgraders, pipelayers, road reclaimers, site prep tractors, skid steer loaders, telehandlers, and utility vehicles; mini, small, medium, and large excavators; compact, small, and medium wheel loaders; track-type tractors and loaders; and wheel excavators. The Resource Industries segment provides electric rope shovels, draglines, hydraulic shovels, rotary drills, hard rock vehicles, track-type tractors, mining trucks, longwall miners, wheel loaders, off-highway trucks, articulated trucks, wheel tractor scrapers, wheel dozers, fleet management, landfill compactors, soil compactors, machinery components, autonomous ready vehicles and solutions, select work tools, and safety services and mining performance solutions. The Energy & Transportation segment offers reciprocating engines, generator sets, integrated systems and solutions, turbines and turbine-related services, remanufactured reciprocating engines and components, centrifugal gas compressors, diesel-electric locomotives and components, and other rail-related products and services for marine, oil and gas, industrial, and electric power generation sectors. The company's Financial Products segment provides operating and finance leases, installment sale contracts, working capital loans, and wholesale financing plans; and insurance and risk management products for vehicles, power generation facilities, and marine vessels. The All Other operating segment manufactures filters and fluids, undercarriage, ground engaging tools, etc. 
The company was formerly known as Caterpillar Tractor Co. and changed its name to Caterpillar Inc. in 1986. The company was founded in 1925 and is headquartered in Deerfield, Illinois."


In [0]:
# Persist the cleaned profile table to the Silver layer in Delta format,
# replacing whatever was previously stored at that path
(
    df_silver_profile_clean.write
    .format("delta")
    .mode("overwrite")
    .save(path_silver_profile)
)

## 3.2 Historical

In [0]:
# Print the schema of the bronze 'historical' DataFrame before transforming it
df_bronze_historical.printSchema()


root
 |-- symbol: string (nullable = true)
 |-- date: string (nullable = true)
 |-- open: double (nullable = true)
 |-- high: double (nullable = true)
 |-- low: double (nullable = true)
 |-- close: double (nullable = true)
 |-- adjClose: double (nullable = true)
 |-- volume: long (nullable = true)
 |-- change: double (nullable = true)
 |-- changePercent: double (nullable = true)
 |-- vwap: double (nullable = true)



In [0]:
# Copy 'historical' data from bronze layer
df_silver_historical = df_bronze_historical

# NOTE: the bronze 'historical' schema is already flat (one row per symbol and
# date), so no array expansion is required. The former explode() steps pointed at
# 'historicalStockList' / 'stock.historical', which are not columns of this
# schema, and their results were never used downstream, so they were dropped.

# Select relevant fields, cast 'date' from string to a proper date type and
# rename camelCase columns to snake_case
df_silver_historical_clean = df_silver_historical.select(
    col("symbol").alias("ticker"),
    col("date").cast(DateType()).alias("date"),
    col("open"),
    col("high"),
    col("low"),
    col("close"),
    col("adjClose").alias("adj_close"),
    col("volume"),
    col("change"),
    col("changePercent").alias("change_percent"),
    col("vwap")
).dropDuplicates(["ticker", "date"])  # keep a single row per (ticker, date) pair

In [0]:
# Preview up to 20 rows of the transformed 'historical' data
display(df_silver_historical_clean.limit(20))

ticker,date,open,high,low,close,adj_close,volume,change,change_percent,vwap
MSFT,2024-12-03,429.84,432.47,427.74,431.2,429.55,18302000,1.36,0.3164,430.3125
BRK-B,2024-09-19,462.0,462.19,457.25,459.71,459.71,3891344,-2.29,-0.49567,460.2875
BRK-B,2024-09-09,463.97,466.0,459.26,459.61,459.61,4571041,-4.36,-0.93972,462.21
NVDA,2024-12-16,134.18,134.4,130.42,132.0,131.98,237951130,-2.18,-1.62,132.75
AAPL,2025-06-23,201.63,202.3,198.96,201.5,201.5,55814300,-0.125,-0.06447453,201.0975
AMZN,2024-12-31,222.97,223.23,218.94,219.39,219.39,24819700,-3.58,-1.61,221.1325
AMZN,2024-10-15,187.63,188.41,184.58,187.69,187.69,32178925,0.06,0.03197783,187.0775
MSFT,2025-03-25,393.92,396.36,392.64,395.16,394.44,15775000,1.25,0.31478,394.52
BRK-B,2025-03-25,527.18,533.29,525.88,528.87,528.87,3964800,1.69,0.32057,528.805
BRK-B,2025-03-12,497.6,498.32,488.75,495.94,495.94,3871100,-1.66,-0.3336,495.1525


In [0]:
# Persist the cleaned historical table to the Silver layer in Delta format,
# replacing whatever was previously stored at that path
(
    df_silver_historical_clean.write
    .format("delta")
    .mode("overwrite")
    .save(path_silver_historical)
)