In [14]:
import webdriver_manager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from IPython.display import clear_output
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd
import IPython
import os
from decouple import config
import re

In [2]:
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))

[WDM] - Downloading: 100%|██████████| 7.05M/7.05M [00:01<00:00, 6.52MB/s]


# Apple History

## Product Timeline

Here are sources for the product timeline:
 - [Wikipedia](https://en.wikipedia.org/wiki/Timeline_of_Apple_Inc._products)

In [24]:
url = "https://en.wikipedia.org/wiki/Timeline_of_Apple_Inc._products"
dfs = pd.read_html(url)
print(len(dfs))

47


In [28]:
len(dfs[0].columns)

1

In [30]:
final_dfs = []
for df in dfs:
    if len(df.columns) == 4:
        final_dfs.append(df)
print(len(final_dfs))

42


In [34]:
merged_df = pd.concat(final_dfs)
merged_df.head()

Unnamed: 0,Released,Model,Family,Discontinued
0,"April 11, 1976",Apple I,Apple I,"September 30, 1977"
1,"June 1, 1977",Apple II,Apple II,"May 1, 1979"
2,"June 1, 1978",Disk II,Drives,"May 1, 1984"
3,"June 1, 1979",Apple II Plus,Apple II series,"December 1, 1982"
4,"June 1, 1979",Apple II EuroPlus,Apple II series,"December 1, 1982"


In [35]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 651 entries, 0 to 3
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Released      651 non-null    object
 1   Model         651 non-null    object
 2   Family        651 non-null    object
 3   Discontinued  651 non-null    object
dtypes: object(4)
memory usage: 25.4+ KB


In [36]:
merged_df.isna().sum()

Released        0
Model           0
Family          0
Discontinued    0
dtype: int64

In [38]:
merged_df.to_csv("apple_products.csv", index=False)

# Apple's Dominance

## Global Ranking

Global Rankings from 2007 to 2023. The data is to be scrapped from [Brand Directory](https://brandirectory.com/rankings/global/2023/table).

In [12]:
for year in range(2007, 2024):
    print(f"Scraping {year}")
    url = f"https://brandirectory.com/rankings/global/{year}/table"
    driver.get(url)
    d_btn = driver.find_element(By.XPATH, '//*[@id="top-of-page"]/div[1]/div[2]/div/section/div/div/div[1]/div/button')
    d_btn.click()
    time.sleep(2)

Scraping 2007
Scraping 2008
Scraping 2009
Scraping 2010
Scraping 2011
Scraping 2012
Scraping 2013
Scraping 2014
Scraping 2015
Scraping 2016
Scraping 2017
Scraping 2018
Scraping 2019
Scraping 2020
Scraping 2021
Scraping 2022
Scraping 2023


In [13]:
driver.close()

# Financials

## Stock Performance

In [1]:
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt

In [2]:
apple = yf.Ticker("AAPL")

In [47]:

apple_stock = apple.history(period="max", interval="1d", auto_adjust=True)

In [48]:
apple_stock.reset_index(inplace=True)
apple_stock["Date"] = apple_stock["Date"].dt.strftime("%Y-%m-%d")
columns_to_keep = ["Date", "Open","High", "Low",  "Close", "Volume"]
apple_stock = apple_stock[columns_to_keep]
apple_stock.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1980-12-12,0.099722,0.100155,0.099722,0.099722,469033600
1,1980-12-15,0.094953,0.094953,0.094519,0.094519,175884800
2,1980-12-16,0.088015,0.088015,0.087582,0.087582,105728000
3,1980-12-17,0.089749,0.090183,0.089749,0.089749,86441600
4,1980-12-18,0.092351,0.092785,0.092351,0.092351,73449600


In [49]:
apple_stock.to_csv("financials/apple_stock.csv", index=False)

In [34]:
splits = apple.actions["Stock Splits"]
splits = splits[splits > 0]
dates = splits.index
split_values = splits.values
split_final = pd.DataFrame({"Date": dates, "Split": split_values})
split_final["Date"] = pd.to_datetime(split_final["Date"]).dt.strftime("%Y-%m-%d")
split_final["Split"] = split_final["Split"].astype(int)
split_final.to_csv("financials/apple_splits.csv", index=False)
split_final

Unnamed: 0,Date,Split
0,1987-06-16,2
1,2000-06-21,2
2,2005-02-28,2
3,2014-06-09,7
4,2020-08-31,4


In [35]:
dividens = apple.actions["Dividends"]
dates = dividens.index
dividens_values = dividens.values
dividens_final = pd.DataFrame({"Date": dates, "Dividends": dividens_values})
dividens_final["Date"] = pd.to_datetime(dividens_final["Date"]).dt.strftime("%Y-%m-%d")
dividens_final["Dividends"] = dividens_final["Dividends"].astype(float)
dividens_final.to_csv("financials/apple_dividends.csv", index=False)
dividens_final

Unnamed: 0,Date,Dividends
0,1987-05-11,0.000536
1,1987-06-16,0.000000
2,1987-08-10,0.000536
3,1987-11-17,0.000714
4,1988-02-12,0.000714
...,...,...
78,2022-02-04,0.220000
79,2022-05-06,0.230000
80,2022-08-05,0.230000
81,2022-11-04,0.230000


For dividend, a value of 0 corresponds to stock split.

In [10]:
financials = apple.get_balance_sheet()

TypeError: string indices must be integers

https://www.wsj.com/market-data/quotes/AAPL/financials/annual/cash-flow

https://www.macrotrends.net/stocks/charts/AAPL/apple/income-statement?freq=A

In [7]:
url = "https://www.wsj.com/market-data/quotes/AAPL/financials/annual/cash-flow"
# dfs = pd.read_html(url)

URLError: <urlopen error [Errno -3] Temporary failure in name resolution>