# Project 3 (Finance - Data Engineering Track)
- Gather data across 5 businesses.
- Seek to answer: 
    1. How have the stock prices of these companies trended over the last few years?
    1. Are there any discernible seasonal patterns in the stock prices of retail  
    companies over the last three years?


In [137]:
import pandas as pd
import numpy as np
import yfinance as yf
import datetime as dt

In [138]:
# Lowercase ticker symbols of the companies processed in this notebook
companies = [
    "tsla",
    "aapl",
    "mcd",
    "hd",
]

## Financial Statements

In [139]:
# Build the tidy TSLA frame: one row per reporting date, one column per metric
selected_rows_names = ["Total Revenue", "Gross Profit", "Total Expenses", "Net Income"]
snake_case_names = ["total_revenue", "gross_profit", "total_expenses", "net_income"]

# The wanted line items are row labels of the raw statement
selected_data = tsla_financial.loc[selected_rows_names]

# Flip so the line items become columns, tag every row with the ticker, then
# rename the columns (the former row labels) to snake_case
financial_df = (
    pd.DataFrame(selected_data)
    .transpose()
    .assign(company_symbol="TSLA")
    .rename(columns=dict(zip(selected_rows_names, snake_case_names)))
)
financial_df


Unnamed: 0,total_revenue,gross_profit,total_expenses,net_income,company_symbol
2022-12-31,81462000000.0,20853000000.0,67630000000.0,12583000000.0,TSLA
2021-12-31,53823000000.0,13606000000.0,47327000000.0,5524000000.0,TSLA
2020-12-31,31536000000.0,6630000000.0,29542000000.0,721000000.0,TSLA


In [140]:
# Name the index "date", then move it into a regular column of that name
financial_df = financial_df.rename_axis(index="date").reset_index()

In [141]:
# Put the identifying columns (ticker, date) ahead of the TSLA metrics
column_order = [
    "company_symbol",
    "date",
    "total_revenue",
    "gross_profit",
    "total_expenses",
    "net_income",
]
financial_df = financial_df.loc[:, column_order]
financial_df

Unnamed: 0,company_symbol,date,total_revenue,gross_profit,total_expenses,net_income
0,TSLA,2022-12-31,81462000000.0,20853000000.0,67630000000.0,12583000000.0
1,TSLA,2021-12-31,53823000000.0,13606000000.0,47327000000.0,5524000000.0
2,TSLA,2020-12-31,31536000000.0,6630000000.0,29542000000.0,721000000.0


In [142]:
# Write the tidy TSLA frame to disk; the positional index is not written
financial_df.to_csv(
    path_or_buf="../Resources/tsla_financial.csv",
    index=False,
)

In [143]:
# Build the tidy AAPL frame: one row per reporting date, one column per metric
selected_rows_names = ["Total Revenue", "Gross Profit", "Total Expenses", "Net Income"]
snake_case_names = ["total_revenue", "gross_profit", "total_expenses", "net_income"]

# The wanted line items are row labels of the raw statement
selected_data = aapl_financial.loc[selected_rows_names]

# Flip so the line items become columns, tag every row with the ticker, then
# rename the columns (the former row labels) to snake_case
financial_df = (
    pd.DataFrame(selected_data)
    .transpose()
    .assign(company_symbol="AAPL")
    .rename(columns=dict(zip(selected_rows_names, snake_case_names)))
)
financial_df


Unnamed: 0,total_revenue,gross_profit,total_expenses,net_income,company_symbol
2023-09-30,383285000000.0,169148000000.0,268984000000.0,96995000000.0,AAPL
2022-09-30,394328000000.0,170782000000.0,274891000000.0,99803000000.0,AAPL
2021-09-30,365817000000.0,152836000000.0,256868000000.0,94680000000.0,AAPL
2020-09-30,274515000000.0,104956000000.0,208227000000.0,57411000000.0,AAPL


In [144]:
# Name the index "date", then move it into a regular column of that name
financial_df = financial_df.rename_axis(index="date").reset_index()

In [145]:
# Put the identifying columns (ticker, date) ahead of the AAPL metrics
column_order = [
    "company_symbol",
    "date",
    "total_revenue",
    "gross_profit",
    "total_expenses",
    "net_income",
]
financial_df = financial_df.loc[:, column_order]
financial_df

Unnamed: 0,company_symbol,date,total_revenue,gross_profit,total_expenses,net_income
0,AAPL,2023-09-30,383285000000.0,169148000000.0,268984000000.0,96995000000.0
1,AAPL,2022-09-30,394328000000.0,170782000000.0,274891000000.0,99803000000.0
2,AAPL,2021-09-30,365817000000.0,152836000000.0,256868000000.0,94680000000.0
3,AAPL,2020-09-30,274515000000.0,104956000000.0,208227000000.0,57411000000.0


In [146]:
# Write the tidy AAPL frame to disk; the positional index is not written
financial_df.to_csv(
    path_or_buf="../Resources/aapl_financial.csv",
    index=False,
)

In [147]:
# Build the tidy MCD frame: one row per reporting date, one column per metric
selected_rows_names = ["Total Revenue", "Gross Profit", "Total Expenses", "Net Income"]
snake_case_names = ["total_revenue", "gross_profit", "total_expenses", "net_income"]

# The wanted line items are row labels of the raw statement
selected_data = mcd_financial.loc[selected_rows_names]

# Flip so the line items become columns, tag every row with the ticker, then
# rename the columns (the former row labels) to snake_case
financial_df = (
    pd.DataFrame(selected_data)
    .transpose()
    .assign(company_symbol="MCD")
    .rename(columns=dict(zip(selected_rows_names, snake_case_names)))
)
financial_df


Unnamed: 0,total_revenue,gross_profit,total_expenses,net_income,company_symbol
2022-12-31,23182600000.0,13207200000.0,12838000000.0,6177400000.0,MCD
2021-12-31,23222900000.0,12580200000.0,13350200000.0,7545200000.0,MCD
2020-12-31,19207800000.0,9752100000.0,12001300000.0,4730500000.0,MCD


In [148]:
# Name the index "date", then move it into a regular column of that name
financial_df = financial_df.rename_axis(index="date").reset_index()

In [149]:
# Put the identifying columns (ticker, date) ahead of the MCD metrics
column_order = [
    "company_symbol",
    "date",
    "total_revenue",
    "gross_profit",
    "total_expenses",
    "net_income",
]
financial_df = financial_df.loc[:, column_order]
financial_df

Unnamed: 0,company_symbol,date,total_revenue,gross_profit,total_expenses,net_income
0,MCD,2022-12-31,23182600000.0,13207200000.0,12838000000.0,6177400000.0
1,MCD,2021-12-31,23222900000.0,12580200000.0,13350200000.0,7545200000.0
2,MCD,2020-12-31,19207800000.0,9752100000.0,12001300000.0,4730500000.0


In [150]:
# Write the tidy MCD frame to disk; the positional index is not written
financial_df.to_csv(
    path_or_buf="../Resources/mcd_financial.csv",
    index=False,
)

In [151]:
# Build the tidy HD frame: one row per reporting date, one column per metric
selected_rows_names = ["Total Revenue", "Gross Profit", "Total Expenses", "Net Income"]
snake_case_names = ["total_revenue", "gross_profit", "total_expenses", "net_income"]

# The wanted line items are row labels of the raw statement
selected_data = hd_financial.loc[selected_rows_names]

# Flip so the line items become columns, tag every row with the ticker, then
# rename the columns (the former row labels) to snake_case
financial_df = (
    pd.DataFrame(selected_data)
    .transpose()
    .assign(company_symbol="HD")
    .rename(columns=dict(zip(selected_rows_names, snake_case_names)))
)
financial_df


Unnamed: 0,total_revenue,gross_profit,total_expenses,net_income,company_symbol
2023-01-31,157403000000.0,52778000000.0,133364000000.0,17105000000.0,HD
2022-01-31,151157000000.0,50832000000.0,128117000000.0,16433000000.0,HD
2021-01-31,132110000000.0,44853000000.0,113832000000.0,12866000000.0,HD
2020-01-31,110225000000.0,37572000000.0,94382000000.0,11242000000.0,HD


In [152]:
# Name the index "date", then move it into a regular column of that name
financial_df = financial_df.rename_axis(index="date").reset_index()

In [153]:
# Put the identifying columns (ticker, date) ahead of the HD metrics
column_order = [
    "company_symbol",
    "date",
    "total_revenue",
    "gross_profit",
    "total_expenses",
    "net_income",
]
financial_df = financial_df.loc[:, column_order]
financial_df

Unnamed: 0,company_symbol,date,total_revenue,gross_profit,total_expenses,net_income
0,HD,2023-01-31,157403000000.0,52778000000.0,133364000000.0,17105000000.0
1,HD,2022-01-31,151157000000.0,50832000000.0,128117000000.0,16433000000.0
2,HD,2021-01-31,132110000000.0,44853000000.0,113832000000.0,12866000000.0
3,HD,2020-01-31,110225000000.0,37572000000.0,94382000000.0,11242000000.0


In [154]:
# Write the tidy HD frame to disk; the positional index is not written
financial_df.to_csv(
    path_or_buf="../Resources/hd_financial.csv",
    index=False,
)

In [155]:
# Combine every company's tidied statement into one DataFrame and save it.
#
# The raw statements (``*_financial``) keep the line items as row labels and
# the reporting dates as columns. Concatenating them as-is and writing with
# ``index=False`` would drop the line-item labels and record no company, so
# each raw statement is first reshaped to the per-company CSV layout
# (company_symbol, date, metrics...) and only then stacked.
statement_columns = {
    "Total Revenue": "total_revenue",
    "Gross Profit": "gross_profit",
    "Total Expenses": "total_expenses",
    "Net Income": "net_income",
}
raw_statements = {
    "TSLA": tsla_financial,
    "AAPL": aapl_financial,
    "MCD": mcd_financial,
    "HD": hd_financial,
}
output_columns = ["company_symbol", "date", *statement_columns.values()]

tidy_frames = [
    statement.loc[list(statement_columns)]  # keep only the wanted line items
    .transpose()                            # dates become rows
    .rename(columns=statement_columns)
    .reset_index(names="date")
    .assign(company_symbol=symbol)[output_columns]
    for symbol, statement in raw_statements.items()
]

# ignore_index replaces the repeated per-company 0..k indexes with one 0..n-1
all_companies_df = pd.concat(tidy_frames, ignore_index=True)

# Save the combined DataFrame to a CSV
all_companies_df.to_csv('financial_data.csv', index=False)
