In [None]:
#Imports
import requests
import pandas as pd
import matplotlib.pyplot as plt
import os
from config import *

In [None]:
#Retrieve data from Alpha Vantage
#Company and API_Key are expected to come from config (star import above)
url = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={Company}&outputsize=full&apikey={API_Key}"
r = requests.get(url, timeout=30) #A timeout keeps a stalled connection from hanging the kernel
r.raise_for_status() #Surface HTTP-level failures instead of trying to parse an error page
data = r.json()

#The payload may not contain the daily series at all (e.g. an error or rate-limit response).
#Fail with a readable message instead of a bare KeyError. Only the top-level keys are
#reported so that nothing from the response body is echoed into the notebook output.
if "Time Series (Daily)" not in data:
    raise RuntimeError(
        f"Alpha Vantage response for {Company} has no 'Time Series (Daily)' entry; "
        f"top-level keys returned: {list(data)}"
    )

time_series = data["Time Series (Daily)"]

#One row per trading day; the date strings become the index and are then moved into a "date" column
df = pd.DataFrame.from_dict(time_series, orient="index")
df.columns = ["open", "high", "low", "close", "volume"] #Positional rename: assumes the API returns exactly these five fields in this order
df = df.reset_index().rename(columns={"index": "date"})

In [None]:
#Fix stock split
df["date"] = pd.to_datetime(df["date"])

split = pd.to_datetime("2022-07-15") #When the stock split occurred; rows dated on or before it are adjusted
split_ratio = 20 #Prices are divided by this, volume is multiplied by this

#Keep only data between Date_Start and Date_End (both inclusive).
#.copy() makes filtered_df an independent frame, so the assignments below modify it
#explicitly instead of writing into a selection of df (avoids SettingWithCopyWarning).
filtered_df = df[(df["date"]>= Date_Start) & (df["date"]<= Date_End)].copy()
#Due to how working days fall, the actual data range is from 2020-01-02 to 2024-12-31

#The downloaded values are strings, so convert them to numbers before doing arithmetic
price_cols = ["open", "high", "low", "close"]
for col in price_cols:
    filtered_df[col] = pd.to_numeric(filtered_df[col])
filtered_df["volume"] = pd.to_numeric(filtered_df["volume"])

#Stock underwent a stock split on 2022 July 15, so all price values up to and including
#that date are divided by the split ratio and the volume is multiplied by it
pre_split = filtered_df["date"] <= split
filtered_df.loc[pre_split, price_cols] /= split_ratio
filtered_df.loc[pre_split, "volume"] *= split_ratio

In [None]:
#Save data as CSV; the relative file name is resolved against the current working directory
#index=False leaves the DataFrame index out of the file
filtered_df.to_csv(path_or_buf="filtered_price.csv", index=False)

In [None]:
#Plot data
#Build the axis label from the dates that are actually plotted, so it cannot drift
#from the data (the previous hard-coded range did not match the real last trading day)
if filtered_df.empty:
    date_label = "Date"
else:
    first_day = filtered_df["date"].min().date()
    last_day = filtered_df["date"].max().date()
    date_label = f"Date ({first_day} to {last_day})"

plt.plot(filtered_df["date"], filtered_df["close"])
plt.xlabel(date_label)
plt.ylabel("Closing Price in $")
plt.title(f"Closing price per day for the stock {Company}") #Same symbol that was requested from the API
plt.grid(axis='y', linestyle='--')
plt.show()

In [None]:
#Add padding data for technical indicators
#Request the same symbol as the main download (Company from config) so the padding
#rows always belong to the same stock as the rows they are merged with
url = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={Company}&outputsize=full&apikey={API_Key}"
r = requests.get(url, timeout=30) #A timeout keeps a stalled connection from hanging the kernel
r.raise_for_status() #Surface HTTP-level failures instead of trying to parse an error page
data = r.json()

#Fail with a readable message when the payload has no daily series (e.g. an error or
#rate-limit response). Only the top-level keys are reported, not the response body.
if "Time Series (Daily)" not in data:
    raise RuntimeError(
        f"Alpha Vantage response for {Company} has no 'Time Series (Daily)' entry; "
        f"top-level keys returned: {list(data)}"
    )

time_series = data["Time Series (Daily)"]
df = pd.DataFrame.from_dict(time_series, orient="index")
df.columns = ["open", "high", "low", "close", "volume"] #Positional rename: assumes the API returns exactly these five fields in this order
df = df.reset_index().rename(columns={"index": "date"})

#Fix stock split
df["date"] = pd.to_datetime(df["date"])

split = pd.to_datetime("2022-07-15") #When the stock split occurred; rows dated on or before it are adjusted
split_ratio = 20 #Prices are divided by this, volume is multiplied by this

padding = pd.to_datetime("2019-06-01") #First date of the padding window

#Keep only the padding window from `padding` up to and including Date_Start.
#A row dated exactly Date_Start can also be present in the saved CSV; such duplicates
#are dropped by the isin() filter further down.
#.copy() makes filtered_df an independent frame, so the assignments below modify it
#explicitly instead of writing into a selection of df (avoids SettingWithCopyWarning).
filtered_df = df[(df["date"]>= padding) & (df["date"]<= Date_Start)].copy()

#The downloaded values are strings, so convert them to numbers before doing arithmetic
price_cols = ["open", "high", "low", "close"]
for col in price_cols:
    filtered_df[col] = pd.to_numeric(filtered_df[col])
filtered_df["volume"] = pd.to_numeric(filtered_df["volume"])

#Stock underwent a stock split on 2022 July 15, so all price values up to and including
#that date are divided by the split ratio and the volume is multiplied by it
pre_split = filtered_df["date"] <= split
filtered_df.loc[pre_split, price_cols] /= split_ratio
filtered_df.loc[pre_split, "volume"] *= split_ratio

#Load existing filtered_price.csv if it exists

#NOTE(review): absolute, machine-specific path; the earlier save cell writes
#"filtered_price.csv" relative to the working directory - confirm both refer to the same file
output_path = r"C:\Users\matti\OneDrive\Thesis\Data\filtered_price.csv"

if os.path.exists(output_path):
    existing_df = pd.read_csv(output_path, parse_dates=["date"])

    #Append only the dates that are not already stored
    new_rows = filtered_df[~filtered_df["date"].isin(existing_df["date"])]
    combined_df = pd.concat([existing_df, new_rows], ignore_index=True)

    combined_df = combined_df.sort_values("date", ascending=False) #Newest row first
else:
    #No file yet: the padding rows alone become the file content
    combined_df = filtered_df.sort_values("date", ascending=False)

#Save back to the CSV
combined_df.to_csv(output_path, index=False)

