In [1]:
#Import modules
import requests
import json
import pandas as pd
from config import api_key
import matplotlib.pyplot as plt
import numpy as np
import math
from scipy import stats

In [2]:
#URL and api key variables set
url_gold = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=GOLD&interval=5min&apikey=api_key&outputsize=full"
url_oil = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=OIL&interval=5min&apikey=api_key&outputsize=full"
url_btc = "https://www.alphavantage.co/query?function=DIGITAL_CURRENCY_DAILY&symbol=BTC&market=CNY&apikey=api_key"
url_sphq = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=SPHQ&interval=5min&apikey=api_key&outputsize=full"

api_key = "&apikey=" + api_key

# SPHQ Data

In [3]:
#Data pull and pretty print with json
response_sphq = requests.get(url_sphq + api_key).json()
print(json.dumps(response_sphq, indent=4, sort_keys=True))

{
    "Meta Data": {
        "1. Information": "Daily Time Series with Splits and Dividend Events",
        "2. Symbol": "SPHQ",
        "3. Last Refreshed": "2023-02-13",
        "4. Output Size": "Full size",
        "5. Time Zone": "US/Eastern"
    },
    "Time Series (Daily)": {
        "2005-12-06": {
            "1. open": "15.47",
            "2. high": "15.69",
            "3. low": "15.47",
            "4. close": "15.6",
            "5. adjusted close": "12.4527875026143",
            "6. volume": "343700",
            "7. dividend amount": "0.0000",
            "8. split coefficient": "1.0"
        },
        "2005-12-07": {
            "1. open": "15.6",
            "2. high": "15.62",
            "3. low": "15.42",
            "4. close": "15.44",
            "5. adjusted close": "12.3250666051516",
            "6. volume": "342800",
            "7. dividend amount": "0.0000",
            "8. split coefficient": "1.0"
        },
        "2005-12-08": {
            "1. open

In [4]:
#Data pull and create dataframe
sphq_data = response_sphq["Time Series (Daily)"]
sphq_df = pd.DataFrame(sphq_data).transpose()

sphq_df

Unnamed: 0,1. open,2. high,3. low,4. close,5. adjusted close,6. volume,7. dividend amount,8. split coefficient
2023-02-13,46.62,47.075,46.6,47.07,47.07,298617,0.0000,1.0
2023-02-10,46.28,46.57,46.2,46.54,46.54,709903,0.0000,1.0
2023-02-09,47.0,47.07,46.19,46.31,46.31,2417001,0.0000,1.0
2023-02-08,47.03,47.15,46.52,46.58,46.58,393845,0.0000,1.0
2023-02-07,46.44,47.34,46.42,47.21,47.21,452798,0.0000,1.0
...,...,...,...,...,...,...,...,...
2005-12-12,15.75,15.8,15.59,15.67,12.5086653952542,315300,0.0000,1.0
2005-12-09,15.59,15.74,15.47,15.57,12.42883983434,295000,0.0000,1.0
2005-12-08,15.45,15.58,15.439,15.52,12.3889270538829,354100,0.0000,1.0
2005-12-07,15.6,15.62,15.42,15.44,12.3250666051516,342800,0.0000,1.0


In [5]:
#Selecting coulmns for dataframe
organized_sphq_df = sphq_df[["1. open", "2. high", "3. low", "4. close" , "6. volume"]]
organized_sphq_df

Unnamed: 0,1. open,2. high,3. low,4. close,6. volume
2023-02-13,46.62,47.075,46.6,47.07,298617
2023-02-10,46.28,46.57,46.2,46.54,709903
2023-02-09,47.0,47.07,46.19,46.31,2417001
2023-02-08,47.03,47.15,46.52,46.58,393845
2023-02-07,46.44,47.34,46.42,47.21,452798
...,...,...,...,...,...
2005-12-12,15.75,15.8,15.59,15.67,315300
2005-12-09,15.59,15.74,15.47,15.57,295000
2005-12-08,15.45,15.58,15.439,15.52,354100
2005-12-07,15.6,15.62,15.42,15.44,342800


In [6]:
#Relabeling columns without numbers
clean_sphq_df = organized_sphq_df.rename(columns={"1. open": "open", "2. high": "high", "3. low": "low" , "4. close": "close" , "6. volume": "volume"})
clean_sphq_df = clean_sphq_df.astype(float)
clean_sphq_df

Unnamed: 0,open,high,low,close,volume
2023-02-13,46.62,47.075,46.600,47.07,298617.0
2023-02-10,46.28,46.570,46.200,46.54,709903.0
2023-02-09,47.00,47.070,46.190,46.31,2417001.0
2023-02-08,47.03,47.150,46.520,46.58,393845.0
2023-02-07,46.44,47.340,46.420,47.21,452798.0
...,...,...,...,...,...
2005-12-12,15.75,15.800,15.590,15.67,315300.0
2005-12-09,15.59,15.740,15.470,15.57,295000.0
2005-12-08,15.45,15.580,15.439,15.52,354100.0
2005-12-07,15.60,15.620,15.420,15.44,342800.0


In [7]:
#Calculating the daily change percent and creating a column for it 
daily_change_sphq = (100 * (clean_sphq_df["close"] - clean_sphq_df["open"] ) ) / clean_sphq_df["open"]
clean_sphq_df["daily change (%)"] = daily_change_sphq
clean_sphq_df

Unnamed: 0,open,high,low,close,volume,daily change (%)
2023-02-13,46.62,47.075,46.600,47.07,298617.0,0.965251
2023-02-10,46.28,46.570,46.200,46.54,709903.0,0.561798
2023-02-09,47.00,47.070,46.190,46.31,2417001.0,-1.468085
2023-02-08,47.03,47.150,46.520,46.58,393845.0,-0.956836
2023-02-07,46.44,47.340,46.420,47.21,452798.0,1.658053
...,...,...,...,...,...,...
2005-12-12,15.75,15.800,15.590,15.67,315300.0,-0.507937
2005-12-09,15.59,15.740,15.470,15.57,295000.0,-0.128287
2005-12-08,15.45,15.580,15.439,15.52,354100.0,0.453074
2005-12-07,15.60,15.620,15.420,15.44,342800.0,-1.025641


In [8]:
#Create column for ticker symbol and organize order of columns
clean_sphq_df["symbol"] = "SPHQ"
final_sphq_df = clean_sphq_df[["symbol", "open", "high", "low", "close","daily change (%)"]]
#Remove dates from the index to a column in dataframe
final_sphq_df2 = final_sphq_df.reset_index()
#Rename column header from index to date
final_sphq_df3 = final_sphq_df2.rename(columns = {'index' : 'date'})
#Organize data from oldest date to newest
final_sphq_df4 = final_sphq_df3.sort_values('date')
final_sphq_df4

Unnamed: 0,date,symbol,open,high,low,close,daily change (%)
4324,2005-12-06,SPHQ,15.47,15.690,15.470,15.60,0.840336
4323,2005-12-07,SPHQ,15.60,15.620,15.420,15.44,-1.025641
4322,2005-12-08,SPHQ,15.45,15.580,15.439,15.52,0.453074
4321,2005-12-09,SPHQ,15.59,15.740,15.470,15.57,-0.128287
4320,2005-12-12,SPHQ,15.75,15.800,15.590,15.67,-0.507937
...,...,...,...,...,...,...,...
4,2023-02-07,SPHQ,46.44,47.340,46.420,47.21,1.658053
3,2023-02-08,SPHQ,47.03,47.150,46.520,46.58,-0.956836
2,2023-02-09,SPHQ,47.00,47.070,46.190,46.31,-1.468085
1,2023-02-10,SPHQ,46.28,46.570,46.200,46.54,0.561798


In [9]:
#Create csv file for sphq dataframe
final_sphq_df4.to_csv("Resources/sphq_csv.csv", index=False)

# Gold Data

In [10]:
#API data request for gold data
response_gold = requests.get(url_gold + api_key).json()
print(json.dumps(response_gold, indent=4, sort_keys=True))

{
    "Meta Data": {
        "1. Information": "Daily Time Series with Splits and Dividend Events",
        "2. Symbol": "GOLD",
        "3. Last Refreshed": "2023-02-13",
        "4. Output Size": "Full size",
        "5. Time Zone": "US/Eastern"
    },
    "Time Series (Daily)": {
        "1999-11-01": {
            "1. open": "18.0",
            "2. high": "18.06",
            "3. low": "17.56",
            "4. close": "17.81",
            "5. adjusted close": "13.1072006096843",
            "6. volume": "1678800",
            "7. dividend amount": "0.0000",
            "8. split coefficient": "1.0"
        },
        "1999-11-02": {
            "1. open": "17.81",
            "2. high": "18.25",
            "3. low": "17.63",
            "4. close": "18.25",
            "5. adjusted close": "13.4310169077338",
            "6. volume": "2075100",
            "7. dividend amount": "0.0000",
            "8. split coefficient": "1.0"
        },
        "1999-11-03": {
            "1. o

In [11]:
#Data pull request for gold data "Time Series (Daily)"
gold_data = response_gold["Time Series (Daily)"]
gold_data

{'2023-02-13': {'1. open': '17.74',
  '2. high': '17.98',
  '3. low': '17.71',
  '4. close': '17.93',
  '5. adjusted close': '17.93',
  '6. volume': '9490558',
  '7. dividend amount': '0.0000',
  '8. split coefficient': '1.0'},
 '2023-02-10': {'1. open': '17.79',
  '2. high': '18.05',
  '3. low': '17.69',
  '4. close': '17.93',
  '5. adjusted close': '17.93',
  '6. volume': '12472121',
  '7. dividend amount': '0.0000',
  '8. split coefficient': '1.0'},
 '2023-02-09': {'1. open': '18.54',
  '2. high': '18.64',
  '3. low': '17.61',
  '4. close': '17.81',
  '5. adjusted close': '17.81',
  '6. volume': '17830832',
  '7. dividend amount': '0.0000',
  '8. split coefficient': '1.0'},
 '2023-02-08': {'1. open': '18.54',
  '2. high': '18.56',
  '3. low': '18.11',
  '4. close': '18.34',
  '5. adjusted close': '18.34',
  '6. volume': '11946260',
  '7. dividend amount': '0.0000',
  '8. split coefficient': '1.0'},
 '2023-02-07': {'1. open': '18.35',
  '2. high': '18.69',
  '3. low': '18.19',
  '4. 

In [12]:
#Switch columns and row data
gold_df = pd.DataFrame(gold_data).transpose()

gold_df

Unnamed: 0,1. open,2. high,3. low,4. close,5. adjusted close,6. volume,7. dividend amount,8. split coefficient
2023-02-13,17.74,17.98,17.71,17.93,17.93,9490558,0.0000,1.0
2023-02-10,17.79,18.05,17.69,17.93,17.93,12472121,0.0000,1.0
2023-02-09,18.54,18.64,17.61,17.81,17.81,17830832,0.0000,1.0
2023-02-08,18.54,18.56,18.11,18.34,18.34,11946260,0.0000,1.0
2023-02-07,18.35,18.69,18.19,18.5,18.5,15345588,0.0000,1.0
...,...,...,...,...,...,...,...,...
1999-11-05,17.56,17.69,17.13,17.19,12.6509140078873,1510400,0.0000,1.0
1999-11-04,17.69,17.94,17.56,17.69,13.0188870738526,1963000,0.0000,1.0
1999-11-03,18.19,18.25,17.75,17.88,13.1587168389195,1454300,0.0000,1.0
1999-11-02,17.81,18.25,17.63,18.25,13.4310169077338,2075100,0.0000,1.0


In [13]:
#Reorganize columns in gold dataframe
organized_gold_df = gold_df[["1. open", "2. high", "3. low", "4. close" , "6. volume"]]
organized_gold_df

Unnamed: 0,1. open,2. high,3. low,4. close,6. volume
2023-02-13,17.74,17.98,17.71,17.93,9490558
2023-02-10,17.79,18.05,17.69,17.93,12472121
2023-02-09,18.54,18.64,17.61,17.81,17830832
2023-02-08,18.54,18.56,18.11,18.34,11946260
2023-02-07,18.35,18.69,18.19,18.5,15345588
...,...,...,...,...,...
1999-11-05,17.56,17.69,17.13,17.19,1510400
1999-11-04,17.69,17.94,17.56,17.69,1963000
1999-11-03,18.19,18.25,17.75,17.88,1454300
1999-11-02,17.81,18.25,17.63,18.25,2075100


In [14]:
#Relabeled columns for gold dataframe
clean_gold_df = organized_gold_df.rename(columns={"1. open": "open", "2. high": "high", "3. low": "low" , "4. close": "close" , "6. volume": "volume"})
clean_gold_df = clean_gold_df.astype(float)
clean_gold_df

Unnamed: 0,open,high,low,close,volume
2023-02-13,17.74,17.98,17.71,17.93,9490558.0
2023-02-10,17.79,18.05,17.69,17.93,12472121.0
2023-02-09,18.54,18.64,17.61,17.81,17830832.0
2023-02-08,18.54,18.56,18.11,18.34,11946260.0
2023-02-07,18.35,18.69,18.19,18.50,15345588.0
...,...,...,...,...,...
1999-11-05,17.56,17.69,17.13,17.19,1510400.0
1999-11-04,17.69,17.94,17.56,17.69,1963000.0
1999-11-03,18.19,18.25,17.75,17.88,1454300.0
1999-11-02,17.81,18.25,17.63,18.25,2075100.0


In [15]:
#Calculating the daily change percent and creating a column for it 
daily_change_gold = (100 * (clean_gold_df["close"] - clean_gold_df["open"] ) ) / clean_gold_df["open"]
clean_gold_df["daily change (%)"] = daily_change_gold
clean_gold_df

Unnamed: 0,open,high,low,close,volume,daily change (%)
2023-02-13,17.74,17.98,17.71,17.93,9490558.0,1.071026
2023-02-10,17.79,18.05,17.69,17.93,12472121.0,0.786959
2023-02-09,18.54,18.64,17.61,17.81,17830832.0,-3.937433
2023-02-08,18.54,18.56,18.11,18.34,11946260.0,-1.078749
2023-02-07,18.35,18.69,18.19,18.50,15345588.0,0.817439
...,...,...,...,...,...,...
1999-11-05,17.56,17.69,17.13,17.19,1510400.0,-2.107062
1999-11-04,17.69,17.94,17.56,17.69,1963000.0,0.000000
1999-11-03,18.19,18.25,17.75,17.88,1454300.0,-1.704233
1999-11-02,17.81,18.25,17.63,18.25,2075100.0,2.470522


In [16]:
#Create column with rows filled with "GOLD"
clean_gold_df["symbol"] = "GOLD"
#Reorganize the order of columns
final_gold_df = clean_gold_df[["symbol", "open", "high", "low", "close","daily change (%)"]]
#Change date from index to column in dataframe
final_gold_df2 = final_gold_df.reset_index()
#Rename index column to date
final_gold_df3 = final_gold_df2.rename(columns = {'index' : 'date'})
#Organize the data from oldest to newest 
final_gold_df4 = final_gold_df3.sort_values('date')
final_gold_df4

Unnamed: 0,date,symbol,open,high,low,close,daily change (%)
5858,1999-11-01,GOLD,18.00,18.06,17.56,17.81,-1.055556
5857,1999-11-02,GOLD,17.81,18.25,17.63,18.25,2.470522
5856,1999-11-03,GOLD,18.19,18.25,17.75,17.88,-1.704233
5855,1999-11-04,GOLD,17.69,17.94,17.56,17.69,0.000000
5854,1999-11-05,GOLD,17.56,17.69,17.13,17.19,-2.107062
...,...,...,...,...,...,...,...
4,2023-02-07,GOLD,18.35,18.69,18.19,18.50,0.817439
3,2023-02-08,GOLD,18.54,18.56,18.11,18.34,-1.078749
2,2023-02-09,GOLD,18.54,18.64,17.61,17.81,-3.937433
1,2023-02-10,GOLD,17.79,18.05,17.69,17.93,0.786959


In [17]:
#Create CSV file for gold dateframe
final_gold_df4.to_csv("Resources/gold_csv.csv", index=False)

# Oil Data 

In [18]:
#API request for oil data
response_oil = requests.get(url_oil + api_key).json()
print(json.dumps(response_oil, indent=4, sort_keys=True))

{
    "Meta Data": {
        "1. Information": "Daily Time Series with Splits and Dividend Events",
        "2. Symbol": "OIL",
        "3. Last Refreshed": "2023-02-13",
        "4. Output Size": "Full size",
        "5. Time Zone": "US/Eastern"
    },
    "Time Series (Daily)": {
        "2011-04-21": {
            "1. open": "49.94",
            "2. high": "50.35",
            "3. low": "49.9",
            "4. close": "50.35",
            "5. adjusted close": "50.35",
            "6. volume": "10478",
            "7. dividend amount": "0.0000",
            "8. split coefficient": "1.0"
        },
        "2011-04-25": {
            "1. open": "50.57",
            "2. high": "50.57",
            "3. low": "49.91",
            "4. close": "50.05",
            "5. adjusted close": "50.05",
            "6. volume": "14900",
            "7. dividend amount": "0.0000",
            "8. split coefficient": "1.0"
        },
        "2011-04-26": {
            "1. open": "50.18",
            

In [19]:
#Format data into dataframe, organize columns and rename columns
oil_data = response_oil["Time Series (Daily)"]
oil_df = pd.DataFrame(oil_data).transpose()
organized_oil_df = oil_df[["1. open", "2. high", "3. low", "4. close" , "6. volume"]]
clean_oil_df = organized_oil_df.rename(columns={"1. open": "open", "2. high": "high", "3. low": "low" , "4. close": "close" , "6. volume": "volume"})
clean_oil_df = clean_oil_df.astype(float)
clean_oil_df

Unnamed: 0,open,high,low,close,volume
2023-02-13,30.10,30.5800,29.98,30.00,64350.0
2023-02-10,30.00,30.3999,29.98,30.30,28615.0
2023-02-09,29.67,29.7000,29.25,29.55,18024.0
2023-02-08,29.75,29.8600,29.37,29.79,17969.0
2023-02-07,28.52,29.5000,28.52,29.50,46772.0
...,...,...,...,...,...
2011-04-28,50.78,51.1600,50.17,50.70,44765.0
2011-04-27,50.35,50.8500,49.80,50.81,35200.0
2011-04-26,50.18,50.4300,50.10,50.30,15000.0
2011-04-25,50.57,50.5700,49.91,50.05,14900.0


In [20]:
#Calculating the daily change percent and creating a column for it 
daily_change_oil = (100 * (clean_oil_df["close"] - clean_oil_df["open"] ) ) / clean_oil_df["open"]
clean_oil_df["daily change (%)"] = daily_change_oil
clean_oil_df

Unnamed: 0,open,high,low,close,volume,daily change (%)
2023-02-13,30.10,30.5800,29.98,30.00,64350.0,-0.332226
2023-02-10,30.00,30.3999,29.98,30.30,28615.0,1.000000
2023-02-09,29.67,29.7000,29.25,29.55,18024.0,-0.404449
2023-02-08,29.75,29.8600,29.37,29.79,17969.0,0.134454
2023-02-07,28.52,29.5000,28.52,29.50,46772.0,3.436185
...,...,...,...,...,...,...
2011-04-28,50.78,51.1600,50.17,50.70,44765.0,-0.157542
2011-04-27,50.35,50.8500,49.80,50.81,35200.0,0.913605
2011-04-26,50.18,50.4300,50.10,50.30,15000.0,0.239139
2011-04-25,50.57,50.5700,49.91,50.05,14900.0,-1.028278


In [21]:
#Create column with rows filled with "OIL"
clean_oil_df["symbol"] = "OIL"
#Reorganize column order
final_oil_df = clean_oil_df[["symbol", "open", "high", "low", "close", "daily change (%)"]]
#Move dates from index to column in dataframe
final_oil_df2 = final_oil_df.reset_index()
#Rename column from index to date
final_oil_df3 = final_oil_df2.rename(columns = {'index' : 'date'})
#Sort dates from oldest to newest
final_oil_df4 = final_oil_df3.sort_values('date')
final_oil_df4

Unnamed: 0,date,symbol,open,high,low,close,daily change (%)
2972,2011-04-21,OIL,49.94,50.3500,49.90,50.35,0.820985
2971,2011-04-25,OIL,50.57,50.5700,49.91,50.05,-1.028278
2970,2011-04-26,OIL,50.18,50.4300,50.10,50.30,0.239139
2969,2011-04-27,OIL,50.35,50.8500,49.80,50.81,0.913605
2968,2011-04-28,OIL,50.78,51.1600,50.17,50.70,-0.157542
...,...,...,...,...,...,...,...
4,2023-02-07,OIL,28.52,29.5000,28.52,29.50,3.436185
3,2023-02-08,OIL,29.75,29.8600,29.37,29.79,0.134454
2,2023-02-09,OIL,29.67,29.7000,29.25,29.55,-0.404449
1,2023-02-10,OIL,30.00,30.3999,29.98,30.30,1.000000


In [22]:
#Create CSV file for oil dataframe
final_oil_df4.to_csv("Resources/oil_csv.csv", index=False)

In [23]:
#Check for null values in oil dataframe
final_oil_df4.isnull().sum()

date                0
symbol              0
open                0
high                0
low                 0
close               0
daily change (%)    0
dtype: int64

# Bitcoin Data

In [24]:
#API request for bitcoin data
response_btc = requests.get(url_btc + api_key).json()
print(json.dumps(response_btc, indent=4, sort_keys=True))

{
    "Meta Data": {
        "1. Information": "Daily Prices and Volumes for Digital Currency",
        "2. Digital Currency Code": "BTC",
        "3. Digital Currency Name": "Bitcoin",
        "4. Market Code": "CNY",
        "5. Market Name": "Chinese Yuan",
        "6. Last Refreshed": "2023-02-14 00:00:00",
        "7. Time Zone": "UTC"
    },
    "Time Series (Digital Currency Daily)": {
        "2020-05-21": {
            "1a. open (CNY)": "64817.54202100",
            "1b. open (USD)": "9511.43000000",
            "2a. high (CNY)": "65274.39950900",
            "2b. high (USD)": "9578.47000000",
            "3a. low (CNY)": "60071.58050000",
            "3b. low (USD)": "8815.00000000",
            "4a. close (CNY)": "61800.12915500",
            "4b. close (USD)": "9068.65000000",
            "5. volume": "108928.78096900",
            "6. market cap (USD)": "108928.78096900"
        },
        "2020-05-22": {
            "1a. open (CNY)": "61792.36039700",
            "1b. ope

In [25]:
#Build data into dataframe for bitcoin
btc_data = response_btc["Time Series (Digital Currency Daily)"]
#change orientation of dataframe
btc_df = pd.DataFrame(btc_data).transpose()
#Specify which columns to use
organized_btc_df = btc_df[["1b. open (USD)", "2b. high (USD)", "3b. low (USD)", "4b. close (USD)", "5. volume"]]
#Rename columns 
clean_btc_df = organized_btc_df.rename(columns={"1b. open (USD)": "open", "2b. high (USD)": "high", "3b. low (USD)": "low" , "4b. close (USD)": "close" , "5. volume": "volume"})
clean_btc_df

Unnamed: 0,open,high,low,close,volume
2023-02-14,21774.63000000,21816.62000000,21678.98000000,21687.13000000,14486.13834000
2023-02-13,21782.37000000,21894.99000000,21351.07000000,21773.97000000,295730.76791000
2023-02-12,21862.02000000,22090.00000000,21630.00000000,21783.54000000,204435.65163000
2023-02-11,21625.19000000,21906.32000000,21599.78000000,21862.55000000,177021.58433000
2023-02-10,21797.83000000,21938.16000000,21451.00000000,21625.19000000,338591.94247000
...,...,...,...,...,...
2020-05-25,8718.14000000,8979.66000000,8642.72000000,8900.35000000,62833.91094900
2020-05-24,9179.01000000,9298.00000000,8700.00000000,8720.34000000,70379.86645000
2020-05-23,9170.00000000,9307.85000000,9070.00000000,9179.15000000,43526.29696600
2020-05-22,9067.51000000,9271.00000000,8933.52000000,9170.00000000,58943.13102400


In [26]:
#Adding symbol column ad fill with "BTC"
clean_btc_df["symbol"] = "BTC"
clean_btc_df

Unnamed: 0,open,high,low,close,volume,symbol
2023-02-14,21774.63000000,21816.62000000,21678.98000000,21687.13000000,14486.13834000,BTC
2023-02-13,21782.37000000,21894.99000000,21351.07000000,21773.97000000,295730.76791000,BTC
2023-02-12,21862.02000000,22090.00000000,21630.00000000,21783.54000000,204435.65163000,BTC
2023-02-11,21625.19000000,21906.32000000,21599.78000000,21862.55000000,177021.58433000,BTC
2023-02-10,21797.83000000,21938.16000000,21451.00000000,21625.19000000,338591.94247000,BTC
...,...,...,...,...,...,...
2020-05-25,8718.14000000,8979.66000000,8642.72000000,8900.35000000,62833.91094900,BTC
2020-05-24,9179.01000000,9298.00000000,8700.00000000,8720.34000000,70379.86645000,BTC
2020-05-23,9170.00000000,9307.85000000,9070.00000000,9179.15000000,43526.29696600,BTC
2020-05-22,9067.51000000,9271.00000000,8933.52000000,9170.00000000,58943.13102400,BTC


In [27]:
#Select columns to be used-remove symbol to change data type
final_btc_df = clean_btc_df[["open", "high", "low", "close"]]
#Change data type to float
final_btc_df = final_btc_df.astype(float)
#Change variable name for dataframe
new_final = final_btc_df


new_final

Unnamed: 0,open,high,low,close
2023-02-14,21774.63,21816.62,21678.98,21687.13
2023-02-13,21782.37,21894.99,21351.07,21773.97
2023-02-12,21862.02,22090.00,21630.00,21783.54
2023-02-11,21625.19,21906.32,21599.78,21862.55
2023-02-10,21797.83,21938.16,21451.00,21625.19
...,...,...,...,...
2020-05-25,8718.14,8979.66,8642.72,8900.35
2020-05-24,9179.01,9298.00,8700.00,8720.34
2020-05-23,9170.00,9307.85,9070.00,9179.15
2020-05-22,9067.51,9271.00,8933.52,9170.00


# Bitcoin Raw Data

In [28]:
#Path for bitcoin source file from Kaggle
file = "Resources/bitcoin_raw.csv"

In [29]:
#Read file path for CSV and display dataframe
btc_orig_df = pd.read_csv(file)
#Change date column to the index
btc_orig_df = btc_orig_df.set_index("Date")
#Select columns for dataframe-remove currency to convert data type
btc_orig_df = btc_orig_df[['Closing Price (USD)', '24h Open (USD)' , '24h High (USD)' ,'24h Low (USD)']]
#Change data type to float
btc_orig_df = btc_orig_df.astype(float)

btc_orig_df

Unnamed: 0_level_0,Closing Price (USD),24h Open (USD),24h High (USD),24h Low (USD)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-10-01,123.654990,124.304660,124.751660,122.563490
2013-10-02,125.455000,123.654990,125.758500,123.633830
2013-10-03,108.584830,125.455000,125.665660,83.328330
2013-10-04,118.674660,108.584830,118.675000,107.058160
2013-10-05,121.338660,118.674660,121.936330,118.005660
...,...,...,...,...
2020-09-07,10207.605003,10174.863432,10355.652495,10014.277661
2020-09-08,10381.776107,10265.742989,10383.119236,9891.734243
2020-09-09,10043.196441,10381.676816,10444.171125,9868.937984
2020-09-10,10268.460916,10136.560835,10352.415189,9989.510556


In [30]:
#Rename columns
organized_btc_orig = btc_orig_df[["24h Open (USD)","24h High (USD)","24h Low (USD)", "Closing Price (USD)"]] 
# ^^Took out 'Currency' because it's not as one of the columns and 'Date' because it's the index.
clean_btc_orig = organized_btc_orig.astype(float)
# ^^ changed it to float.

#Renamed columns to match other dateframes
clean_btc_orig = clean_btc_orig.rename(columns={"Date": "date","Currency": "symbol","24h Open (USD)": "open",
                                                    "24h High (USD)": "high","24h Low (USD)": "low",
                                                    "Closing Price (USD)": "close"})

clean_btc_orig

Unnamed: 0_level_0,open,high,low,close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-10-01,124.304660,124.751660,122.563490,123.654990
2013-10-02,123.654990,125.758500,123.633830,125.455000
2013-10-03,125.455000,125.665660,83.328330,108.584830
2013-10-04,108.584830,118.675000,107.058160,118.674660
2013-10-05,118.674660,121.936330,118.005660,121.338660
...,...,...,...,...
2020-09-07,10174.863432,10355.652495,10014.277661,10207.605003
2020-09-08,10265.742989,10383.119236,9891.734243,10381.776107
2020-09-09,10381.676816,10444.171125,9868.937984,10043.196441
2020-09-10,10136.560835,10352.415189,9989.510556,10268.460916


# Bitcoin Combined

In [31]:
#Combine all bitcoin databases together
join_btc = pd.concat([new_final, clean_btc_orig], ignore_index=False) 
#Change date index to column
join_btc = join_btc.reset_index()
#REname column name from index to date
join_btc = join_btc.rename(columns={"index" : "date"})
#Organize date from oldest to newest dates
final_btc = join_btc.sort_values('date')
#Create column named symbol and fill with BTC
final_btc['symbol'] = 'BTC'
#Organize the columns for dataframe
final_join_btc = final_btc[['date','symbol', 'open', 'high', 'low', 'close']]


final_join_btc

Unnamed: 0,date,symbol,open,high,low,close
1000,2013-10-01,BTC,124.30466,124.75166,122.56349,123.65499
1001,2013-10-02,BTC,123.65499,125.75850,123.63383,125.45500
1002,2013-10-03,BTC,125.45500,125.66566,83.32833,108.58483
1003,2013-10-04,BTC,108.58483,118.67500,107.05816,118.67466
1004,2013-10-05,BTC,118.67466,121.93633,118.00566,121.33866
...,...,...,...,...,...,...
4,2023-02-10,BTC,21797.83000,21938.16000,21451.00000,21625.19000
3,2023-02-11,BTC,21625.19000,21906.32000,21599.78000,21862.55000
2,2023-02-12,BTC,21862.02000,22090.00000,21630.00000,21783.54000
1,2023-02-13,BTC,21782.37000,21894.99000,21351.07000,21773.97000


In [32]:
#Calculate the daily change percent
daily_change_btc = (100 * (final_join_btc["close"] - final_join_btc["open"] ) ) / final_join_btc["open"]
#Create column name daily change percent
final_join_btc["daily change (%)"] = daily_change_btc

#Create CSV file for combined Bitcoin datasets
final_join_btc.to_csv("Resources/btc_joined.csv", index=False)

#Change date column to the index
final_join_btc2 = final_join_btc.set_index("date")
final_join_btc2



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0_level_0,symbol,open,high,low,close,daily change (%)
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-10-01,BTC,124.30466,124.75166,122.56349,123.65499,-0.522643
2013-10-02,BTC,123.65499,125.75850,123.63383,125.45500,1.455671
2013-10-03,BTC,125.45500,125.66566,83.32833,108.58483,-13.447188
2013-10-04,BTC,108.58483,118.67500,107.05816,118.67466,9.292118
2013-10-05,BTC,118.67466,121.93633,118.00566,121.33866,2.244793
...,...,...,...,...,...,...
2023-02-10,BTC,21797.83000,21938.16000,21451.00000,21625.19000,-0.792005
2023-02-11,BTC,21625.19000,21906.32000,21599.78000,21862.55000,1.097609
2023-02-12,BTC,21862.02000,22090.00000,21630.00000,21783.54000,-0.358979
2023-02-13,BTC,21782.37000,21894.99000,21351.07000,21773.97000,-0.038563
