# API calls

## loading data

## accessing api

In [1]:
import requests
import json
import pandas as pd
from dotenv import load_dotenv
import os
import time

In [2]:
# Load environment variables and alpha vintage API key
load_dotenv('apiKeys.env')
api_key = os.getenv("alphavantage")

In [3]:
# api settings
functionType = 'TIME_SERIES_INTRADAY'
date = '2024-01'
interval = '1min'
outputsize = 'full'
ticker = 'IBM'

In [4]:
url = f"""https://www.alphavantage.co/query?\
function={functionType}&\
symbol={ticker}&\
interval={interval}&\
month={date}&\
outputsize={outputsize}&\
apikey={api_key} \
"""


url


'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=IBM&interval=1min&month=2024-01&outputsize=full&apikey=8D62YF023MPKUXT7 '

In [5]:
# Get the response
response = requests.get(url)

# Retrieve reviews
reviews = response.json()

# Print results in JSON format
print(json.dumps(reviews, indent=4))

# Convert to a data frame
reviews_df = pd.json_normalize(reviews[f"Time Series ({interval})"])
reviews_df

{
    "Meta Data": {
        "1. Information": "Intraday (1min) open, high, low, close prices and volume",
        "2. Symbol": "IBM",
        "3. Last Refreshed": "2024-01-31 19:59:00",
        "4. Interval": "1min",
        "5. Output Size": "Full size",
        "6. Time Zone": "US/Eastern"
    },
    "Time Series (1min)": {
        "2024-01-31 19:59:00": {
            "1. open": "182.6300",
            "2. high": "182.6470",
            "3. low": "182.3920",
            "4. close": "182.5560",
            "5. volume": "189"
        },
        "2024-01-31 19:57:00": {
            "1. open": "182.3430",
            "2. high": "182.3600",
            "3. low": "182.3320",
            "4. close": "182.3580",
            "5. volume": "5"
        },
        "2024-01-31 19:50:00": {
            "1. open": "182.4420",
            "2. high": "182.5580",
            "3. low": "182.4310",
            "4. close": "182.5560",
            "5. volume": "138"
        },
        "2024-01-31 19:49:00

Unnamed: 0,2024-01-31 19:59:00.1. open,2024-01-31 19:59:00.2. high,2024-01-31 19:59:00.3. low,2024-01-31 19:59:00.4. close,2024-01-31 19:59:00.5. volume,2024-01-31 19:57:00.1. open,2024-01-31 19:57:00.2. high,2024-01-31 19:57:00.3. low,2024-01-31 19:57:00.4. close,2024-01-31 19:57:00.5. volume,...,2024-01-02 04:01:00.1. open,2024-01-02 04:01:00.2. high,2024-01-02 04:01:00.3. low,2024-01-02 04:01:00.4. close,2024-01-02 04:01:00.5. volume,2024-01-02 04:00:00.1. open,2024-01-02 04:00:00.2. high,2024-01-02 04:00:00.3. low,2024-01-02 04:00:00.4. close,2024-01-02 04:00:00.5. volume
0,182.63,182.647,182.392,182.556,189,182.343,182.36,182.332,182.358,5,...,162.602,162.617,162.593,162.616,4,162.077,162.092,162.068,162.091,12


In [6]:
# Extracting the time series data
time_series_data = reviews[f"Time Series ({interval})"]

# Convert to DataFrame
df = pd.DataFrame(time_series_data).T

# Rename columns
df.columns = ["open", "high", "low", "close", "volume"]

# Convert data types
df = df.astype({"open": float, "high": float, "low": float, "close": float, "volume": int})

# Convert index to datetime
df.index = pd.to_datetime(df.index)

# Sort index
df.sort_index(inplace=True)
df

# Transpose DataFrame
# df_flipped = df.T

# Display transposed DataFrame
# df_flipped

Unnamed: 0,open,high,low,close,volume
2024-01-02 04:00:00,162.077,162.092,162.068,162.091,12
2024-01-02 04:01:00,162.602,162.617,162.593,162.616,4
2024-01-02 04:06:00,162.176,162.627,162.167,162.626,3
2024-01-02 04:10:00,162.176,162.191,162.167,162.190,2
2024-01-02 04:11:00,162.523,162.538,162.514,162.537,33
...,...,...,...,...,...
2024-01-31 19:48:00,182.630,182.647,182.620,182.645,200
2024-01-31 19:49:00,182.442,182.459,182.431,182.457,7
2024-01-31 19:50:00,182.442,182.558,182.431,182.556,138
2024-01-31 19:57:00,182.343,182.360,182.332,182.358,5


In [7]:
df['tomorrow'] = df['close'].shift(-1)
df['target'] = (df['tomorrow'] > df['close']).astype(int)
df

Unnamed: 0,open,high,low,close,volume,tomorrow,target
2024-01-02 04:00:00,162.077,162.092,162.068,162.091,12,162.616,1
2024-01-02 04:01:00,162.602,162.617,162.593,162.616,4,162.626,1
2024-01-02 04:06:00,162.176,162.627,162.167,162.626,3,162.190,0
2024-01-02 04:10:00,162.176,162.191,162.167,162.190,2,162.537,1
2024-01-02 04:11:00,162.523,162.538,162.514,162.537,33,162.487,0
...,...,...,...,...,...,...,...
2024-01-31 19:48:00,182.630,182.647,182.620,182.645,200,182.457,0
2024-01-31 19:49:00,182.442,182.459,182.431,182.457,7,182.556,1
2024-01-31 19:50:00,182.442,182.558,182.431,182.556,138,182.358,0
2024-01-31 19:57:00,182.343,182.360,182.332,182.358,5,182.556,1


# Predictions

We will use multiple models to predict the target column


In [18]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

model = RandomForestClassifier(n_estimators=100)


In [19]:

X=df.drop(columns=['target','tomorrow'], axis=1)
y=df['target']


In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

model.fit(X_train, y_train)

In [22]:
print(f'Random Forrest Classifier Training Score: {model.score(X_train, y_train)}')
print(f'Random Forrest Classifier Testing Score: {model.score(X_test, y_test)}')

Training Score: 0.9926447736454989
Testing Score: 0.536047234307023
