# 📈 Stock Market Project with Alpha Vantage and Professional Pipeline

This notebook analyses and predicts intraday stock closing prices using data from the Alpha Vantage API. It includes both a classic approach and a more structured version built on scikit-learn's `Pipeline` and a custom transformer class (a subclass of `BaseEstimator` and `TransformerMixin`).

In [None]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit, cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
warnings.filterwarnings("ignore")


## 🔗 Data loading from Alpha Vantage

In [None]:
# --- Alpha Vantage request configuration ---------------------------------
# The API key is read from the environment so that it is never stored in the
# notebook. Any key that was previously committed in this file should be
# treated as leaked and revoked.
API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the ALPHAVANTAGE_API_KEY environment variable before running this cell."
    )
symbol = "IBM"
interval = "5min"
outputsize = "full"
function = "TIME_SERIES_INTRADAY"
url = f"https://www.alphavantage.co/query?function={function}&symbol={symbol}&interval={interval}&outputsize={outputsize}&apikey={API_KEY}"

# --- Fetch ----------------------------------------------------------------
# A timeout keeps the kernel from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP-level failures immediately.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# The bars live under a key derived from the interval. When that key is
# missing the payload is something else (NOTE(review): presumably an error or
# rate-limit message), so fail with a readable error instead of a bare KeyError.
series_key = f'Time Series ({interval})'
if series_key not in data:
    raise ValueError(
        f"Alpha Vantage response has no '{series_key}' entry. "
        f"Keys received: {list(data)}. Payload starts with: {str(data)[:300]}"
    )

# --- Build the price frame --------------------------------------------------
# One row per timestamp; values arrive as strings and are cast to float.
df = pd.DataFrame.from_dict(data[series_key], orient='index')
df.index = pd.to_datetime(df.index)
# Columns are renamed positionally.
# NOTE(review): assumes the API returns open/high/low/close/volume in this order - confirm.
df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
df = df.astype(float).sort_index()  # chronological order, oldest bar first


## 🧹 Classic preprocessing

In [None]:
# Every engineered column is derived from the closing price.
close_px = df['Close']

# Bar-over-bar percentage change of the close.
df['Return'] = close_px.pct_change()

# Simple moving averages of the close over 5 and 10 bars.
for span in (5, 10):
    df[f'SMA_{span}'] = close_px.rolling(window=span).mean()

# Supervised target: the close of the following bar.
df['Target'] = close_px.shift(-1)

# Remove rows where any derived column is undefined (leading rows without
# enough history, and the final row, which has no following bar).
df.dropna(inplace=True)


## 🔀 Train / test split

In [None]:
# Model inputs: the raw OHLCV columns plus the two moving averages.
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'SMA_5', 'SMA_10']
X, y = df[features], df['Target']

# Chronological hold-out: shuffle=False keeps row order, so the last 20% of
# the rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)


## 🤖 Classic model: LinearRegression

In [None]:
# Fit ordinary least squares on the training slice and predict the hold-out.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Hold-out error metrics; RMSE is the square root of the mean squared error.
mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred) ** 0.5

# One metric per output line.
print(
    f"MAE: {mae:.4f}",
    f"RMSE: {rmse:.4f}",
    f"R²: {r2:.4f}",
    sep="\n",
)


## 🧩 Pipeline with custom Transformer

In [None]:
class AddSMA(BaseEstimator, TransformerMixin):
    """Add a simple-moving-average (SMA) column to a DataFrame.

    The transformer is stateless: ``fit`` learns nothing and ``transform``
    returns a copy of the input with one extra (or overwritten) column named
    ``SMA_<window>``.

    Parameters
    ----------
    window : int, default=5
        Number of consecutive rows averaged by the rolling mean.
    column : str, default='Close'
        Name of the column the moving average is computed from.
    """

    def __init__(self, window=5, column='Close'):
        # Constructor arguments are stored unmodified under their own names.
        self.window = window
        self.column = column

    def fit(self, X, y=None):
        """No-op fit; returns ``self`` so the transformer can be chained."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the ``SMA_<window>`` column set.

        The first ``window - 1`` rows of the new column are NaN because the
        rolling mean has no complete window there. If ``X`` already has a
        column named ``SMA_<window>`` it is overwritten in the returned copy;
        the input frame itself is not modified.

        Raises
        ------
        KeyError
            If ``X`` has no column named ``self.column``.
        """
        X_ = X.copy()
        X_[f'SMA_{self.window}'] = X_[self.column].rolling(window=self.window).mean()
        return X_


In [None]:
# Fit the feature transformer once, on the training slice only, and reuse the
# same fitted instance for the test slice. Test data must only ever go through
# transform(), never fit()/fit_transform().
sma_step = AddSMA(window=5)
X_train_transformed = sma_step.fit_transform(X_train)

# X_train already carries an 'SMA_5' column (it is in the feature list);
# AddSMA overwrites it with a rolling mean computed inside this slice, so the
# first window-1 rows become NaN and are removed here together with their
# targets.
valid_idx = X_train_transformed.dropna().index
X_train_clean = X_train_transformed.loc[valid_idx]
y_train_clean = y_train.loc[valid_idx]

# Single-step pipeline wrapping the regressor.
pipeline = Pipeline([
    ('model', LinearRegression())
])
pipeline.fit(X_train_clean, y_train_clean)

# Same transformation for the hold-out slice; rows without a complete window
# are dropped and the targets are re-aligned on the surviving index.
X_test_transformed = sma_step.transform(X_test).dropna()
y_test_aligned = y_test.loc[X_test_transformed.index]
y_pred_pipe = pipeline.predict(X_test_transformed)

# Hold-out metrics for the pipeline model.
mae_pipe = mean_absolute_error(y_test_aligned, y_pred_pipe)
rmse_pipe = mean_squared_error(y_test_aligned, y_pred_pipe) ** 0.5
r2_pipe = r2_score(y_test_aligned, y_pred_pipe)

print(f"[PIPELINE] MAE: {mae_pipe:.4f}")
print(f"[PIPELINE] RMSE: {rmse_pipe:.4f}")
print(f"[PIPELINE] R²: {r2_pipe:.4f}")


In [None]:
# The rows are in chronological order, so validation folds must always come
# after the data they are trained on. An integer cv would use plain K-fold
# for a regressor, which trains on later bars to score earlier ones;
# TimeSeriesSplit uses expanding training windows with strictly later
# validation folds instead.
cv_scores = cross_val_score(
    pipeline,
    X_train_clean,
    y_train_clean,
    cv=TimeSeriesSplit(n_splits=5),
    scoring='r2'
)

# Per-fold R² scores with their mean and standard deviation.
print("📊 Validación cruzada (R²):")
print(f"Scores: {cv_scores}")
print(f"Media: {cv_scores.mean():.4f}")
print(f"Desviación estándar: {cv_scores.std():.4f}")

## 🔮 Future price prediction

In [None]:
# --- Recursive multi-step forecast -----------------------------------------
# Each step feeds the previous predicted close back in as the newest close.
# Open/High/Low/Volume are carried forward unchanged from the last row of df.
FORECAST_STEPS = 12
SMA_SHORT, SMA_LONG = 5, 10
bar_step = pd.Timedelta(interval)  # bar size taken from the request configuration

if len(df) < SMA_LONG:
    raise ValueError(
        f"Need at least {SMA_LONG} rows in df to compute SMA_{SMA_LONG} for the forecast."
    )

# NOTE(review): the preprocessing cell drops the most recent fetched bar
# (its Target is NaN), so df.index[-1] is one bar before the newest data and
# the first forecast timestamp falls on a bar whose real close was fetched -
# confirm this is intended.
# Timestamps advance by a fixed bar size and do not skip market closures.
ultimo_timestamp = df.index[-1]
future_timestamps = [ultimo_timestamp + bar_step * (i + 1) for i in range(FORECAST_STEPS)]

last_bar = df.iloc[-1]
carried = {col: last_bar[col] for col in ('Open', 'High', 'Low', 'Volume')}

# Only the most recent closes are needed for the moving averages, so keep a
# short running list instead of re-rolling the full history on every step.
close_history = df['Close'].iloc[-SMA_LONG:].tolist()

future_predictions = []
future_rows = []

for ts in future_timestamps:
    sma_5 = float(np.mean(close_history[-SMA_SHORT:]))
    sma_10 = float(np.mean(close_history[-SMA_LONG:]))

    # Column order matches the feature order the pipeline was trained on.
    X_input = pd.DataFrame({
        'Open': [carried['Open']],
        'High': [carried['High']],
        'Low': [carried['Low']],
        'Close': [close_history[-1]],
        'Volume': [carried['Volume']],
        'SMA_5': [sma_5],
        'SMA_10': [sma_10]
    }, index=[ts])

    # A dedicated name is used so the classic model's y_pred array from the
    # earlier cell is not overwritten by a scalar.
    next_close = pipeline.predict(X_input)[0]
    future_predictions.append(next_close)

    future_rows.append({
        'Open': carried['Open'],
        'High': carried['High'],
        'Low': carried['Low'],
        'Close': next_close,
        'Volume': carried['Volume'],
        'Return': np.nan,
        'SMA_5': sma_5,
        'SMA_10': sma_10,
        'Target': np.nan
    })
    close_history.append(next_close)

# History plus forecast rows, assembled once instead of concatenating inside
# the loop.
future_df = pd.concat([df, pd.DataFrame(future_rows, index=future_timestamps)])

for ts, pred in zip(future_timestamps, future_predictions):
    print(f"{ts.strftime('%Y-%m-%d %H:%M')}: ${pred:.2f}")

In [None]:
# Collect the forecast into a two-column frame: one row per forecast step,
# pairing each future timestamp with its predicted value.
df_pred = pd.DataFrame({
    "timestamp": future_timestamps,
    "prediction": future_predictions
})

# Persist the forecast to a pickle file in the current working directory.
# NOTE(review): presumably dashboard.py loads this file - confirm the path
# and column names against that script.
df_pred.to_pickle("predictions.pkl")

# IPython shell escape that starts the Streamlit app from dashboard.py.
# NOTE(review): this likely keeps the cell (and the kernel) busy until the
# Streamlit process is stopped - confirm, and consider launching it from a
# terminal instead.
!streamlit run dashboard.py