In [90]:
import pandas as pd
import yfinance as yf
from sklearn.metrics import mean_squared_error
import ta
import numpy as np

# Download stock data
def get_stock_data(ticker, start_date, end_date):
    stock_data = yf.download(ticker, start=start_date, end=end_date)
    return stock_data

# Calculate similarity between two time series
def calculate_similarity(current_data, past_data):
    min_length = min(len(current_data), len(past_data))
    current_data = current_data[:min_length]
    past_data = past_data[:min_length]
    
    # Normalize the data
    current_data = (current_data - np.mean(current_data)) / np.std(current_data)
    past_data = (past_data - np.mean(past_data)) / np.std(past_data)
    
    return mean_squared_error(current_data, past_data)


# Calibrate indicators
def calibrate_indicators(data, rsi_window, sma_short_window, sma_long_window):
    data['RSI'] = ta.momentum.RSIIndicator(data['Close'], rsi_window).rsi()
    data['SMA_short'] = ta.trend.sma_indicator(data['Close'], sma_short_window)
    data['SMA_long'] = ta.trend.sma_indicator(data['Close'], sma_long_window)

def find_similar_periods(ticker, current_start_date, current_end_date, past_start_year, past_end_year, step_months, top_n=5):
    current_data = get_stock_data(ticker, current_start_date, current_end_date)
    
    similar_periods = {}
    
    for year in range(past_start_year, past_end_year + 1):
        for month in range(3, 13, step_months):
            start_date = pd.Timestamp(year=year, month=month, day=1)
            end_date = start_date + pd.DateOffset(years=1) - pd.DateOffset(days=1)
            
            if end_date > current_end_date:
                continue
            
            past_data = get_stock_data(ticker, start_date, end_date)
            similarity_score = calculate_similarity(current_data['Close'], past_data['Close'])
            
            similar_periods[(start_date, end_date)] = similarity_score
    
    ranked_periods = sorted(similar_periods.items(), key=lambda x: x[1])
    
    return ranked_periods[:top_n]


def evaluate_performance(actual_data, predicted_data):
    # Remove NaN values from both actual and predicted data
    actual_data = actual_data.dropna()
    predicted_data = predicted_data.dropna()

    # Align the lengths of actual and predicted data after removing NaNs
    min_length = min(len(actual_data), len(predicted_data))
    actual_data = actual_data[:min_length]
    predicted_data = predicted_data[:min_length]

    return mean_squared_error(actual_data, predicted_data)

# Example: Assuming a simple prediction using SMA as a placeholder
def predict_prices(data, sma_short_window, sma_long_window):
    data['SMA_short'] = ta.trend.sma_indicator(data['Close'], sma_short_window)
    data['SMA_long'] = ta.trend.sma_indicator(data['Close'], sma_long_window)
    predicted_prices = data['SMA_short'] * 0.5 + data['SMA_long'] * 0.5  # Adjust this based on your prediction logic
    return predicted_prices


In [91]:

# Example usage
ticker_symbol = 'TSLA'
current_start = pd.Timestamp(year=2023, month=3, day=21)
current_end = pd.Timestamp(year=2024, month=3, day=21)
past_start_year = 2010
past_end_year = 2022
step_months = 12

similar_periods = find_similar_periods(ticker_symbol, current_start, current_end, past_start_year, past_end_year, step_months)
for i, period in enumerate(similar_periods, 1):
    print(f"The {i}{'' if i == 0 else 'st' if i == 1 else 'nd' if i == 2 else 'rd' if i == 3 else 'th'} most similar year period is:", period[0])


    ''' Fetch the data for the two periods
    data_current = get_stock_data('TSLA', current_start, current_end)
    # period[0][0] is the start date and period[0][1] is the end date
    data_similar = get_stock_data('TSLA', period[0][0], period[0][1])

    # Plot the closing prices for the current period
    plt.figure(figsize=(14, 7))
    plt.plot(data_current.index, data_current['Close'], label='Current Period')

    # Plot the closing prices for the most similar period
    plt.plot(data_similar.index, data_similar['Close'], label='Most Similar Period')

    plt.title('TSLA Stock Prices')
    plt.xlabel('Date')
    plt.ylabel('Close Price')
    plt.legend()
    plt.show()'''


[*********************100%%**********************]  1 of 1 completed


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

The 1st most similar year period is: (Timestamp('2015-03-01 00:00:00'), Timestamp('2016-02-29 00:00:00'))
The 2nd most similar year period is: (Timestamp('2017-03-01 00:00:00'), Timestamp('2018-02-28 00:00:00'))
The 3rd most similar year period is: (Timestamp('2014-03-01 00:00:00'), Timestamp('2015-02-28 00:00:00'))
The 4th most similar year period is: (Timestamp('2010-03-01 00:00:00'), Timestamp('2011-02-28 00:00:00'))
The 5th most similar year period is: (Timestamp('2018-03-01 00:00:00'), Timestamp('2019-02-28 00:00:00'))





In [92]:
# Select the top similar period for calibration
top_period = similar_periods[0][0]
top_data = get_stock_data(ticker_symbol, top_period[0], top_period[1])

top_data


[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-03-02,13.513333,13.556000,13.055333,13.155333,13.155333,118831500
2015-03-03,13.120667,13.349333,13.021333,13.304000,13.304000,66484500
2015-03-04,13.283333,13.501333,13.147333,13.496000,13.496000,63330000
2015-03-05,13.523333,13.746000,13.343333,13.375333,13.375333,73155000
2015-03-06,13.280667,13.383333,12.810000,12.925333,12.925333,100686000
...,...,...,...,...,...,...
2016-02-22,11.341333,11.927333,11.323333,11.849333,11.849333,75901500
2016-02-23,11.744000,12.115333,11.578667,11.814000,11.814000,89766000
2016-02-24,11.516667,11.966667,11.189333,11.933333,11.933333,80934000
2016-02-25,11.910000,12.568000,11.680000,12.495333,12.495333,86260500


In [95]:
# Calibrate indicators
rsi_window = 14
sma_short_window = 20
sma_long_window = 50
calibrate_indicators(top_data, rsi_window, sma_short_window, sma_long_window)

# Evaluate performance with past future periods
future_start = top_period[1] + pd.DateOffset(days=1)
future_end = future_start + pd.DateOffset(years=2) - pd.DateOffset(days=1)
future_data = get_stock_data(ticker_symbol, future_start, future_end)

predicted_prices = predict_prices(future_data, sma_short_window, sma_long_window)

actual_prices = future_data['Close']

# Evaluate performance for SMA and RSI
sma_performance = evaluate_performance(actual_prices, predicted_prices)
future_data['RSI'] = ta.momentum.RSIIndicator(future_data['Close'], rsi_window).rsi()
rsi_performance = evaluate_performance(actual_prices, future_data['RSI'])

print(f"Performance with calibrated SMA: {sma_performance}")
print(f"Performance with calibrated RSI: {rsi_performance}")

predicted_prices


[*********************100%%**********************]  1 of 1 completed

Performance with calibrated SMA: 3.366878032887777
Performance with calibrated RSI: 1447.9282861312533





Date
2016-03-01          NaN
2016-03-02          NaN
2016-03-03          NaN
2016-03-04          NaN
2016-03-07          NaN
                ...    
2018-02-21    22.281680
2018-02-22    22.302840
2018-02-23    22.342283
2018-02-26    22.377493
2018-02-27    22.387900
Length: 503, dtype: float64