# Shareprices Preprocessor
This program takes daily US share prices as input and, for each ticker, calculates the ratio of the share price to its 20-day moving average (MA) and the potential return on investment if one bought shares on that day and sold them after three months. The data is calculated for every ticker and for every month in the specified year range.
The shareprices data is from simfin.com

### 1. Imports and helper function

In [16]:
import pandas as pd
import numpy as np

def investment_close_date(open_date):
    """Return the date on which an investment opened on ``open_date`` closes.

    The result is the last calendar day of the month that lies two months
    after the opening month, e.g. an investment opened in January closes
    on March 31.

    Args:
        open_date: Timestamp of the day the investment was opened.

    Returns:
        The open date shifted to the target month end.
    """
    two_month_starts = pd.offsets.MonthBegin(2)
    one_month_end = pd.offsets.MonthEnd(1)
    # Jump to the first day of the target month, then roll forward to the
    # last day of that same month.
    return (open_date + two_month_starts) + one_month_end

### 2. Main function

In [37]:
def calculate_shareprices_data(shareprices_filename, start_year, end_year):
    """Compute the monthly MA ratio and three-month return for every ticker.

    For each ticker and each month, the first available trading day of the
    month is taken as the day the investment is opened. Two figures are
    derived for that day:

    * ``MA Ratio`` - adjusted close divided by its 20-row rolling mean.
    * ``Return on Investment`` - adjusted close on the closing day divided
      by the adjusted close on the opening day.

    Args:
        shareprices_filename: Path (or buffer) of a semicolon-separated CSV
            with at least the columns ``Ticker``, ``Date`` and
            ``Adj. Close``.
        start_year: First year of the range.
        end_year: Last year of the range; rows up to and including
            December 31 of this year are used.

    Returns:
        DataFrame with the columns ``Ticker``, ``Year``, ``Month``,
        ``MA Ratio`` and ``Return on Investment``. Rows containing a
        missing value are dropped. The head of the frame is also printed.
    """
    output_columns = ['Ticker', 'Year', 'Month', 'MA Ratio', 'Return on Investment']

    df = pd.read_csv(shareprices_filename, sep=';', encoding='utf-8-sig')
    df['Date'] = pd.to_datetime(df['Date'])

    # December of the preceding year is kept so the 20-row moving average
    # has rows to build on before the first month of start_year. The upper
    # bound is exclusive on January 1 of the following year, so December 31
    # of end_year is part of the data.
    range_start = pd.Timestamp(f'{start_year - 1}-12-01')
    range_end = pd.Timestamp(f'{end_year + 1}-01-01')
    df = df[(df['Date'] >= range_start) & (df['Date'] < range_end)]

    result = []

    for ticker, ticker_group in df.groupby('Ticker'):
        # The rolling mean and the "last row in range" lookup below both
        # rely on chronological order, so sort explicitly instead of
        # trusting the order of the input file.
        ticker_group = ticker_group.sort_values('Date', kind='stable')
        # assign() returns a new frame, which avoids writing into a
        # groupby slice (SettingWithCopyWarning).
        ticker_group = ticker_group.assign(
            **{'MA 20': ticker_group['Adj. Close'].rolling(window=20).mean()}
        )

        for _, month_group in ticker_group.groupby(ticker_group['Date'].dt.to_period('M')):
            # First available day of the month (rows are sorted by date).
            first_day = month_group.iloc[0]
            open_price = first_day['Adj. Close']

            # Getting share price after 3 months
            close_date = investment_close_date(first_day['Date'])
            # Day range for closing the investment is set as
            # (close_date - 15 days) to close_date. This is done to avoid a
            # situation where, due to missing data, the investment would be
            # "closed" very quickly, for example, after a week.
            min_close_day = close_date - pd.Timedelta(days=15)
            in_close_range = (
                (ticker_group['Date'] >= min_close_day)
                & (ticker_group['Date'] <= close_date)
            )
            close_range = ticker_group[in_close_range]
            # Latest row inside the window; NaN when the window has no data
            # (the row is then removed by dropna() below).
            investment_close = (
                close_range.iloc[-1]['Adj. Close'] if not close_range.empty else np.nan
            )

            ma_20 = first_day['MA 20']
            # Guard against division by zero; a NaN moving average simply
            # propagates to a NaN ratio.
            ma_ratio = open_price / ma_20 if ma_20 != 0 else np.nan
            return_on_investment = investment_close / open_price if open_price != 0 else np.nan

            result.append({
                'Ticker': ticker,
                'Year': first_day['Date'].year,
                'Month': first_day['Date'].month,
                'MA Ratio': ma_ratio,
                'Return on Investment': return_on_investment
            })

    # Passing the column names keeps the header even when nothing matched.
    result_df = pd.DataFrame(result, columns=output_columns)
    result_df = result_df.dropna()

    print(result_df.head())

    return result_df

### 3. Data and program execution

In [38]:
# Input and output locations.
shareprices_filename = '../datasets/us-shareprices-daily.csv'
output_filename = '../datasets/shareprices_data.csv'

# Year range handed to the preprocessor.
start_year, end_year = 2018, 2024

# Run the preprocessing and store the table without the DataFrame index.
result = calculate_shareprices_data(
    shareprices_filename=shareprices_filename,
    start_year=start_year,
    end_year=end_year,
)
result.to_csv(output_filename, index=False)

  Ticker  Year  Month  MA Ratio  Return on Investment
2      A  2018     11  0.998181              1.169091
3      A  2018     12  1.104618              1.066583
4      A  2019      1  0.961995              1.223647
5      A  2019      2  1.066554              1.034318
6      A  2019      3  1.046990              0.826977
