###**Data Manipulation**

####Input Formatting

In [None]:
# Use the caller's weights (as a NumPy array) when any were given;
# otherwise spread the portfolio evenly across every column of stock_prices
if len(port_weights) > 0:
    port_weights = np.array(port_weights)
else:
    port_weights = np.ones(stock_prices.shape[1]) / stock_prices.shape[1]  # identical share per stock


In [None]:
# Set default start and end dates if not provided (0 is the "not provided" sentinel).
# The default start is pushed 45 calendar days past the first observation so that a
# 45-day look-back window is available for the later beta calculation.
if start_date == 0:
    earliest_start = stock_prices.index[0] + dt.timedelta(days=45)
    # Use the index label itself (not the row of prices stored at that label), and
    # snap to the first observation on or after the target day so that the exact
    # label lookup `stock_prices.loc[start_date]` performed later can succeed.
    # NOTE(review): searchsorted assumes the price index is sorted ascending — confirm.
    first_pos = stock_prices.index.searchsorted(earliest_start)
    if first_pos >= len(stock_prices.index):
        raise ValueError(
            "stock_prices must cover more than 45 days to derive a default start_date"
        )
    start_date = stock_prices.index[first_pos]
if end_date == 0:
    end_date = stock_prices.index[-1]

# Normalise both dates to plain datetime.date objects.
# Going through pd.Timestamp also accepts date objects and date strings,
# in addition to the datetime / Timestamp values that expose .date() directly.
start_date = pd.Timestamp(start_date).date()
end_date = pd.Timestamp(end_date).date()

In [None]:
# Strip the timezone from both price tables so their indexes are timezone-naive
stock_prices = stock_prices.tz_localize(tz=None)
index_prices = index_prices.tz_localize(tz=None)

####Returns

In [None]:
def log_returns(prices):
    """Return period-over-period logarithmic returns for a table of prices.

    Each value is ``log(price_t / price_{t-1})``. Missing values in the
    resulting returns are filled by linear interpolation down the rows, and
    the first row, which has no previous price to compare against, is
    dropped.
    """
    previous = prices.shift(1)

    # Interpolate the returns (not the prices) before discarding the leading NaN row
    filled = np.log(prices / previous).interpolate(method='linear', axis=0)

    return filled.iloc[1:]

In [None]:
# Log returns of the benchmark index
index_returns = log_returns(index_prices)

# Log returns of every stock held in the portfolio
stock_returns = log_returns(stock_prices)

In [None]:
# Portfolio return under daily rebalancing: the weight-weighted sum of the stock
# returns, restricted to the 45-day look-back window that ends on the start date.
# Holding the weights constant over this window gives the later beta calculation
# a consistent return history to work with.
rebalanced_returns = (
    stock_returns.dot(port_weights)
    .loc[start_date - dt.timedelta(days=45):start_date]
)

# Label the Series 'Value', the same label the portfolio-return column receives later
rebalanced_returns.name = 'Value'

####Data Preprocessing for Portfolio Returns

In [None]:
# Build the two per-stock tables used to size the positions: the prices on the
# start date (price_start) and the portfolio weights (port_weights).
start_date = pd.Timestamp(start_date)  # Re-wrap start_date as a pandas Timestamp before using it as a row label

# Retrieve stock prices on the day of investment initiation
# NOTE(review): .loc looks start_date up as a label, so it presumably must be
# present in stock_prices' index (e.g. a trading day) — confirm against callers.
price_start = pd.DataFrame(stock_prices.loc[start_date])  # Single-column frame, one row per column of stock_prices

# Create a DataFrame for portfolio weights and assign corresponding stock names
port_weights = pd.DataFrame(port_weights)  # Wrap the weights in a DataFrame
port_weights.index = price_start.index  # Reuse price_start's index so each weight is labelled with its stock
port_weights.columns = ['Value']  # Single column, named 'Value'
# NOTE(review): the index was taken from price_start, so this in-place rename may
# also be visible on the index object it came from — confirm that is acceptable.
port_weights.index.name = 'Stock'  # Name the index 'Stock'

In [None]:
# Dollar amount allocated to each stock: its weight scaled by the total equity
port_weights_value = port_weights * equity

# Give both frames the same labels ('Value' column, 'Stock' index) so that the
# division below lines up stock by stock
price_start.columns = ['Value']
price_start.index.name = 'Stock'
port_weights_value.columns = ['Value']
port_weights_value.index.name = 'Stock'

# Number of shares held per stock = amount invested / price on the start date,
# kept as a Series (the 'Value' column of the quotient)
port_quantity = (port_weights_value / price_start)['Value']


In [None]:
# Per-stock position value over time (shares held x price), restricted to the
# holding period that runs from start_date through end_date
historical_equity = (port_quantity * stock_prices).loc[start_date:end_date]

# Total portfolio equity at each point in time: the row-wise sum across stocks
port_equity = historical_equity.sum(axis=1).rename('Equity')

####Portfolio Returns: Merging Rebalanced and Actual Results

In [None]:
# Log returns of the portfolio derived from its equity curve. The first row is
# dropped because it has no previous equity value to compare against, and the
# result is kept as a one-column DataFrame named 'Value'.
port_ret = (
    np.log(port_equity / port_equity.shift(1))
    .iloc[1:]
    .to_frame(name='Value')
)

In [None]:
# Stack the look-back (daily-rebalanced) returns on top of the returns realised
# over the holding period, giving one continuous return history
port_ret = pd.concat((rebalanced_returns, port_ret), axis=0)

# The single resulting column is labelled 'Portfolio'
port_ret.columns = ['Portfolio']
