In [11]:
import os
import pandas as pd
from datetime import datetime, timedelta
import heapq
from datetime import date
import quantstats as qs 

def calculate_average_returns(folder_path):
    # Dictionary to store average returns for each ETF
    average_returns_dict = {}

    # Iterate over each file in the folder
    for filename in os.listdir(folder_path):
        if filename.endswith(".csv"):
            file_path = os.path.join(folder_path, filename)
            # Read the CSV file into a DataFrame
            df = pd.read_csv(file_path)

            # Convert 'Date' column to datetime format
            df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

            # Calculate the average returns for 1, 3, 6, 9, and 12 months
            average_returns = {}
            for months in [1, 3, 6, 9, 12]:
                end_date = df['Date'].max()
                start_date = end_date - pd.DateOffset(months=months)
                filtered_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
                returns = (filtered_data['Close'].iloc[-1] - filtered_data['Close'].iloc[0]) / filtered_data['Close'].iloc[0]
                average_returns[f'{months}_month'] = returns / months

            # Store the average returns in the dictionary with the ETF name
            average_returns_dict[filename.split('.')[0]] = average_returns

    return average_returns_dict

def calculate_overall_average_returns(average_returns_dict):
    # Dictionary to store overall average returns for each ETF
    overall_average_returns = {}

    # Iterate over each ETF
    for etf, returns_dict in average_returns_dict.items():
        # Calculate the overall average for the ETF
        overall_average = sum(returns_dict.values()) / len(returns_dict)
        overall_average_returns[etf] = overall_average

    return overall_average_returns

folder_path = r"C:\Users\DELL\Desktop\Projects\Codeshastra X\Data\Sectoral ETF"
average_returns = calculate_average_returns(folder_path)
overall_average_returns = calculate_overall_average_returns(average_returns)
top_three_etfs = heapq.nlargest(3, overall_average_returns, key=overall_average_returns.get)

def prepare_data_frame(top_three_etfs, folder_path):
    df_list = []
    for etf in top_three_etfs:
        df = pd.read_csv(folder_path + f"\\{etf}.csv")
        df["asset_name"] = etf
        df["asset_category"] = "ETF"
        df.rename(
            columns={
                "Open": "open_price",
                "High": "high_price",
                "Low": "low_price",
                "Close": "close_price",
                "Volume": "volume",
                "Date": "current_date"
            },
            inplace=True
        )
        df["current_date"] = pd.to_datetime(df['current_date'], format="%Y-%m-%d")
        df = df[df["current_date"] >= df["current_date"].max() - pd.DateOffset(years=1)]
        df.drop(["volume", "Adj Close"], axis=1, inplace=True)
        df_list.append(df)
        
    # Concatenate data frames without resetting index
    all_data = pd.concat(df_list, ignore_index=True)
    all_data["current_date"] = pd.to_datetime(all_data['current_date'], format="%Y-%m-%d")
    all_data.set_index("current_date", inplace=True)
    
    all_data["percentage_1_d_cagr"] = qs.stats.cagr(all_data["close_price"].resample("1D").last())
    all_data["percentage_3_m_cagr"] = qs.stats.cagr(all_data["close_price"].resample("3M").last())
    all_data["percentage_1_y_cagr"] = qs.stats.cagr(all_data["close_price"].resample("1Y").last())
    all_data["percentage_3_m_volatility"] = qs.stats.volatility(all_data["close_price"].resample("3M").last())
    all_data["percentage_1_y_volatility"] = qs.stats.volatility(all_data["close_price"].resample("1Y").last())
    
    # Calculate ratios
    grouped_data = all_data.groupby("asset_name")

    all_data["ratio_sharpe"] = all_data["asset_name"].map(grouped_data.apply(lambda x: qs.stats.sharpe(x["close_price"])))
    all_data["ratio_sortino"] = all_data["asset_name"].map(grouped_data.apply(lambda x: qs.stats.sortino(x["close_price"])))
    all_data["ratio_win_loss"] = all_data["asset_name"].map(grouped_data.apply(lambda x: qs.stats.win_loss_ratio(x["close_price"])))
    all_data["percentage_drawdown"] = all_data["asset_name"].map(grouped_data.apply(lambda x: qs.stats.max_drawdown(x["close_price"])))
    all_data = all_data[all_data.index == all_data.index.max()]    
    
    all_data["current_date"] = date.today()
    all_data["asset_addition_date"] = date.today()
    
    return all_data

all_data = prepare_data_frame(top_three_etfs, folder_path)

def check_spy_moving_average(file_path):
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    
    # Calculate the 10-month moving average
    moving_avg_10m = df['Close'].rolling(window=10).mean()
    
    # Get the latest close price and the latest 10-month moving average
    latest_close_price = df['Close'].iloc[-1]
    latest_10m_avg = moving_avg_10m.iloc[-1]
    
    # Check if the latest close price is lesser than the 10-month moving average
    if latest_close_price < latest_10m_avg:
        return True
    else:
        return False
        
all_cash = check_spy_moving_average(r"C:\Users\DELL\Desktop\Projects\Codeshastra X\Data\Broad Indices\SPY.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_data["current_date"] = date.today()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_data["asset_addition_date"] = date.today()


In [13]:
import os
import pandas as pd
import heapq
from datetime import datetime, timedelta, date
import quantstats as qs

def calculate_average_returns(folder_path):
    """
    Calculate the average returns for ETFs in the given folder path.

    Args:
    folder_path (str): Path to the folder containing ETF data files.

    Returns:
    dict: Dictionary containing average returns for each ETF.
    """
    average_returns_dict = {}  # Dictionary to store average returns for each ETF

    for filename in os.listdir(folder_path):
        if filename.endswith(".csv"):
            file_path = os.path.join(folder_path, filename)
            df = pd.read_csv(file_path)
            df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

            average_returns = {}
            for months in [1, 3, 6, 9, 12]:
                end_date = df['Date'].max()
                start_date = end_date - pd.DateOffset(months=months)
                filtered_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
                returns = (filtered_data['Close'].iloc[-1] - filtered_data['Close'].iloc[0]) / filtered_data['Close'].iloc[0]
                average_returns[f'{months}_month'] = returns / months

            average_returns_dict[filename.split('.')[0]] = average_returns

    return average_returns_dict

def calculate_overall_average_returns(average_returns_dict):
    """
    Calculate the overall average returns for each ETF.

    Args:
    average_returns_dict (dict): Dictionary containing average returns for each ETF.

    Returns:
    dict: Dictionary containing overall average returns for each ETF.
    """
    overall_average_returns = {}

    for etf, returns_dict in average_returns_dict.items():
        overall_average = sum(returns_dict.values()) / len(returns_dict)
        overall_average_returns[etf] = overall_average

    return overall_average_returns

def prepare_data_frame(top_three_etfs, folder_path):
    """
    Prepare a DataFrame containing data for the top three ETFs.

    Args:
    top_three_etfs (list): List of top three ETFs.
    folder_path (str): Path to the folder containing ETF data files.

    Returns:
    pandas.DataFrame: DataFrame containing data for the top three ETFs.
    """
    df_list = []
    for etf in top_three_etfs:
        df = pd.read_csv(os.path.join(folder_path, f"{etf}.csv"))
        df["asset_name"] = etf
        df["asset_category"] = "ETF"
        df.rename(
            columns={
                "Open": "open_price",
                "High": "high_price",
                "Low": "low_price",
                "Close": "close_price",
                "Volume": "volume",
                "Date": "current_date"
            },
            inplace=True
        )
        df["current_date"] = pd.to_datetime(df['current_date'], format="%Y-%m-%d")
        df = df[df["current_date"] >= df["current_date"].max() - pd.DateOffset(years=1)]
        df.drop(["volume", "Adj Close"], axis=1, inplace=True)
        df_list.append(df)

    all_data = pd.concat(df_list, ignore_index=True)
    all_data.set_index("current_date", inplace=True)

    # Calculate various statistics
    all_data["percentage_1_d_cagr"] = qs.stats.cagr(all_data["close_price"].resample("1D").last())
    all_data["percentage_3_m_cagr"] = qs.stats.cagr(all_data["close_price"].resample("3M").last())
    all_data["percentage_1_y_cagr"] = qs.stats.cagr(all_data["close_price"].resample("1Y").last())
    all_data["percentage_3_m_volatility"] = qs.stats.volatility(all_data["close_price"].resample("3M").last())
    all_data["percentage_1_y_volatility"] = qs.stats.volatility(all_data["close_price"].resample("1Y").last())

    grouped_data = all_data.groupby("asset_name")
    all_data["ratio_sharpe"] = all_data["asset_name"].map(grouped_data.apply(lambda x: qs.stats.sharpe(x["close_price"])))
    all_data["ratio_sortino"] = all_data["asset_name"].map(grouped_data.apply(lambda x: qs.stats.sortino(x["close_price"])))
    all_data["ratio_win_loss"] = all_data["asset_name"].map(grouped_data.apply(lambda x: qs.stats.win_loss_ratio(x["close_price"])))
    all_data["percentage_drawdown"] = all_data["asset_name"].map(grouped_data.apply(lambda x: qs.stats.max_drawdown(x["close_price"])))
    all_data = all_data[all_data.index == all_data.index.max()]    

    all_data["current_date"] = date.today()
    all_data["asset_addition_date"] = date.today()

    return all_data

def check_spy_moving_average(file_path):
    """
    Check if the latest close price of SPY is below its 10-month moving average.

    Args:
    file_path (str): Path to the SPY data file.

    Returns:
    bool: True if the latest close price is below the 10-month moving average, False otherwise.
    """
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)

    moving_avg_10m = df['Close'].rolling(window=10).mean()
    latest_close_price = df['Close'].iloc[-1]
    latest_10m_avg = moving_avg_10m.iloc[-1]

    return latest_close_price < latest_10m_avg

folder_path = r"C:\Users\DELL\Desktop\Projects\Codeshastra X\Data\Sectoral ETF"
spy_file_path = r"C:\Users\DELL\Desktop\Projects\Codeshastra X\Data\Broad Indices\SPY.csv"

average_returns = calculate_average_returns(folder_path)
overall_average_returns = calculate_overall_average_returns(average_returns)
top_three_etfs = heapq.nlargest(3, overall_average_returns, key=overall_average_returns.get)

all_data = prepare_data_frame(top_three_etfs, folder_path)
all_cash = check_spy_moving_average(spy_file_path)

# Print or use 'all_cash' and 'all_data' as required
all_cash
all_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_data["current_date"] = date.today()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_data["asset_addition_date"] = date.today()


Unnamed: 0_level_0,open_price,high_price,low_price,close_price,asset_name,asset_category,percentage_1_d_cagr,percentage_3_m_cagr,percentage_1_y_cagr,percentage_3_m_volatility,percentage_1_y_volatility,ratio_sharpe,ratio_sortino,ratio_win_loss,percentage_drawdown,current_date,asset_addition_date
current_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2024-03-28,111.160004,111.959999,110.68,111.589996,XHB,ETF,0.298644,0.266067,0.083722,1.000855,1.390376,2.454664,3.927108,1.076583,-0.184192,2024-03-31,2024-03-31
2024-03-28,41.939999,42.220001,41.869999,42.119999,XLF,ETF,0.298644,0.266067,0.083722,1.000855,1.390376,2.337618,3.664184,1.144534,-0.116573,2024-03-31,2024-03-31
2024-03-28,81.769997,81.900002,81.510002,81.660004,XLC,ETF,0.298644,0.266067,0.083722,1.000855,1.390376,2.30576,3.81932,1.277495,-0.083127,2024-03-31,2024-03-31


In [None]:
from flask import Flask, request, jsonify
import os
import pandas as pd
import heapq
from datetime import datetime, timedelta, date
import quantstats as qs
import pyodbc

app = Flask(__name__)

# SQL Server connection parameters
driver = "{ODBC Driver 17 for SQL Server}"
server = "localhost\\SQLEXPRESS"
database = "your_database_name"
trusted_connection = "yes"

# Construct connection string
DB_CONNECTION_STRING = f"DRIVER={driver};SERVER={server};DATABASE={database};TRUSTED_CONNECTION={trusted_connection};"

def connect_to_database():
    return pyodbc.connect(DB_CONNECTION_STRING)

def calculate_average_returns(folder_path):
    """
    Calculate the average returns for ETFs in the given folder path.

    Args:
    folder_path (str): Path to the folder containing ETF data files.

    Returns:
    dict: Dictionary containing average returns for each ETF.
    """
    average_returns_dict = {}  # Dictionary to store average returns for each ETF

    for filename in os.listdir(folder_path):
        if filename.endswith(".csv"):
            file_path = os.path.join(folder_path, filename)
            df = pd.read_csv(file_path)
            df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

            average_returns = {}
            for months in [1, 3, 6, 9, 12]:
                end_date = df['Date'].max()
                start_date = end_date - pd.DateOffset(months=months)
                filtered_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
                returns = (filtered_data['Close'].iloc[-1] - filtered_data['Close'].iloc[0]) / filtered_data['Close'].iloc[0]
                average_returns[f'{months}_month'] = returns / months

            average_returns_dict[filename.split('.')[0]] = average_returns

    return average_returns_dict

def calculate_overall_average_returns(average_returns_dict):
    """
    Calculate the overall average returns for each ETF.

    Args:
    average_returns_dict (dict): Dictionary containing average returns for each ETF.

    Returns:
    dict: Dictionary containing overall average returns for each ETF.
    """
    overall_average_returns = {}

    for etf, returns_dict in average_returns_dict.items():
        overall_average = sum(returns_dict.values()) / len(returns_dict)
        overall_average_returns[etf] = overall_average

    return overall_average_returns

def prepare_data_frame(top_three_etfs, folder_path):
    """
    Prepare a DataFrame containing data for the top three ETFs.

    Args:
    top_three_etfs (list): List of top three ETFs.
    folder_path (str): Path to the folder containing ETF data files.

    Returns:
    pandas.DataFrame: DataFrame containing data for the top three ETFs.
    """
    df_list = []
    for etf in top_three_etfs:
        df = pd.read_csv(os.path.join(folder_path, f"{etf}.csv"))
        df["asset_name"] = etf
        df["asset_category"] = "ETF"
        df.rename(
            columns={
                "Open": "open_price",
                "High": "high_price",
                "Low": "low_price",
                "Close": "close_price",
                "Volume": "volume",
                "Date": "current_date"
            },
            inplace=True
        )
        df["current_date"] = pd.to_datetime(df['current_date'], format="%Y-%m-%d")
        df = df[df["current_date"] >= df["current_date"].max() - pd.DateOffset(years=1)]
        df.drop(["volume", "Adj Close"], axis=1, inplace=True)
        df_list.append(df)

    all_data = pd.concat(df_list, ignore_index=True)
    all_data.set_index("current_date", inplace=True)

    # Calculate various statistics
    all_data["percentage_1_d_cagr"] = qs.stats.cagr(all_data["close_price"].resample("1D").last())
    all_data["percentage_3_m_cagr"] = qs.stats.cagr(all_data["close_price"].resample("3M").last())
    all_data["percentage_1_y_cagr"] = qs.stats.cagr(all_data["close_price"].resample("1Y").last())
    all_data["percentage_3_m_volatility"] = qs.stats.volatility(all_data["close_price"].resample("3M").last())
    all_data["percentage_1_y_volatility"] = qs.stats.volatility(all_data["close_price"].resample("1Y").last())

    grouped_data = all_data.groupby("asset_name")
    all_data["ratio_sharpe"] = all_data["asset_name"].map(grouped_data.apply(lambda x: qs.stats.sharpe(x["close_price"])))
    all_data["ratio_sortino"] = all_data["asset_name"].map(grouped_data.apply(lambda x: qs.stats.sortino(x["close_price"])))
    all_data["ratio_win_loss"] = all_data["asset_name"].map(grouped_data.apply(lambda x: qs.stats.win_loss_ratio(x["close_price"])))
    all_data["percentage_drawdown"] = all_data["asset_name"].map(grouped_data.apply(lambda x: qs.stats.max_drawdown(x["close_price"])))
    all_data = all_data[all_data.index == all_data.index.max()]    

    all_data["current_date"] = date.today()
    all_data["asset_addition_date"] = date.today()

    return all_data

def check_spy_moving_average(file_path):
    """
    Check if the latest close price of SPY is below its 10-month moving average.

    Args:
    file_path (str): Path to the SPY data file.

    Returns:
    bool: True if the latest close price is below the 10-month moving average, False otherwise.
    """
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)

    moving_avg_10m = df['Close'].rolling(window=10).mean()
    latest_close_price = df['Close'].iloc[-1]
    latest_10m_avg = moving_avg_10m.iloc[-1]

    return latest_close_price < latest_10m_avg

@app.route('/portfolio', methods=['POST'])
def portfolio():
    portfolio_name = request.json.get('portfolio_name')
    allocation = request.json.get('allocation')
    
    if not portfolio_name:
        return jsonify({'error': 'Portfolio name is required'}), 400

    folder_path = os.path.join(r"C:\Users\DELL\Desktop\Projects\Codeshastra X\Data", portfolio_name)
    spy_file_path = r"C:\Users\DELL\Desktop\Projects\Codeshastra X\Data\Broad Indices\SPY.csv"

    average_returns = calculate_average_returns(folder_path)
    overall_average_returns = calculate_overall_average_returns(average_returns)
    top_three_etfs = heapq.nlargest(3, overall_average_returns, key=overall_average_returns.get)

    all_data = prepare_data_frame(top_three_etfs, folder_path)
    all_cash = check_spy_moving_average(spy_file_path)
    
    if all_cash:
        # Connect to the database
        conn = connect_to_database()
        cursor = conn.cursor()
        
        try:
            # Delete data from portfolio details table
            cursor.execute("DELETE FROM portfolio_details WHERE portfolio_name = ? AND current_date = (SELECT MAX(current_date) FROM portfolio_details WHERE portfolio_name = ?)", (portfolio_name, portfolio_name))

            # Delete data from portfolio_performance table
            cursor.execute("DELETE FROM portfolio_performance WHERE portfolio_name = ? AND current_date = (SELECT MAX(current_date) FROM portfolio_performance WHERE portfolio_name = ?)", (portfolio_name, portfolio_name))

            # Update percentage_cash value to 50 in latest_allocation table
            cursor.execute("UPDATE latest_allocation SET percentage_allocation_cash = 50 WHERE portfolio_name = ?", (portfolio_name,))
            
            # Commit the transaction
            conn.commit()

            # Close the cursor and connection
            cursor.close()
            conn.close()
            
        except Exception as e:
            # If an error occurs, rollback the transaction and handle the exception
            conn.rollback()
            return jsonify({'error': str(e)}), 500

    else:
        if allocation > all_data['percentage_cash']:
            pass
        else:
            # Directly update percentage_cash value
            new_percentage_cash = all_data['percentage_allocation_cash'] - 3 * allocation
            cursor.execute("UPDATE latest_allocation SET percentage_allocation_cash = ? WHERE portfolio_name = ?", (new_percentage_cash, portfolio_name))
            
            # Directly update percentage_etf value
            new_percentage_etf = all_data['percentage_allocation_etf'] + allocation * 3
            cursor.execute("UPDATE latest_allocation SET percentage_allocation_etf = ? WHERE portfolio_name = ?", (new_percentage_etf, portfolio_name))
            
            # Add all_data df to the portfolio_details table
            all_data['percentage_allocation'] = allocation
            all_data.to_sql('portfolio_details', conn, if_exists='append', index=False)
            
            
            new_percentage_etf = all_data['percentage_allocation_etf'] + allocation * 3
            cursor.execute("UPDATE portfolio_performance SET percentage_allocation_etf = ? WHERE portfolio_name = ?", (new_percentage_etf, portfolio_name))
               
            # Commit the transaction
            conn.commit()

    # You can return the results as JSON
    return jsonify({'all_cash': all_cash, 'all_data': all_data.to_dict()}), 200

# Your existing functions for calculations and data preparation

if __name__ == '__main__':
    app.run(debug=True)