In [0]:
%run "../utils/mount_configuration"

In [0]:
%run "../utils/incremental_load"

In [0]:
# Read the Delta dataset stored at <presentation_folder_path>/tickers_indicators.
tickers_indicators_df = (
    spark.read
    .format('delta')
    .load(f'{presentation_folder_path}/tickers_indicators')
)

In [0]:
# Import Spark's `max` under an alias so the Python built-in `max` is not
# replaced in the notebook namespace for every cell that runs afterwards.
from pyspark.sql.functions import date_format, max as spark_max

# Greatest value of the `date` column, rendered as a 'yyyy-MM-dd' string.
# A global aggregate always yields exactly one row, so `.first()` is never None;
# the single field inside it is None when there is no non-null `date` to aggregate.
latest_date = tickers_indicators_df.select(
    date_format(spark_max('date'), 'yyyy-MM-dd')
).first()[0]

# Fail fast: with latest_date = None every later `col("date") == latest_date`
# filter would silently match nothing and None would be published as the task value.
if latest_date is None:
    raise ValueError(
        "tickers_indicators contains no non-null 'date' values; cannot determine latest_date"
    )

In [0]:
from pyspark.sql import SparkSession

# Bind `spark` to the active session (getOrCreate returns the existing one if present).
spark = SparkSession.builder.getOrCreate()

# Build one single-field row per column name of tickers_indicators_df.
column_name_rows = list(zip(tickers_indicators_df.columns))
columns_df = spark.createDataFrame(column_name_rows, schema=["Column Name"])

# Render the list of column names.
display(columns_df)

In [0]:
from pyspark.sql.functions import col

# Show every row whose `date` equals the most recent date found above.
latest_rows_df = tickers_indicators_df.where(col("date") == latest_date)
display(latest_rows_df)

In [0]:
# Show a fixed subset of ratio columns per ticker, restricted to the latest date.
latest_ratio_columns = ["ticker", "P/E", "P/B", "D/E", "current_liquidity_rate", "net_profit_margin"]
display(
    tickers_indicators_df
    .where(col("date") == latest_date)
    .select(*latest_ratio_columns)
)

In [0]:
# Import Spark's `round` under an alias so this cell does not replace the
# Python built-in `round` in the notebook namespace.
from pyspark.sql.functions import col, round as spark_round

# Screening thresholds for the Warren Buffett style selection.
# NOTE(review): ROE, net_profit_margin and debt_rate are presumably stored as
# fractions (they are multiplied by 100 for display below), while
# net_income_3_year_cagr_pct is compared directly against a percentage — confirm.
BUFFETT_MIN_ROE = 0.2
BUFFETT_MIN_NET_PROFIT_MARGIN = 0.05
BUFFETT_MIN_NET_INCOME_3Y_CAGR_PCT = 15
BUFFETT_MAX_DEBT_RATE = 0.5
BUFFETT_MIN_CURRENT_LIQUIDITY_RATE = 2

# Keep only latest-date rows that pass every threshold, then round the metrics
# for presentation. Column names are left unchanged.
warren_buffet_choice = (
    tickers_indicators_df
    .filter(
        (col("date") == latest_date)
        & (col("ROE") > BUFFETT_MIN_ROE)
        & (col("net_profit_margin") > BUFFETT_MIN_NET_PROFIT_MARGIN)
        & (col("net_income_3_year_cagr_pct") > BUFFETT_MIN_NET_INCOME_3Y_CAGR_PCT)
        & (col("debt_rate") < BUFFETT_MAX_DEBT_RATE)
        & (col("current_liquidity_rate") > BUFFETT_MIN_CURRENT_LIQUIDITY_RATE)
    )
    .select(
        "ticker",
        "name",
        # Values scaled by 100 are shown as percentages.
        spark_round(col("ROE") * 100, 2).alias("ROE"),
        spark_round(col("net_profit_margin") * 100, 2).alias("net_profit_margin"),
        spark_round(col("net_income_3_year_cagr_pct"), 2).alias("net_income_3_year_cagr_pct"),
        spark_round(col("debt_rate") * 100, 2).alias("debt_rate"),
        spark_round(col("current_liquidity_rate"), 2).alias("current_liquidity_rate"),
        spark_round(col("market_cap"), 2).alias("market_cap"),
        spark_round(col("close_price"), 2).alias("close_price"),
        "date",
    )
)

display(warren_buffet_choice)


In [0]:
# Import Spark's `round` under an alias so this cell does not replace the
# Python built-in `round` in the notebook namespace.
from pyspark.sql.functions import col, round as spark_round

# Screening thresholds for the Benjamin Graham style selection.
# NOTE(review): debt_rate and net_profit_margin are presumably stored as
# fractions (they are multiplied by 100 for display below) — confirm.
GRAHAM_MAX_PE = 30.0
GRAHAM_MAX_PB = 5
GRAHAM_MAX_DEBT_RATE = 0.5
GRAHAM_MIN_CURRENT_LIQUIDITY_RATE = 1
GRAHAM_MIN_NET_PROFIT_MARGIN = 0.1

# Keep only latest-date rows that pass every threshold, then round the metrics
# for presentation. Column names are left unchanged.
benjamin_graham_choice = (
    tickers_indicators_df
    .filter(
        (col("date") == latest_date)
        & (col("P/E") < GRAHAM_MAX_PE)
        & (col("P/B") < GRAHAM_MAX_PB)
        & (col("debt_rate") < GRAHAM_MAX_DEBT_RATE)
        & (col("current_liquidity_rate") > GRAHAM_MIN_CURRENT_LIQUIDITY_RATE)
        & (col("net_profit_margin") > GRAHAM_MIN_NET_PROFIT_MARGIN)
    )
    .select(
        "ticker",
        "name",
        spark_round(col("P/E"), 2).alias("P/E"),
        spark_round(col("P/B"), 2).alias("P/B"),
        # Values scaled by 100 are shown as percentages.
        spark_round(col("debt_rate") * 100, 2).alias("debt_rate"),
        spark_round(col("current_liquidity_rate"), 2).alias("current_liquidity_rate"),
        spark_round(col("net_profit_margin") * 100, 2).alias("net_profit_margin"),
        spark_round(col("market_cap"), 2).alias("market_cap"),
        spark_round(col("close_price"), 2).alias("close_price"),
        "date",
    )
)

display(benjamin_graham_choice)


In [0]:
# Match predicate passed to incrementalLoadDelta: a target row and a source row
# are paired when both their ticker and their date are equal.
mergeCondition = (
    "target.ticker = source.ticker AND \n"
    "                    target.date = source.date"
)

In [0]:
# Hand the Warren Buffett selection to incrementalLoadDelta, partitioned by date.
# NOTE(review): the helper is presumably defined by the %run of
# ../utils/incremental_load and merges rows using mergeCondition — confirm there.
incrementalLoadDelta(
    input_df=warren_buffet_choice,
    databaseName="engineering_presentation",
    tableName="warren_buffet_stocks_selection",
    folderPath=presentation_folder_path,
    partitionField="date",
    mergeCondition=mergeCondition,
)

In [0]:
# Hand the Benjamin Graham selection to incrementalLoadDelta, partitioned by date.
# NOTE(review): the helper is presumably defined by the %run of
# ../utils/incremental_load and merges rows using mergeCondition — confirm there.
incrementalLoadDelta(
    input_df=benjamin_graham_choice,
    databaseName="engineering_presentation",
    tableName="benjamin_graham_stocks_selection",
    folderPath=presentation_folder_path,
    partitionField="date",
    mergeCondition=mergeCondition,
)

In [0]:
%sql
-- Inspect the full contents of the Warren Buffett selection table.
SELECT *
FROM engineering_presentation.warren_buffet_stocks_selection

In [0]:
%sql
-- Inspect the full contents of the Benjamin Graham selection table.
SELECT *
FROM engineering_presentation.benjamin_graham_stocks_selection

In [0]:
# Publish latest_date as the job task value stored under the key "evaluation_date".
dbutils.jobs.taskValues.set(
    key="evaluation_date",
    value=latest_date,
)