<a href="https://colab.research.google.com/github/NickDeMiceli20/AssemblyAI-Live/blob/main/Insights_Datasets_DONE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
from google.cloud import storage
import json
import os
import math
from google.cloud import storage

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "lucky-history-383101-274a80771e41.json"
def list_subfolders(bucket_name, prefix):
    storage_client = storage.Client()
    iterator = storage_client.list_blobs(bucket_name, prefix=prefix, delimiter='/')
    prefixes = set()

    for page in iterator.pages:
        prefixes.update(page.prefixes)

    return prefixes

def extract_quarter_year(folder_name):
    parts = folder_name.rstrip('/').split('/')[-1].split('-')
    if len(parts) == 2:
        quarter, year = parts
        year = int(year)
        quarter = quarter.replace('Q', '')
        return year, int(quarter)
    return None, None

def sort_folders(subfolders):
    sorted_folders = sorted(subfolders, key=extract_quarter_year, reverse=True)
    return sorted_folders

def download_file(bucket_name, blob_name):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)
    return blob.download_as_bytes()

# Example usage
bucket_name = 'earningsedge'
prefix = 'fundamental/ADBE/'
subfolders = list_subfolders(bucket_name, prefix)
sorted_subfolders = sort_folders(subfolders)

if sorted_subfolders:
    latest_folder = sorted_subfolders[0]
    print(latest_folder)

    # Define the file names
    file_names = ["ADBE-growth.json", "ADBE-management-effectiveness.json", "ADBE-profitability.json"]

    # Variables to store file contents
    adbe_growth, adbe_management_effectiveness, adbe_profitability = None, None, None

    # Download and assign each file's content to a variable
    for file_name in file_names:
        file_path = f"{latest_folder}{file_name}"
        file_content = download_file(bucket_name, file_path)
        if file_name == "ADBE-growth.json":
            adbe_growth = json.loads(file_content)
        elif file_name == "ADBE-management-effectiveness.json":
            adbe_management_effectiveness = json.loads(file_content)
        elif file_name == "ADBE-profitability.json":
            adbe_profitability = json.loads(file_content)

    # Now you can use adbe_growth, adbe_management_effectiveness, and adbe_profitability as needed
else:
    print("No subfolders found.")



# Assuming adbe_growth is already defined and loaded with data

# Extract QoQ, YoY, and 5-year Average data
growth_qoq_diff = adbe_growth.get('QoQ % Difference', {})
growth_yoy_diff = adbe_growth.get('YoY % Difference', {})
growth_five_year_avg = adbe_growth.get('5-year Average', {})


'''
Sector Median Calculations
'''
import requests
import pandas as pd

# Your provided URL, headers, and query parameters
url = "https://seeking-alpha.p.rapidapi.com/symbols/get-sector-metrics"
headers = {
    "X-RapidAPI-Key": "780f2a96d1mshd6f358e08848cd1p18cd1ajsne72c995bab67",  # Replace with your actual API key
    "X-RapidAPI-Host": "seeking-alpha.p.rapidapi.com"
}
querystring = {"symbol": "adbe", "fields": "revenue_growth,diluted_eps_growth,levered_free_cash_flow_yoy,operating_income_ebit_yoy,net_margin,gross_margin,ebit_margin,levered_fcf_margin,return_on_avg_tot_assets,roe_yoy,return_on_total_capital"}

# Making the API request
response = requests.get(url, headers=headers, params=querystring)

# Parsing the response JSON
response_json = response.json()

# Extracting data and included sections
data = response_json['data']
included = response_json['included']

# Creating a dictionary to map metric type IDs to their field names
metric_field_map = {metric['id']: metric['attributes']['field'] for metric in included}
'''
Revenue Report Card
'''
# Reshaping the data to have metrics as columns
reshaped_data = {metric_field_map[metric['relationships']['metric_type']['data']['id']]: metric['attributes']['value'] for metric in data}
sector_median_df = pd.DataFrame([reshaped_data])
sector_median_revenue_growth = sector_median_df['revenue_growth'][0]
sector_median_operatingIncome_growth = sector_median_df['operating_income_ebit_yoy'][0]
sector_median_eps_growth = sector_median_df['diluted_eps_growth'][0]
sector_median_fcf_growth = sector_median_df['levered_free_cash_flow_yoy'][0]
sector_median_netmargin_growth = sector_median_df['net_margin'][0]
sector_median_grossmargin_growth = sector_median_df['gross_margin'][0]
sector_median_operatingmargin_growth = sector_median_df['ebit_margin'][0]
sector_median_fcfmargin_growth = sector_median_df['levered_fcf_margin'][0]
sector_median_roa_growth = sector_median_df['return_on_avg_tot_assets'][0]
sector_median_roe_growth = sector_median_df['roe_yoy'][0]
sector_median_return_on_total_capital_growth = sector_median_df['return_on_total_capital'][0]
# Initialize the reportCard dictionary
growth_reportCard = {}

# QoQ and YoY comparisons
for metric in ['revenue']:
    qoq_diff = adbe_growth['QoQ % Difference'].get(metric, 0)
    yoy_diff = adbe_growth['YoY % Difference'].get(metric, 0)
    five_year_avg = adbe_growth['5-year Average'].get(metric, 0)

    #Year over Year Percentage Scoring
    yoy_score = 5 if yoy_diff > 20 else \
        4 if 10 <= yoy_diff >= 0 else \
        3 if yoy_diff < five_year_avg and yoy_diff > 0 else \
        2 if yoy_diff > five_year_avg and yoy_diff < 0  else \
        1 if yoy_diff < five_year_avg and yoy_diff < 0  else \
        0

    growth_reportCard[f'Revenue YoY Score'] = yoy_score

    #Year over Year Compared to 5-Year Average
    fiveyear_difference = yoy_diff - five_year_avg

    if fiveyear_difference > 10:
        yoy_vs_5yr_median_score = 5
    elif 0 < fiveyear_difference <= 10:
        yoy_vs_5yr_median_score = 4
    elif fiveyear_difference <= 0 and yoy_diff > 0:
        yoy_vs_5yr_median_score = 3
    elif fiveyear_difference > 0 and yoy_diff < 0:
        yoy_vs_5yr_median_score = 2
    elif fiveyear_difference < 0 and yoy_diff < 0:
        yoy_vs_5yr_median_score = 1
    else:
        yoy_vs_5yr_median_score = 0

    growth_reportCard[f'Revenue YoY vs five Yr Avg Score'] = yoy_vs_5yr_median_score


    sector_difference = yoy_diff - sector_median_revenue_growth

    if sector_difference > 10:
        yoy_vs_sector_median_score = 5
    elif 0 < sector_difference <= 10:
        yoy_vs_sector_median_score = 4
    elif sector_difference <= 0 and yoy_diff > 0:
        yoy_vs_sector_median_score = 3
    elif sector_difference > 0 and yoy_diff < 0:
        yoy_vs_sector_median_score = 2
    elif sector_difference < 0 and yoy_diff < 0:
        yoy_vs_sector_median_score = 1
    else:
        yoy_vs_sector_median_score = 0

    growth_reportCard[f'Revenue YoY vs Sector Median Score'] = yoy_vs_sector_median_score



'''
Operating Income Report Card
'''


# QoQ and YoY comparisons
for metric in ['operatingIncome']:
    qoq_diff = adbe_growth['QoQ % Difference'].get(metric, 0)
    yoy_diff = adbe_growth['YoY % Difference'].get(metric, 0)
    five_year_avg = adbe_growth['5-year Average'].get(metric, 0)

    #Year over Year Percentage Scoring
    yoy_score = 5 if yoy_diff > 20 else \
        4 if 10 <= yoy_diff >= 0 else \
        3 if yoy_diff < five_year_avg and yoy_diff > 0 else \
        2 if yoy_diff > five_year_avg and yoy_diff < 0  else \
        1 if yoy_diff < five_year_avg and yoy_diff < 0  else \
        0

    growth_reportCard[f'Operating Income YoY Score'] = yoy_score

    #Year over Year Compared to 5-Year Average
    fiveyear_difference = yoy_diff - five_year_avg

    if fiveyear_difference > 10:
        yoy_vs_5yr_median_score = 5
    elif 0 < fiveyear_difference <= 10:
        yoy_vs_5yr_median_score = 4
    elif fiveyear_difference <= 0 and yoy_diff > 0:
        yoy_vs_5yr_median_score = 3
    elif fiveyear_difference > 0 and yoy_diff < 0:
        yoy_vs_5yr_median_score = 2
    elif fiveyear_difference < 0 and yoy_diff < 0:
        yoy_vs_5yr_median_score = 1
    else:
        yoy_vs_5yr_median_score = 0

    growth_reportCard[f'Operating Income YoY vs five Yr Avg Score'] = yoy_vs_5yr_median_score


    sector_difference = yoy_diff - sector_median_operatingIncome_growth

    if sector_difference > 10:
        yoy_vs_sector_median_score = 5
    elif 0 < sector_difference <= 10:
        yoy_vs_sector_median_score = 4
    elif sector_difference <= 0 and yoy_diff > 0:
        yoy_vs_sector_median_score = 3
    elif sector_difference > 0 and yoy_diff < 0:
        yoy_vs_sector_median_score = 2
    elif sector_difference < 0 and yoy_diff < 0:
        yoy_vs_sector_median_score = 1
    else:
        yoy_vs_sector_median_score = 0

    growth_reportCard[f'Operating Income YoY vs Sector Median Score'] = yoy_vs_sector_median_score


'''
EPS
'''


# QoQ and YoY comparisons
for metric in ['eps']:
    qoq_diff = adbe_growth['QoQ % Difference'].get(metric, 0)
    yoy_diff = adbe_growth['YoY % Difference'].get(metric, 0)
    five_year_avg = adbe_growth['5-year Average'].get(metric, 0)

    #Year over Year Percentage Scoring
    yoy_score = 5 if yoy_diff > 20 else \
        4 if 10 <= yoy_diff >= 0 else \
        3 if yoy_diff < five_year_avg and yoy_diff > 0 else \
        2 if yoy_diff > five_year_avg and yoy_diff < 0  else \
        1 if yoy_diff < five_year_avg and yoy_diff < 0  else \
        0

    growth_reportCard[f'EPS YoY Score'] = yoy_score

    #Year over Year Compared to 5-Year Average
    fiveyear_difference = yoy_diff - five_year_avg

    if fiveyear_difference > 10:
        yoy_vs_5yr_median_score = 5
    elif 0 < fiveyear_difference <= 10:
        yoy_vs_5yr_median_score = 4
    elif fiveyear_difference <= 0 and yoy_diff > 0:
        yoy_vs_5yr_median_score = 3
    elif fiveyear_difference > 0 and yoy_diff < 0:
        yoy_vs_5yr_median_score = 2
    elif fiveyear_difference < 0 and yoy_diff < 0:
        yoy_vs_5yr_median_score = 1
    else:
        yoy_vs_5yr_median_score = 0

    growth_reportCard[f'EPS YoY vs Five Yr Avg Score'] = yoy_vs_5yr_median_score


    sector_difference = yoy_diff - sector_median_eps_growth

    if sector_difference > 10:
        yoy_vs_sector_median_score = 5
    elif 0 < sector_difference <= 10:
        yoy_vs_sector_median_score = 4
    elif sector_difference <= 0 and yoy_diff > 0:
        yoy_vs_sector_median_score = 3
    elif sector_difference > 0 and yoy_diff < 0:
        yoy_vs_sector_median_score = 2
    elif sector_difference < 0 and yoy_diff < 0:
        yoy_vs_sector_median_score = 1
    else:
        yoy_vs_sector_median_score = 0

    growth_reportCard[f'EPS YoY vs Sector Median Score'] = yoy_vs_sector_median_score

print(adbe_profitability)
'''
Free Cash Flow
'''
# QoQ and YoY comparisons
for metric in ['freeCashFlow']:
    qoq_diff = adbe_growth['QoQ % Difference'].get(metric, 0)
    yoy_diff = adbe_growth['YoY % Difference'].get(metric, 0)
    five_year_avg = adbe_growth['5-year Average'].get(metric, 0)

    #Year over Year Percentage Scoring
    yoy_score = 5 if yoy_diff > 20 else \
        4 if 10 <= yoy_diff >= 0 else \
        3 if yoy_diff < five_year_avg and yoy_diff > 0 else \
        2 if yoy_diff > five_year_avg and yoy_diff < 0  else \
        1 if yoy_diff < five_year_avg and yoy_diff < 0  else \
        0

    growth_reportCard[f'Free Cash Flow YoY Score'] = yoy_score

    #Year over Year Compared to 5-Year Average
    fiveyear_difference = yoy_diff - five_year_avg

    if fiveyear_difference > 10:
        yoy_vs_5yr_median_score = 5
    elif 0 < fiveyear_difference <= 10:
        yoy_vs_5yr_median_score = 4
    elif fiveyear_difference <= 0 and yoy_diff > 0:
        yoy_vs_5yr_median_score = 3
    elif fiveyear_difference > 0 and yoy_diff < 0:
        yoy_vs_5yr_median_score = 2
    elif fiveyear_difference < 0 and yoy_diff < 0:
        yoy_vs_5yr_median_score = 1
    else:
        yoy_vs_5yr_median_score = 0

    growth_reportCard[f'Free Cash Flow YoY vs Five Yr Avg Score'] = yoy_vs_5yr_median_score


    sector_difference = yoy_diff - sector_median_fcf_growth

    if sector_difference > 10:
        yoy_vs_sector_median_score = 5
    elif 0 < sector_difference <= 10:
        yoy_vs_sector_median_score = 4
    elif sector_difference <= 0 and yoy_diff > 0:
        yoy_vs_sector_median_score = 3
    elif sector_difference > 0 and yoy_diff < 0:
        yoy_vs_sector_median_score = 2
    elif sector_difference < 0 and yoy_diff < 0:
        yoy_vs_sector_median_score = 1
    else:
        yoy_vs_sector_median_score = 0

    growth_reportCard[f'Free Cash Flow YoY vs Sector Median Score'] = yoy_vs_sector_median_score


profitability_report_card = {}

# Extract relevant data from adbe_profitability
latest = adbe_profitability.get('latest', {})
prior_quarter = adbe_profitability.get('prior_quarter', {})
one_year = adbe_profitability.get('one_year', {})
five_year_avg = adbe_profitability.get('five_year_avg', {})


'''
Net Profit Margin
'''
# Loop over the metric 'netProfitMargin'
for metric in ['netProfitMargin']:
    # Access the 'netProfitMargin' value in the latest dictionary
    latest_margin_value = latest.get(metric, 0)
    one_year_margin_value = one_year.get(metric, 0)
    five_year_avg_margin_value = five_year_avg.get(metric, 0)

    # Check if the latest_margin_value is greater than 5
    latest_margin = 5 if latest_margin_value > 5 else 0

    # Update the profitability report card
    profitability_report_card[f'Net Profit Margin Latest Quarter Score'] = latest_margin

    #Year over Year Compared to 5-Year Average
    latest_vs_year = latest_margin_value - one_year_margin_value

    if latest_vs_year > 5:
        latest_vs_yr_score = 5
    elif 2 <= latest_vs_year <= 5:
        latest_vs_yr_score = 4
    elif -2 <= latest_vs_year <= 2:
        latest_vs_yr_score = 3
    elif -5 <= latest_vs_year <= -2:
        latest_vs_yr_score = 2
    elif latest_vs_year <= -5:
        latest_vs_yr_score = 1
    else:
        latest_vs_yr_score = 0

    profitability_report_card[f'Net Profit Margin Latest Qtr vs Year Ago Score'] = latest_vs_yr_score


    latest_vs_five_year = latest_margin_value - five_year_avg_margin_value

    if latest_vs_five_year > 5:
        latest_vs_5yr_score = 5
    elif 2 <= latest_vs_five_year <= 5:
        latest_vs_5yr_score = 4
    elif -2 <= latest_vs_five_year <= 2:
        latest_vs_5yr_score = 3
    elif -5 <= latest_vs_five_year <= -2:
        latest_vs_5yr_score = 2
    elif latest_vs_five_year <= -5:
        latest_vs_5yr_score = 1
    else:
        latest_vs_5yr_score = 0

    profitability_report_card[f'Net Profit Margin Latest Qtr vs 5-Year Avg Score'] = latest_vs_5yr_score


    latest_vs_sectormedian = latest_margin_value - sector_median_netmargin_growth

    if latest_vs_sectormedian > 5:
        latest_vs_sector_median_score = 5
    elif 2 <= latest_vs_sectormedian <= 5:
        latest_vs_sector_median_score = 4
    elif -2 <= latest_vs_sectormedian <= 2:
        latest_vs_sector_median_score = 3
    elif -5 <= latest_vs_sectormedian <= -2:
        latest_vs_sector_median_score = 2
    elif latest_vs_sectormedian <= -5:
        latest_vs_sector_median_score = 1
    else:
        latest_vs_sector_median_score = 0

    profitability_report_card[f'Net Profit Margin Latest Qtr vs Sector Median Score'] = latest_vs_sector_median_score



'''
Gross Profit Margin
'''
# Loop over the metric 'netProfitMargin'
for metric in ['grossProfitMargin']:
    # Access the 'netProfitMargin' value in the latest dictionary
    latest_margin_value = latest.get(metric, 0)
    one_year_margin_value = one_year.get(metric, 0)
    five_year_avg_margin_value = five_year_avg.get(metric, 0)

    # Check if the latest_margin_value is greater than 5
    latest_margin = 5 if latest_margin_value > 5 else 0

    # Update the profitability report card
    profitability_report_card[f'Gross Profit Margin Latest Quarter Score'] = latest_margin

    #Year over Year Compared to 5-Year Average
    latest_vs_year = latest_margin_value - one_year_margin_value

    if latest_vs_year > 5:
        latest_vs_yr_score = 5
    elif 2 <= latest_vs_year <= 5:
        latest_vs_yr_score = 4
    elif -2 <= latest_vs_year <= 2:
        latest_vs_yr_score = 3
    elif -5 <= latest_vs_year <= -2:
        latest_vs_yr_score = 2
    elif latest_vs_year <= -5:
        latest_vs_yr_score = 1
    else:
        latest_vs_yr_score = 0

    profitability_report_card[f'Gross Proft Margin Latest Qtr vs Year Ago Score'] = latest_vs_yr_score


    latest_vs_five_year = latest_margin_value - five_year_avg_margin_value

    if latest_vs_five_year > 5:
        latest_vs_5yr_score = 5
    elif 2 <= latest_vs_five_year <= 5:
        latest_vs_5yr_score = 4
    elif -2 <= latest_vs_five_year <= 2:
        latest_vs_5yr_score = 3
    elif -5 <= latest_vs_five_year <= -2:
        latest_vs_5yr_score = 2
    elif latest_vs_five_year <= -5:
        latest_vs_5yr_score = 1
    else:
        latest_vs_5yr_score = 0

    profitability_report_card[f'Gross Profit Margin Latest Qtr vs 5-Year Avg Score'] = latest_vs_5yr_score


    latest_vs_sectormedian = latest_margin_value - sector_median_grossmargin_growth

    if latest_vs_sectormedian > 5:
        latest_vs_sector_median_score = 5
    elif 2 <= latest_vs_sectormedian <= 5:
        latest_vs_sector_median_score = 4
    elif -2 <= latest_vs_sectormedian <= 2:
        latest_vs_sector_median_score = 3
    elif -5 <= latest_vs_sectormedian <= -2:
        latest_vs_sector_median_score = 2
    elif latest_vs_sectormedian <= -5:
        latest_vs_sector_median_score = 1
    else:
        latest_vs_sector_median_score = 0

    profitability_report_card[f'Gross Profit Margin Latest Qtr vs Sector Median Score'] = latest_vs_sector_median_score


'''
Operating Profit Margin
'''
# Loop over the metric 'netProfitMargin'
for metric in ['operatingProfitMargin']:
    # Access the 'netProfitMargin' value in the latest dictionary
    latest_margin_value = latest.get(metric, 0)
    one_year_margin_value = one_year.get(metric, 0)
    five_year_avg_margin_value = five_year_avg.get(metric, 0)

    # Check if the latest_margin_value is greater than 5
    latest_margin = 5 if latest_margin_value > 5 else 0

    # Update the profitability report card
    profitability_report_card[f'Operating Profit Margin Latest Quarter Score'] = latest_margin

    #Year over Year Compared to 5-Year Average
    latest_vs_year = latest_margin_value - one_year_margin_value

    if latest_vs_year > 5:
        latest_vs_yr_score = 5
    elif 2 <= latest_vs_year <= 5:
        latest_vs_yr_score = 4
    elif -2 <= latest_vs_year <= 2:
        latest_vs_yr_score = 3
    elif -5 <= latest_vs_year <= -2:
        latest_vs_yr_score = 2
    elif latest_vs_year <= -5:
        latest_vs_yr_score = 1
    else:
        latest_vs_yr_score = 0

    profitability_report_card[f'Operating Proft Margin Latest Qtr vs Year Ago Score'] = latest_vs_yr_score


    latest_vs_five_year = latest_margin_value - five_year_avg_margin_value

    if latest_vs_five_year > 5:
        latest_vs_5yr_score = 5
    elif 2 <= latest_vs_five_year <= 5:
        latest_vs_5yr_score = 4
    elif -2 <= latest_vs_five_year <= 2:
        latest_vs_5yr_score = 3
    elif -5 <= latest_vs_five_year <= -2:
        latest_vs_5yr_score = 2
    elif latest_vs_five_year <= -5:
        latest_vs_5yr_score = 1
    else:
        latest_vs_5yr_score = 0

    profitability_report_card[f'Operating Profit Margin Latest Qtr vs 5-Year Avg Score'] = latest_vs_5yr_score


    latest_vs_sectormedian = latest_margin_value - sector_median_operatingmargin_growth

    if latest_vs_sectormedian > 5:
        latest_vs_sector_median_score = 5
    elif 2 <= latest_vs_sectormedian <= 5:
        latest_vs_sector_median_score = 4
    elif -2 <= latest_vs_sectormedian <= 2:
        latest_vs_sector_median_score = 3
    elif -5 <= latest_vs_sectormedian <= -2:
        latest_vs_sector_median_score = 2
    elif latest_vs_sectormedian <= -5:
        latest_vs_sector_median_score = 1
    else:
        latest_vs_sector_median_score = 0

    profitability_report_card[f'Operating Profit Margin Latest Qtr vs Sector Median Score'] = latest_vs_sector_median_score


'''
FCF Profit Margin
'''
# Loop over the metric 'netProfitMargin'
for metric in ['freeCashFlowMargin']:
    # Access the 'netProfitMargin' value in the latest dictionary
    latest_margin_value = latest.get(metric, 0)
    one_year_margin_value = one_year.get(metric, 0)
    five_year_avg_margin_value = five_year_avg.get(metric, 0)

    # Check if the latest_margin_value is greater than 5
    latest_margin = 5 if latest_margin_value > 5 else 0

    # Update the profitability report card
    profitability_report_card[f'FCF Profit Margin Latest Quarter Score'] = latest_margin

    #Year over Year Compared to 5-Year Average
    latest_vs_year = latest_margin_value - one_year_margin_value

    if latest_vs_year > 5:
        latest_vs_yr_score = 5
    elif 2 <= latest_vs_year <= 5:
        latest_vs_yr_score = 4
    elif -2 <= latest_vs_year <= 2:
        latest_vs_yr_score = 3
    elif -5 <= latest_vs_year <= -2:
        latest_vs_yr_score = 2
    elif latest_vs_year <= -5:
        latest_vs_yr_score = 1
    else:
        latest_vs_yr_score = 0

    profitability_report_card[f'FCF Profit Margin Latest Qtr vs Year Ago Score'] = latest_vs_yr_score


    latest_vs_five_year = latest_margin_value - five_year_avg_margin_value

    if latest_vs_five_year > 5:
        latest_vs_5yr_score = 5
    elif 2 <= latest_vs_five_year <= 5:
        latest_vs_5yr_score = 4
    elif -2 <= latest_vs_five_year <= 2:
        latest_vs_5yr_score = 3
    elif -5 <= latest_vs_five_year <= -2:
        latest_vs_5yr_score = 2
    elif latest_vs_five_year <= -5:
        latest_vs_5yr_score = 1
    else:
        latest_vs_5yr_score = 0

    profitability_report_card[f'FCF Profit Margin Latest Qtr vs 5-Year Avg Score'] = latest_vs_5yr_score


    latest_vs_sectormedian = latest_margin_value - sector_median_fcfmargin_growth

    if latest_vs_sectormedian > 5:
        latest_vs_sector_median_score = 5
    elif 2 <= latest_vs_sectormedian <= 5:
        latest_vs_sector_median_score = 4
    elif -2 <= latest_vs_sectormedian <= 2:
        latest_vs_sector_median_score = 3
    elif -5 <= latest_vs_sectormedian <= -2:
        latest_vs_sector_median_score = 2
    elif latest_vs_sectormedian <= -5:
        latest_vs_sector_median_score = 1
    else:
        latest_vs_sector_median_score = 0

    profitability_report_card[f'FCF Profit Margin Latest Qtr vs Sector Median Score'] = latest_vs_sector_median_score



management_effectiveness_report_card = {}

# Extract relevant data from adbe_profitability
latest = adbe_management_effectiveness.get('latest', {})
prior_quarter = adbe_management_effectiveness.get('prior_quarter', {})
one_year = adbe_management_effectiveness.get('one_year', {})
five_year_avg = adbe_management_effectiveness.get('five_year_avg', {})


'''
Return on Assets
'''

# Loop over the metric 'netProfitMargin'
for metric in ['returnOnAssets']:
    # Access the 'netProfitMargin' value in the latest dictionary
    latest_margin_value = latest.get(metric, 0)
    one_year_margin_value = one_year.get(metric, 0)
    five_year_avg_margin_value = five_year_avg.get(metric, 0)

    # Check if the latest_margin_value is greater than 5
    latest_margin = 5 if latest_margin_value > 5 else 0

    # Update the profitability report card
    profitability_report_card[f'Return on Assets Latest Quarter Score'] = latest_margin

    #Year over Year Compared to 5-Year Average
    latest_vs_year = latest_margin_value - one_year_margin_value

    if latest_vs_year > 5:
        latest_vs_yr_score = 5
    elif 2 <= latest_vs_year <= 5:
        latest_vs_yr_score = 4
    elif -2 <= latest_vs_year <= 2:
        latest_vs_yr_score = 3
    elif -5 <= latest_vs_year <= -2:
        latest_vs_yr_score = 2
    elif latest_vs_year <= -5:
        latest_vs_yr_score = 1
    else:
        latest_vs_yr_score = 0

    management_effectiveness_report_card[f'Return on Assets Latest Qtr vs Year Ago Score'] = latest_vs_yr_score


    latest_vs_five_year = latest_margin_value - five_year_avg_margin_value

    if latest_vs_five_year > 5:
        latest_vs_5yr_score = 5
    elif 2 <= latest_vs_five_year <= 5:
        latest_vs_5yr_score = 4
    elif -2 <= latest_vs_five_year <= 2:
        latest_vs_5yr_score = 3
    elif -5 <= latest_vs_five_year <= -2:
        latest_vs_5yr_score = 2
    elif latest_vs_five_year <= -5:
        latest_vs_5yr_score = 1
    else:
        latest_vs_5yr_score = 0

    management_effectiveness_report_card[f'Return on Assets Latest Qtr vs 5-Year Avg Score'] = latest_vs_5yr_score


    latest_vs_sectormedian = latest_margin_value - sector_median_roa_growth

    if latest_vs_sectormedian > 5:
        latest_vs_sector_median_score = 5
    elif 2 <= latest_vs_sectormedian <= 5:
        latest_vs_sector_median_score = 4
    elif -2 <= latest_vs_sectormedian <= 2:
        latest_vs_sector_median_score = 3
    elif -5 <= latest_vs_sectormedian <= -2:
        latest_vs_sector_median_score = 2
    elif latest_vs_sectormedian <= -5:
        latest_vs_sector_median_score = 1
    else:
        latest_vs_sector_median_score = 0

    management_effectiveness_report_card[f'Return on Assets Latest Qtr vs Sector Median Score'] = latest_vs_sector_median_score



'''
Return on Equity
'''
# Loop over the metric 'netProfitMargin'
for metric in ['returnOnEquity']:
    # Access the 'netProfitMargin' value in the latest dictionary
    latest_margin_value = latest.get(metric, 0)
    one_year_margin_value = one_year.get(metric, 0)
    five_year_avg_margin_value = five_year_avg.get(metric, 0)

    # Check if the latest_margin_value is greater than 5
    latest_margin = 5 if latest_margin_value > 5 else 0

    # Update the profitability report card
    profitability_report_card[f'Return on Equity Latest Quarter Score'] = latest_margin

    #Year over Year Compared to 5-Year Average
    latest_vs_year = latest_margin_value - one_year_margin_value

    if latest_vs_year > 5:
        latest_vs_yr_score = 5
    elif 2 <= latest_vs_year <= 5:
        latest_vs_yr_score = 4
    elif -2 <= latest_vs_year <= 2:
        latest_vs_yr_score = 3
    elif -5 <= latest_vs_year <= -2:
        latest_vs_yr_score = 2
    elif latest_vs_year <= -5:
        latest_vs_yr_score = 1
    else:
        latest_vs_yr_score = 0

    management_effectiveness_report_card[f'Return on Equity Latest Qtr vs Year Ago Score'] = latest_vs_yr_score


    latest_vs_five_year = latest_margin_value - five_year_avg_margin_value

    if latest_vs_five_year > 5:
        latest_vs_5yr_score = 5
    elif 2 <= latest_vs_five_year <= 5:
        latest_vs_5yr_score = 4
    elif -2 <= latest_vs_five_year <= 2:
        latest_vs_5yr_score = 3
    elif -5 <= latest_vs_five_year <= -2:
        latest_vs_5yr_score = 2
    elif latest_vs_five_year <= -5:
        latest_vs_5yr_score = 1
    else:
        latest_vs_5yr_score = 0

    management_effectiveness_report_card[f'Return on Equity Latest Qtr vs 5-Year Avg Score'] = latest_vs_5yr_score


    latest_vs_sectormedian = latest_margin_value - sector_median_roe_growth

    if latest_vs_sectormedian > 5:
        latest_vs_sector_median_score = 5
    elif 2 <= latest_vs_sectormedian <= 5:
        latest_vs_sector_median_score = 4
    elif -2 <= latest_vs_sectormedian <= 2:
        latest_vs_sector_median_score = 3
    elif -5 <= latest_vs_sectormedian <= -2:
        latest_vs_sector_median_score = 2
    elif latest_vs_sectormedian <= -5:
        latest_vs_sector_median_score = 1
    else:
        latest_vs_sector_median_score = 0

    management_effectiveness_report_card[f'Return on Equity Latest Qtr vs Sector Median Score'] = latest_vs_sector_median_score


'''
Return on Total Capital Employed
'''
# Loop over the metric 'netProfitMargin'
for metric in ['returnOnEquity']:
    # Access the 'netProfitMargin' value in the latest dictionary
    latest_margin_value = latest.get(metric, 0)
    one_year_margin_value = one_year.get(metric, 0)
    five_year_avg_margin_value = five_year_avg.get(metric, 0)

    # Check if the latest_margin_value is greater than 5
    latest_margin = 5 if latest_margin_value > 5 else 0

    # Update the profitability report card
    management_effectiveness_report_card[f'Return on Total Capital Employed Latest Quarter Score'] = latest_margin

    #Year over Year Compared to 5-Year Average
    latest_vs_year = latest_margin_value - one_year_margin_value

    if latest_vs_year > 5:
        latest_vs_yr_score = 5
    elif 2 <= latest_vs_year <= 5:
        latest_vs_yr_score = 4
    elif -2 <= latest_vs_year <= 2:
        latest_vs_yr_score = 3
    elif -5 <= latest_vs_year <= -2:
        latest_vs_yr_score = 2
    elif latest_vs_year <= -5:
        latest_vs_yr_score = 1
    else:
        latest_vs_yr_score = 0

    management_effectiveness_report_card[f'Return on Total Capital Employed Latest Qtr vs Year Ago Score'] = latest_vs_yr_score


    latest_vs_five_year = latest_margin_value - five_year_avg_margin_value

    if latest_vs_five_year > 5:
        latest_vs_5yr_score = 5
    elif 2 <= latest_vs_five_year <= 5:
        latest_vs_5yr_score = 4
    elif -2 <= latest_vs_five_year <= 2:
        latest_vs_5yr_score = 3
    elif -5 <= latest_vs_five_year <= -2:
        latest_vs_5yr_score = 2
    elif latest_vs_five_year <= -5:
        latest_vs_5yr_score = 1
    else:
        latest_vs_5yr_score = 0

    management_effectiveness_report_card[f'Return on Total Capital Employed Latest Qtr vs 5-Year Avg Score'] = latest_vs_5yr_score


    latest_vs_sectormedian = latest_margin_value - sector_median_return_on_total_capital_growth

    if latest_vs_sectormedian > 5:
        latest_vs_sector_median_score = 5
    elif 2 <= latest_vs_sectormedian <= 5:
        latest_vs_sector_median_score = 4
    elif -2 <= latest_vs_sectormedian <= 2:
        latest_vs_sector_median_score = 3
    elif -5 <= latest_vs_sectormedian <= -2:
        latest_vs_sector_median_score = 2
    elif latest_vs_sectormedian <= -5:
        latest_vs_sector_median_score = 1
    else:
        latest_vs_sector_median_score = 0

    management_effectiveness_report_card[f'Return on Total Capital Employed Latest Qtr vs Sector Median Score'] = latest_vs_sector_median_score


fundamental/ADBE/Q4-2023/
{'Latest Date': '2023-11-30', 'latest': {'netProfitMargin': 29.377971473851026, 'operatingProfitMargin': 34.52852614896989, 'grossProfitMargin': 87.4405705229794, 'freeCashFlow': 155000000000.0, 'revenue': 504800000000.0, 'freeCashFlowMargin': 30.70522979397781}, 'prior_quarter': {'netProfitMargin': 28.69120654396728, 'operatingProfitMargin': 34.70347648261758, 'grossProfitMargin': 88.13905930470347, 'freeCashFlow': 178200000000.0, 'revenue': 489000000000.0, 'freeCashFlowMargin': 36.441717791411044}, 'one_year': {'netProfitMargin': 25.988950276243095, 'operatingProfitMargin': 33.25966850828729, 'grossProfitMargin': 87.4475138121547, 'freeCashFlow': 223400000000.0, 'revenue': 452500000000.0, 'freeCashFlowMargin': 49.370165745856355}, 'five_year_avg': {'netProfitMargin': 30.446147649363496, 'operatingProfitMargin': 33.53641572527869, 'grossProfitMargin': 87.07817681285023, 'freeCashFlow': 153131665000.0, 'revenue': 384196480000.0, 'freeCashFlowMargin': 39.797361

#Sentiment Report Card

In [37]:
from google.cloud import storage
import os
import json

# Set Google Cloud credentials
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "lucky-history-383101-274a80771e41.json"

def list_subfolders(bucket_name, prefix):
    storage_client = storage.Client()
    iterator = storage_client.list_blobs(bucket_name, prefix=prefix, delimiter='/')
    prefixes = set()

    for page in iterator.pages:
        prefixes.update(page.prefixes)

    return prefixes

def extract_quarter_year(folder_name):
    parts = folder_name.rstrip('/').split('/')[-1].split('-')
    if len(parts) == 2:
        quarter, year = parts
        return int(year), quarter
    return None, None

def sort_folders(subfolders):
    sorted_folders = sorted(subfolders, key=extract_quarter_year, reverse=True)
    return sorted_folders

def list_files_in_folder(bucket_name, folder_path):
    storage_client = storage.Client()
    blobs = storage_client.list_blobs(bucket_name, prefix=folder_path, delimiter='/')

    files = []
    for blob in blobs:
        if not blob.name.endswith('/'):
            files.append(blob.name)

    return files

def download_file_to_variable(bucket_name, file_path):
    storage_client = storage.Client()
    blob = storage_client.bucket(bucket_name).blob(file_path)

    # Download the file's content into a variable
    file_contents = blob.download_as_bytes()
    return file_contents

def process_folder_for_average_score(bucket_name, folder):
    behavioral_path = folder + 'Sentiment/'
    files_in_behavioral = list_files_in_folder(bucket_name, behavioral_path)

    for file in files_in_behavioral:
        if file.endswith('AAL_average_score.json'):
            file_contents = download_file_to_variable(bucket_name, file)
            file_json = json.loads(file_contents.decode('utf-8'))
            return file_json['Average Score']


def download_specific_file(bucket_name, folder, file_name):
    file_path = folder + file_name
    return download_file_to_variable(bucket_name, file_path)

# Example usage
bucket_name = 'earningsedge'
prefix = 'Events/AAL/'
subfolders = list_subfolders(bucket_name, prefix)
sorted_subfolders = sort_folders(subfolders)

latest_quarter_folder = sorted_subfolders[0] if sorted_subfolders else None
second_latest_quarter_folder = sorted_subfolders[1] if len(sorted_subfolders) > 1 else None

latest_average_score = None
second_latest_average_score = None
latest_specific_file = None

if latest_quarter_folder:
    latest_average_score = process_folder_for_average_score(bucket_name, latest_quarter_folder)
    latest_specific_file_contents = download_specific_file(bucket_name, latest_quarter_folder, 'Sentiment/AAL-Q3-2023.json')
    latest_specific_file = json.loads(latest_specific_file_contents.decode('utf-8'))

if second_latest_quarter_folder:
    second_latest_average_score = process_folder_for_average_score(bucket_name, second_latest_quarter_folder)

print("Latest Quarter Average Score:", latest_average_score)
print("Second Latest Quarter Average Score:", second_latest_average_score)

sentiment_diff = latest_average_score - second_latest_average_score
print(f"Sentiment Difference: {sentiment_diff}")
print("Latest Quarter Specific File:", latest_specific_file)
# Initialize count variables
count_75_or_above = 0
count_minus_75_or_below = 0

# Iterate through each item and update counts
for item in latest_specific_file:
    score = item.get('Sentiment Score', 0)  # Using .get() to avoid KeyError if 'Sentiment Score' is missing
    if score >= 75:
        count_75_or_above += 1
    elif score <= -75:
        count_minus_75_or_below += 1


normalized_sentiment_score = (latest_average_score + 100) / 2
print("Count of Sentiment Scores >= 75:", count_75_or_above)
print("Count of Sentiment Scores <= -75:", count_minus_75_or_below)

sentiment_reportCard = {}
if sentiment_diff >= 80:
    sentiment_reportCard['Sentiment Difference'] = 5
elif 20 <= sentiment_diff < 79:
    sentiment_reportCard['Sentiment Difference'] = 4
elif -20 <= sentiment_diff < 20:
    sentiment_reportCard['Sentiment Difference'] = 3
elif -80 <= sentiment_diff < -20:
    sentiment_reportCard['Sentiment Difference'] = 2
elif sentiment_diff < -80:
    sentiment_reportCard['Sentiment Difference'] = 1
else:
    sentiment_reportCard['Sentiment Difference'] = 0

if normalized_sentiment_score >= 80:
    sentiment_reportCard['Sentiment Score'] = 5
elif 60 <= normalized_sentiment_score < 80:
    sentiment_reportCard['Sentiment Score'] = 4
elif 40 <= normalized_sentiment_score < 60:
    sentiment_reportCard['Sentiment Score'] = 3
elif 20 <= normalized_sentiment_score < 40 :
    sentiment_reportCard['Sentiment Score'] = 2
elif normalized_sentiment_score < -80:
    sentiment_reportCard['Sentiment Score'] = 1
else:
    sentiment_reportCard['Sentiment Score'] = 0

distribution_critical_statement = count_75_or_above - count_minus_75_or_below
if distribution_critical_statement >= 10:
    sentiment_reportCard['Sentiment Critical Statements Score'] = 5
elif 5 <= distribution_critical_statement < 10:
    sentiment_reportCard['Sentiment Critical Statements Score'] = 4
elif -4 <= distribution_critical_statement < 5:
    sentiment_reportCard['Sentiment Critical Statements Score'] = 3
elif -9 <= distribution_critical_statement < -5 :
    sentiment_reportCard['Sentiment Critical Statements Score'] = 2
elif distribution_critical_statement <= -10:
    sentiment_reportCard['Sentiment Critical Statements Score'] = 1
else:
    sentiment_reportCard['Sentiment Critical Statements Score'] = 0

print(sentiment_reportCard)

Latest Quarter Average Score: 43.575949367088604
Second Latest Quarter Average Score: 49.38135593220339
Sentiment Difference: -5.805406565114787
Latest Quarter Specific File: [{'Symbol': 'AAL', 'Sentence': "American Airlines Group Inc. (NASDAQ:AAL) Q3 2023 Earnings Conference Call October 19, 2023 8:30 AM ET Company Participants Scott Long - Vice President, Investor Relations and Corporate Development Robert Isom - Chief Executive Officer Devon May - Chief Financial Officer Vasu Raja - Chief Commercial Officer David Seymour - Chief Operating Officer Conference Call Participants Helane Becker - TD Cowen David Vernon - Bernstein Andrew Didora - Bank of America Michael Linenberg - Deutsche Bank Catherine O'Brien - Goldman Sachs Jamie Baker - JPMorgan Duane Pfennigwerth - Evercore ISI Conor Cunningham - Melius Research Savi Syth - Raymond James Daniel McKenzie - Seaport Global Mary Schlangenstein - Bloomberg News Alison Sider - Wall Street Journal Alexandra Scores - Dallas Morning News Les

#Behavioral Report Card

In [26]:
from google.cloud import storage
import os
import math
from google.cloud import storage
import json
import re
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "lucky-history-383101-274a80771e41.json"
def list_subfolders(bucket_name, prefix):
    storage_client = storage.Client()
    iterator = storage_client.list_blobs(bucket_name, prefix=prefix, delimiter='/')
    prefixes = set()

    for page in iterator.pages:
        prefixes.update(page.prefixes)

    return prefixes

def extract_quarter_year(folder_name):
    parts = folder_name.rstrip('/').split('/')[-1].split('-')
    if len(parts) == 2:
        quarter, year = parts
        return int(year), quarter
    return None, None

def sort_folders(subfolders):
    sorted_folders = sorted(subfolders, key=extract_quarter_year, reverse=True)
    return sorted_folders

def list_files_in_folder(bucket_name, folder_path):
    storage_client = storage.Client()
    blobs = storage_client.list_blobs(bucket_name, prefix=folder_path, delimiter='/')

    files = []
    for blob in blobs:
        if not blob.name.endswith('/'):
            files.append(blob.name)

    return files

# Example usage
bucket_name = 'earningsedge'
prefix = 'Events/AAL/'
subfolders = list_subfolders(bucket_name, prefix)
sorted_subfolders = sort_folders(subfolders)


def download_blob_to_variable(bucket_name, source_blob_name):
    """Downloads a blob's content and returns it as a Python dictionary."""
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)

    data = blob.download_as_text()
    return json.loads(data)

# Regex pattern to match files like 'AAPL_buckets_percentages.json'
pattern = r'^[A-Z]+_buckets_percentages\.json$'
#For Latest Quarter
latest_quarter_folder = sorted_subfolders[0] if sorted_subfolders else None
if latest_quarter_folder:
    behavioral_path = latest_quarter_folder + 'Behavioral/'
    files_in_behavioral = list_files_in_folder(bucket_name, behavioral_path)
    for file_name in files_in_behavioral:
        if re.match(pattern, file_name.split('/')[-1]):
            file_contents_latest = download_blob_to_variable(bucket_name, file_name)
            break



net_speaker_score = file_contents_latest['Positive Affect'] - file_contents_latest['Negative Affect'] - file_contents_latest['Uncertainty']
#For Previous Quarter
previous_quarter_folder = sorted_subfolders[1] if sorted_subfolders else None
if previous_quarter_folder:
    behavioral_path = previous_quarter_folder + 'Behavioral/'
    files_in_behavioral = list_files_in_folder(bucket_name, behavioral_path)
    for file_name in files_in_behavioral:
        if re.match(pattern, file_name.split('/')[-1]):
            file_contents_previous = download_blob_to_variable(bucket_name, file_name)
            break

previous_quarter_net_speaker_score = file_contents_previous['Positive Affect'] - file_contents_previous['Negative Affect'] - file_contents_previous['Uncertainty']
qoq_net_speaker_delta = net_speaker_score - previous_quarter_net_speaker_score




# File to look for
target_file = 'speakerprofile_buckets_percentages.json'

#Getting Speaker Baseline
file_contents = None
files_in_baseline = list_files_in_folder(bucket_name, prefix)
for file_name in files_in_baseline:
    if target_file in file_name:
        file_contents_baseline = download_blob_to_variable(bucket_name, file_name)
        print(f"Data from {file_name}: {file_contents}")
        break


positive_delta_from_baseline = file_contents_latest['Positive Affect'] - file_contents_baseline['Positive Affect']
negative_delta_from_baseline = file_contents_latest['Negative Affect'] - file_contents_baseline['Negative Affect']
uncertainty_delta_from_baseline = file_contents_latest['Uncertainty'] - file_contents_baseline['Uncertainty']

behavioral_ReportCard = {}
if net_speaker_score >= 50:
    behavioral_ReportCard['Behavioral Score'] = 5
elif 20 <= net_speaker_score < 50:
    behavioral_ReportCard['Behavioral Score'] = 4
elif 0 <= net_speaker_score < 20:
    behavioral_ReportCard['Behavioral Score'] = 3
elif -39 <= net_speaker_score < 0 :
    behavioral_ReportCard['Behavioral Score'] = 2
elif net_speaker_score <= -40:
    behavioral_ReportCard['Behavioral Score'] = 1
else:
    behavioral_ReportCard['Behavioral Score'] = 0


if qoq_net_speaker_delta >= 60:
    behavioral_ReportCard['Behavioral QoQ Delta Score'] = 5
elif 20 <= qoq_net_speaker_delta < 60:
    behavioral_ReportCard['Behavioral QoQ Delta Score'] = 4
elif -19 <= qoq_net_speaker_delta < 20:
    behavioral_ReportCard['Behavioral QoQ Delta Score'] = 3
elif -59 <= qoq_net_speaker_delta < -20 :
    behavioral_ReportCard['Behavioral QoQ Delta Score'] = 2
elif qoq_net_speaker_delta <= -60:
    behavioral_ReportCard['Behavioral QoQ Delta Score'] = 1
else:
    behavioral_ReportCard['Behavioral QoQ Delta Score'] = 0


if positive_delta_from_baseline >= 5:
    behavioral_ReportCard['Behavioral Positive Baseline Delta Score'] = 5
elif 2 <= positive_delta_from_baseline < 5:
    behavioral_ReportCard['Behavioral Positive Baseline QoQ Delta Score'] = 4
elif -2 <= positive_delta_from_baseline < 2:
    behavioral_ReportCard['Behavioral Positive Baseline QoQ Delta Score'] = 3
elif -5 <= positive_delta_from_baseline < -2 :
    behavioral_ReportCard['Behavioral Positive Baseline QoQ Delta Score'] = 2
elif positive_delta_from_baseline <= -5:
    behavioral_ReportCard['Behavioral Positive Baseline QoQ Delta Score'] = 1
else:
    behavioral_ReportCard['Behavioral Positive Baseline QoQ Delta Score'] = 0


if negative_delta_from_baseline >= 5:
    behavioral_ReportCard['Behavioral Negative Baseline Delta Score'] = 5
elif 2 <= negative_delta_from_baseline < 5:
    behavioral_ReportCard['Behavioral Negative Baseline QoQ Delta Score'] = 4
elif -2 <= negative_delta_from_baseline < 2:
    behavioral_ReportCard['Behavioral Negative Baseline QoQ Delta Score'] = 3
elif -5 <= negative_delta_from_baseline < -2 :
    behavioral_ReportCard['Behavioral Negative Baseline QoQ Delta Score'] = 2
elif negative_delta_from_baseline <= -5:
    behavioral_ReportCard['Behavioral Negative Baseline QoQ Delta Score'] = 1
else:
    behavioral_ReportCard['Behavioral Negative Baseline QoQ Delta Score'] = 0


if uncertainty_delta_from_baseline >= 5:
    behavioral_ReportCard['Behavioral Uncertainty Baseline Delta Score'] = 5
elif 2 <= uncertainty_delta_from_baseline < 5:
    behavioral_ReportCard['Behavioral Uncertainty Baseline QoQ Delta Score'] = 4
elif -2 <= uncertainty_delta_from_baseline < 2:
    behavioral_ReportCard['Behavioral Uncertainty Baseline QoQ Delta Score'] = 3
elif -5 <= uncertainty_delta_from_baseline < -2 :
    behavioral_ReportCard['Behavioral Uncertainty Baseline QoQ Delta Score'] = 2
elif uncertainty_delta_from_baseline <= -5:
    behavioral_ReportCard['Behavioral Uncertainty Baseline QoQ Delta Score'] = 1
else:
    behavioral_ReportCard['Behavioral Uncertainty Baseline QoQ Delta Score'] = 0


print(behavioral_ReportCard)

Data from Events/AAL/speakerprofile_buckets_percentages.json: None
{'Behavioral Score': 4, 'Behavioral QoQ Delta Score': 3, 'Behavioral Positive Baseline QoQ Delta Score': 1, 'Behavioral Negative Baseline QoQ Delta Score': 3, 'Behavioral Uncertainty Baseline QoQ Delta Score': 3}


#Report Card JSON Data


#Hume CleanupCode




In [None]:
import requests
from bs4 import BeautifulSoup
import requests
import json
import pandas as pd
import re
from difflib import SequenceMatcher
from collections import defaultdict
from google.cloud import storage
import requests
from hume import HumeBatchClient
from hume.models.config import FaceConfig
from hume.models.config import ProsodyConfig
from hume.models.config import ProsodyConfig, LanguageConfig
import os
import json
from google.cloud import storage
import subprocess
import os
import shutil

def upload_to_gcs_and_get_url(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket and returns the public url"""
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)

    # Make the blob publicly viewable
    blob.make_public()
    return blob.public_url


def download_file(url, local_filename):
    """Download a file from a URL to a local path."""
    with requests.get(url, stream=True) as r:
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
    return local_filename

def get_mp3_duration(file_path):
    """Get the duration of an MP3 file in seconds."""
    cmd = f"ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {file_path}"
    try:
        duration_str = subprocess.check_output(cmd, shell=True).decode('utf-8').strip()
        return float(duration_str)
    except subprocess.CalledProcessError as e:
        print(f"Error in getting duration: {e}")
        return 0

def trim_mp3_if_necessary(file_path, max_duration=59*60):
    """Trim the MP3 file to the maximum duration if necessary."""
    duration = get_mp3_duration(file_path)
    if duration > max_duration:
        trim_length = duration - max_duration
        temp_file_path = "temp_" + os.path.basename(file_path)
        cmd = f"ffmpeg -ss {trim_length} -i {file_path} -acodec copy {temp_file_path}"
        try:
            subprocess.check_output(cmd, shell=True)
            os.replace(temp_file_path, file_path)
            print(f"Trimmed {trim_length} seconds from the beginning of the file.")
        except subprocess.CalledProcessError as e:
            print(f"Error in trimming file: {e}")
            if os.path.exists(temp_file_path):
                os.remove(temp_file_path)


def process_json(storage_client, bucket_name, json_filepath, symbol, quarter, year):
    # Get the blob of the JSON file in GCS
    blob = storage_client.get_bucket(bucket_name).blob(json_filepath)
    json_data = json.loads(blob.download_as_text())

    # Initialize emotion timeline list
    emotion_timeline = []

    # Extract the speaker predictions
    try:
        speaker_predictions = json_data['identified_speaker_data'][0]['predictions']

        # Create an index and populate the emotion timeline
        for prediction in speaker_predictions:
            begin_time = prediction['time']['begin']
            end_time = prediction['time']['end']
            text = prediction['text']
            emotions = prediction['emotions']

            # Create a new object for each time frame
            time_frame = {
                'begin_time': begin_time,
                'end_time': end_time,
                'text': text,
                'emotions': emotions
            }

            emotion_timeline.append(time_frame)

        # Convert emotion timeline to JSON
        emotion_timeline_json = json.dumps(emotion_timeline)

        # Upload emotion timeline JSON to the same bucket
        storage_client.get_bucket(bucket_name).blob(json_filepath.replace(f'identified_speaker_data_{symbol}_{year}_{quarter}.json', f'{symbol}-EmotionTimeline-{quarter}-{year}.json')).upload_from_string(emotion_timeline_json)

        print('Emotion timeline JSON file uploaded successfully.')

        # Calculate the percent change in emotion scores
        calculate_percent_change(emotion_timeline, storage_client, bucket_name, json_filepath, symbol, quarter, year)

    except KeyError:
        print('Invalid JSON format')

def calculate_percent_change(emotion_timeline, storage_client, bucket_name, json_filepath, symbol, quarter, year):
    prev_values = {}
    percent_change_timeline = []

    for i in range(len(emotion_timeline)):
        current_time_frame = emotion_timeline[i]
        current_emotions = current_time_frame['emotions']
        current_text = current_time_frame['text']

        new_emotions = []

        for emotion in current_emotions:
            emotion_name = emotion['name']
            current_score = emotion['score']

            if emotion_name in prev_values:
                old_value = prev_values[emotion_name]
                percent_change = ((current_score - old_value) / old_value) * 100 if old_value != 0 else 0
                new_emotions.append({"name": emotion_name, "percent_change": percent_change})
            else:
                new_emotions.append({"name": emotion_name, "percent_change": 0})

            prev_values[emotion_name] = current_score

        percent_change_timeline.append({'text': current_text, 'percent_change_emotions': new_emotions})

    percent_change_timeline_json = json.dumps(percent_change_timeline)

    # Upload percent change timeline JSON to the same bucket
    storage_client.get_bucket(bucket_name).blob(json_filepath.replace(f'identified_speaker_data_{symbol}_{year}_{quarter}.json', f'{symbol}-PrcChgTimeline-{quarter}-{year}.json')).upload_from_string(percent_change_timeline_json)

    print('Percent change timeline JSON file uploaded successfully.')



import json
from google.cloud import storage

def calculate_average_emotion_scores(emotion_timeline, storage_client, bucket_name, json_filepath, symbol, quarter, year):
    emotion_scores = {}

    # Iterate over each time frame in the emotion timeline
    for time_frame in emotion_timeline:
        print(emotion_timeline)
        emotions = time_frame['emotions']

        # Aggregate the scores for each emotion
        for emotion in emotions:
            emotion_name = emotion['name']
            score = emotion['score']

            # If the emotion is encountered for the first time, initialize the score
            if emotion_name not in emotion_scores:
                emotion_scores[emotion_name] = score
            else:
                # Otherwise, accumulate the scores for the emotion
                emotion_scores[emotion_name] += score

    # Calculate the average score for each emotion
    num_time_frames = len(emotion_timeline)
    average_emotion_scores = {}

    for emotion_name, score in emotion_scores.items():
        average_score = score / num_time_frames
        average_emotion_scores[emotion_name] = average_score

    # Convert average emotion scores to JSON
    average_emotion_scores_json = json.dumps(average_emotion_scores)

    # Upload average emotion scores JSON to the same bucket
    storage_client.get_bucket(bucket_name).blob(json_filepath.replace(f'identified_speaker_data_{symbol}_{year}_{quarter}.json', f"{symbol}-AvgEmotions-{quarter}-{year}.json")).upload_from_string(average_emotion_scores_json)

    print('Average emotion scores JSON file uploaded successfully.')


def upload_to_bucket(blob_name, path_to_file, bucket_name):
    """ Upload data to a bucket"""
    # Explicitly use service account credentials by specifying the private key
    # file.
    storage_client = storage.Client.from_service_account_json('lucky-history-383101-274a80771e41.json')

    #print(blobs = storage_client.list_blobs(bucket_name))
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.upload_from_filename(path_to_file)

    #returns a public url
    return blob.public_url

client = HumeBatchClient("3QBfsJ4JPV5yadXAZRaehTfUXfVTYjUd3nqodLZXvbIdKnwn")
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "lucky-history-383101-274a80771e41.json"

def blob_exists(bucket_name, blob_name):
    storage_client = storage.Client.from_service_account_json('lucky-history-383101-274a80771e41.json')
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)
    return blob.exists()


storage_client = storage.Client()
bucket = storage_client.bucket('earningsedge')

# Define headers once, as they don't change
headers = {
    "X-RapidAPI-Key": "780f2a96d1mshd6f358e08848cd1p18cd1ajsne72c995bab67",
    "X-RapidAPI-Host": "seeking-alpha.p.rapidapi.com"
}

from difflib import SequenceMatcher

def similarity_ratio(text1, text2):
    return SequenceMatcher(None, text1, text2).ratio()
#ADP still needs one more quarter so does DE and I think GILD
symbols = ['PANW']
#stocks = ['INTU', 'UNP', 'HON', 'MDT', 'TJX', 'AXP', 'DE', 'GILD', 'ADP', 'PGR', 'C', 'PANW', 'MU', 'CI', 'DUK', 'CMG', 'HUM', 'MAR', 'MET', 'D', 'LHX', 'FTNT', 'DG', 'LUV', 'DGX', 'BBY', 'LYV', 'KMX', 'ETSY']
#symbols = ['ADBE', 'GILD', 'ADP', 'PGR', 'C', 'PANW', 'MU', 'CI', 'DUK', 'CMG', 'HUM', 'MAR', 'MET', 'D', 'LHX', 'FTNT', 'DG', 'LUV', 'DGX', 'BBY', 'LYV', 'KMX', 'ETSY']
# Function to find the CEO's name from the transcript
def find_ceo_name(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    titles_hierarchy = [
        'Chief Executive Officer',
        'CEO',
        'Co-Chief Executive Officer',
        'Co-CEO',
        'Chief Financial Officer',
        'CFO'
    ]
    # Look for the names in the 'Company Participants' section
    participants = soup.find_all('p')
    for participant in participants:
        parts = participant.text.split(' - ')
        for part in parts:
            for title in titles_hierarchy:
                if title in part:
                    # Assuming the name is the other part of the split
                    name_index = 1 - parts.index(part)
                    ceo_name = parts[name_index].strip()
                    return ceo_name
    return None


# Function to extract all statements made by the CEO from the transcript
def extract_ceo_statements(html_content, ceo_name):
    soup = BeautifulSoup(html_content, 'html.parser')
    ceo_statements = []
    # Find the tag that contains the CEO's name
    ceo_tag = soup.find(lambda tag: tag.name == 'strong' and ceo_name in tag.get_text())
    if ceo_tag:
        # Collect all the statements made by the CEO
        for tag in ceo_tag.find_all_next(['p', 'strong']):
            # If a strong tag is found, break the loop as it indicates a new speaker
            if tag.name == 'strong' and tag.get_text() != ceo_name:
                break
            if tag.name == 'p':
                ceo_statements.append(tag.get_text().strip())
    return ceo_statements

iteration_count = 0


for symbol in symbols:
  quarters_processed = 0
  while quarters_processed < 4:
    try:
        # Get the list of transcripts for the symbol
        list_url = "https://seeking-alpha.p.rapidapi.com/transcripts/v2/list"
        list_querystring = {"id": symbol, "size": "20", "number": "1"}
        list_response = requests.get(list_url, headers=headers, params=list_querystring)

        # Check if the response was successful
        if list_response.ok:
            print('okay')
            iteration_count += 1
            # If the counter reaches four, stop the loop
            if iteration_count > 4:
                print("Processed four earnings calls. Stopping.")
                break
            list_data = list_response.json()
            found_earnings_call = False  # Flag to indicate if the earnings call is found

            for item in list_data['data']:
                title = item['attributes']['title']

                # Check if the title matches the criteria
                if "Earnings" in title and "Call" in title and "Transcript" and symbol.upper() in title:
                    found_earnings_call = True  # Set the flag to True as we found the earnings call
                    print(title)
                    call_id = item['id']

                    # Extract quarter and year from the title
                    parts = title.split()
                    for part in parts:
                        if "Q" in part and part[1:].isdigit():
                            quarter = part  # This will be something like 'Q3'
                        if part.isdigit() and len(part) == 4:
                            year = part  # This will be something like '2023'

                                # Existing code to extract quarter and year...


                    output_file_path = f"Events/{symbol}/{quarter}-{year}/Behavioral/identified_speaker_data_{symbol}_{year}_{quarter}.json"

                    #Check if the processed call already exists in the bucket
                    if blob_exists("earningsedge", output_file_path):
                        print(f"Processed call {id} for symbol {symbol} already exists in the bucket. Continuing.")
                        continue  # Return false if blob exists

                    #Run Hume with call_id
                    print(f'Processing {title} call')
                    urls = [f"https://static.seekingalpha.com/cdn/s3/transcripts_audio/{call_id}.mp3"]
                    for url in urls:
                      local_mp3_path = download_file(url, f"{call_id}.mp3")
                      trim_mp3_if_necessary(local_mp3_path)
                      gcs_public_url = upload_to_gcs_and_get_url('earningsedge', local_mp3_path, f'trimmed_audio/{call_id}.mp3')

                      configs = [ProsodyConfig(granularity='sentence', identify_speakers=True, window={'length': 4.0, 'step': 1.0}), LanguageConfig(identify_speakers=True, granularity='sentence')]
                      job = client.submit_job([gcs_public_url], configs)
                      job.await_complete(timeout=300000)
                      job.download_predictions(f"predictions_{id}.json")


                      with open(f'predictions_{id}.json', 'r') as file:
                          hume_data = json.load(file)

                      hume_speakers_text = defaultdict(str)

                      for result in hume_data:
                          predictions = result.get('results', {}).get('predictions', [])
                          for prediction in predictions:
                              models = prediction.get('models', {})
                              for model in models.values():
                                  grouped_predictions = model.get('grouped_predictions', [])

                                  for gp in grouped_predictions:
                                      speaker_id = gp.get('id')
                                      speaker_text = [p.get('text') for p in gp.get('predictions')]
                                      hume_speakers_text[speaker_id] += ' '.join(speaker_text)



                      # Get the details of the earnings call
                      details_url = "https://seeking-alpha.p.rapidapi.com/transcripts/v2/get-details"
                      details_querystring = {"id": call_id}
                      details_response = requests.get(details_url, headers=headers, params=details_querystring)

                      # Check if the details response was successful
                      if details_response.ok:
                          details_data = details_response.json()
                          # Extracting the raw html transcript
                          htmlTranscript = details_data['data']['attributes']['content']

                          # Use the function to find the CEO's name from the htmlTranscript
                          ceo_name = find_ceo_name(htmlTranscript)
                          if ceo_name:
                              print(f"CEO's Name: {ceo_name}")

                              # Extract the CEO's statements
                              ceo_statements = extract_ceo_statements(htmlTranscript, ceo_name)
                              # ceo_statements should be a concatenated string of all CEO statements
                              ceo_text = ' '.join(ceo_statements)
                              if ceo_statements:
                                  print(f"Statements made by {ceo_name}:")
                                  for statement in ceo_statements:
                                      print(statement)

                              else:
                                  print(f"No statements found for {ceo_name}.")
                              # Define the range of characters to use for comparison
                              char_range_start = 50
                              char_range_end = 800

                              # Extract a slice of the CEO's statements within the specified range
                              # Ensure that the text is long enough for the slice
                              ceo_text_slice = ceo_text[char_range_start:char_range_end] if len(ceo_text) > char_range_end else ceo_text

                              # Store the most similar speaker and the highest similarity ratio found
                              most_similar_speaker_id = None
                              highest_similarity_ratio = 0

                              # Compare the sliced statements of each speaker with the CEO's sliced statements
                              for speaker_id, speaker_text in hume_speakers_text.items():
                                  # Take a slice of the speaker's text
                                  speaker_text_slice = speaker_text[char_range_start:char_range_end] if len(speaker_text) > char_range_end else speaker_text
                                  # Calculate the similarity ratio for the sliced texts
                                  ratio = similarity_ratio(ceo_text_slice, speaker_text_slice)
                                  if ratio > highest_similarity_ratio:
                                      highest_similarity_ratio = ratio
                                      most_similar_speaker_id = speaker_id

                              identified_speaker_data = [gp for gp in grouped_predictions if gp.get('id') == most_similar_speaker_id]
                              # Write the identified speaker's data to a JSON file for this call
                              identified_speaker_dict = {
                                          "identified_speaker_data": identified_speaker_data
                                      }

                              os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

                              with open(output_file_path, "w") as output_file:
                                      json.dump(identified_speaker_dict, output_file)


                              bucket_name = 'earningsedge'
                              upload_to_bucket(output_file_path, output_file_path, bucket_name)
                              local_file_path = f"local_{output_file_path}"
                              blob_name = f"{output_file_path}"
                              upload_to_bucket(local_file_path, blob_name, bucket_name)

                              print(f"Identified speaker's data for {symbol} {year} {quarter} has been saved to: {output_file_path}")

                              # Initialize the Google Cloud Storage client
                              storage_client = storage.Client.from_service_account_json('lucky-history-383101-274a80771e41.json')

                              # Your bucket name
                              bucket_name = 'earningsedge'
                              json_filepath = output_file_path

                              # Generate the emotion timeline for the current speaker
                              process_json(storage_client, bucket_name, json_filepath, symbol, quarter, year)

                              # Load the emotion timeline JSON file
                              blob = storage_client.get_bucket(bucket_name).blob(json_filepath.replace(f'identified_speaker_data_{symbol}_{year}_{quarter}.json', f'{symbol}-EmotionTimeline-{quarter}-{year}.json'))
                              emotion_timeline_json = blob.download_as_text()
                              emotion_timeline = json.loads(emotion_timeline_json)

                              # Calculate the average emotion scores and push to GCS as JSON
                              calculate_average_emotion_scores(emotion_timeline, storage_client, bucket_name, json_filepath, symbol, quarter, year)



                              # After the loop, most_similar_speaker_id holds the ID of the most similar speaker
                              if most_similar_speaker_id is not None:
                                  print(f"The speaker most similar to the CEO within the character range {char_range_start}-{char_range_end} is: Speaker {most_similar_speaker_id} with a similarity ratio of {highest_similarity_ratio:.2f}")
                              else:
                                  print("No similar speaker found within the specified character range.")

                          else:
                              print("CEO's name not found in the transcript.")

                          break  # Stop iterating as we found the earnings call
                      else:
                          print(f"Failed to get details for {call_id}")
                          break  # Stop iterating as we found the earnings call but couldn't get details
                    quarters_processed += 1
                if not found_earnings_call:
                    # If we didn't find an earnings call, print a message
                    print(f"No earnings call transcript found for {symbol}")

            else:
                print(f"Failed to get list for {symbol}")

        # Handle exceptions that may occur during the API requests
    except requests.exceptions.RequestException as e:
            print(f"An error occurred: {e}")




ModuleNotFoundError: ignored

In [None]:
!pip install Hume

In [None]:
import requests
from bs4 import BeautifulSoup
import requests
import json
import pandas as pd
import re
from difflib import SequenceMatcher
from collections import defaultdict
from google.cloud import storage
import requests
from hume import HumeBatchClient
from hume.models.config import FaceConfig
from hume.models.config import ProsodyConfig
from hume.models.config import ProsodyConfig, LanguageConfig
import os
import json
from google.cloud import storage
import subprocess
import os



def process_json(storage_client, bucket_name, json_filepath, symbol, quarter, year):
    # Get the blob of the JSON file in GCS
    blob = storage_client.get_bucket(bucket_name).blob(json_filepath)
    json_data = json.loads(blob.download_as_text())

    # Initialize emotion timeline list
    emotion_timeline = []

    # Extract the speaker predictions
    try:
        speaker_predictions = json_data['identified_speaker_data'][0]['predictions']

        # Create an index and populate the emotion timeline
        for prediction in speaker_predictions:
            begin_time = prediction['time']['begin']
            end_time = prediction['time']['end']
            text = prediction['text']
            emotions = prediction['emotions']

            # Create a new object for each time frame
            time_frame = {
                'begin_time': begin_time,
                'end_time': end_time,
                'text': text,
                'emotions': emotions
            }

            emotion_timeline.append(time_frame)

        # Convert emotion timeline to JSON
        emotion_timeline_json = json.dumps(emotion_timeline)

        # Upload emotion timeline JSON to the same bucket
        storage_client.get_bucket(bucket_name).blob(json_filepath.replace(f'identified_speaker_data_{symbol}_{year}_{quarter}.json', f'{symbol}-EmotionTimeline-{quarter}-{year}.json')).upload_from_string(emotion_timeline_json)

        print('Emotion timeline JSON file uploaded successfully.')

        # Calculate the percent change in emotion scores
        calculate_percent_change(emotion_timeline, storage_client, bucket_name, json_filepath, symbol, quarter, year)

    except KeyError:
        print('Invalid JSON format')

def calculate_percent_change(emotion_timeline, storage_client, bucket_name, json_filepath, symbol, quarter, year):
    prev_values = {}
    percent_change_timeline = []

    for i in range(len(emotion_timeline)):
        current_time_frame = emotion_timeline[i]
        current_emotions = current_time_frame['emotions']
        current_text = current_time_frame['text']

        new_emotions = []

        for emotion in current_emotions:
            emotion_name = emotion['name']
            current_score = emotion['score']

            if emotion_name in prev_values:
                old_value = prev_values[emotion_name]
                percent_change = ((current_score - old_value) / old_value) * 100 if old_value != 0 else 0
                new_emotions.append({"name": emotion_name, "percent_change": percent_change})
            else:
                new_emotions.append({"name": emotion_name, "percent_change": 0})

            prev_values[emotion_name] = current_score

        percent_change_timeline.append({'text': current_text, 'percent_change_emotions': new_emotions})

    percent_change_timeline_json = json.dumps(percent_change_timeline)

    # Upload percent change timeline JSON to the same bucket
    storage_client.get_bucket(bucket_name).blob(json_filepath.replace(f'identified_speaker_data_{symbol}_{year}_{quarter}.json', f'{symbol}-PrcChgTimeline-{quarter}-{year}.json')).upload_from_string(percent_change_timeline_json)

    print('Percent change timeline JSON file uploaded successfully.')



import json
from google.cloud import storage

def calculate_average_emotion_scores(emotion_timeline, storage_client, bucket_name, json_filepath, symbol, quarter, year):
    emotion_scores = {}

    # Iterate over each time frame in the emotion timeline
    for time_frame in emotion_timeline:
        print(emotion_timeline)
        emotions = time_frame['emotions']

        # Aggregate the scores for each emotion
        for emotion in emotions:
            emotion_name = emotion['name']
            score = emotion['score']

            # If the emotion is encountered for the first time, initialize the score
            if emotion_name not in emotion_scores:
                emotion_scores[emotion_name] = score
            else:
                # Otherwise, accumulate the scores for the emotion
                emotion_scores[emotion_name] += score

    # Calculate the average score for each emotion
    num_time_frames = len(emotion_timeline)
    average_emotion_scores = {}

    for emotion_name, score in emotion_scores.items():
        average_score = score / num_time_frames
        average_emotion_scores[emotion_name] = average_score

    # Convert average emotion scores to JSON
    average_emotion_scores_json = json.dumps(average_emotion_scores)

    # Upload average emotion scores JSON to the same bucket
    storage_client.get_bucket(bucket_name).blob(json_filepath.replace(f'identified_speaker_data_{symbol}_{year}_{quarter}.json', f"{symbol}-AvgEmotions-{quarter}-{year}.json")).upload_from_string(average_emotion_scores_json)

    print('Average emotion scores JSON file uploaded successfully.')


def upload_to_bucket(blob_name, path_to_file, bucket_name):
    """ Upload data to a bucket"""
    # Explicitly use service account credentials by specifying the private key
    # file.
    storage_client = storage.Client.from_service_account_json('lucky-history-383101-274a80771e41.json')

    #print(blobs = storage_client.list_blobs(bucket_name))
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.upload_from_filename(path_to_file)

    #returns a public url
    return blob.public_url

client = HumeBatchClient("3QBfsJ4JPV5yadXAZRaehTfUXfVTYjUd3nqodLZXvbIdKnwn")
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "lucky-history-383101-274a80771e41.json"

def blob_exists(bucket_name, blob_name):
    storage_client = storage.Client.from_service_account_json('lucky-history-383101-274a80771e41.json')
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)
    return blob.exists()


storage_client = storage.Client()
bucket = storage_client.bucket('earningsedge')

# Define headers once, as they don't change
headers = {
    "X-RapidAPI-Key": "780f2a96d1mshd6f358e08848cd1p18cd1ajsne72c995bab67",
    "X-RapidAPI-Host": "seeking-alpha.p.rapidapi.com"
}

from difflib import SequenceMatcher

def similarity_ratio(text1, text2):
    return SequenceMatcher(None, text1, text2).ratio()


stocks = ['INTU', 'UNP', 'HON', 'MDT', 'TJX', 'AXP', 'DE', 'GILD', 'ADP', 'PGR', 'C', 'PANW', 'MU', 'CI', 'DUK', 'CMG', 'HUM', 'MAR', 'MET', 'D', 'LHX', 'FTNT', 'DG', 'LUV', 'DGX', 'BBY', 'LYV', 'KMX', 'ETSY']
symbols = ['UNP']
# Function to find the CEO's name from the transcript
def find_ceo_name(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    titles_hierarchy = [
        'Chief Executive Officer',
        'CEO',
        'Co-Chief Executive Officer',
        'Co-CEO',
        'Chief Financial Officer',
        'CFO'
    ]
    # Look for the names in the 'Company Participants' section
    participants = soup.find_all('p')
    for participant in participants:
        parts = participant.text.split(' - ')
        for part in parts:
            for title in titles_hierarchy:
                if title in part:
                    # Assuming the name is the other part of the split
                    name_index = 1 - parts.index(part)
                    ceo_name = parts[name_index].strip()
                    return ceo_name
    return None


# Function to extract all statements made by the CEO from the transcript
def extract_ceo_statements(html_content, ceo_name):
    soup = BeautifulSoup(html_content, 'html.parser')
    ceo_statements = []
    # Find the tag that contains the CEO's name
    ceo_tag = soup.find(lambda tag: tag.name == 'strong' and ceo_name in tag.get_text())
    if ceo_tag:
        # Collect all the statements made by the CEO
        for tag in ceo_tag.find_all_next(['p', 'strong']):
            # If a strong tag is found, break the loop as it indicates a new speaker
            if tag.name == 'strong' and tag.get_text() != ceo_name:
                break
            if tag.name == 'p':
                ceo_statements.append(tag.get_text().strip())
    return ceo_statements

iteration_count = 0


for symbol in symbols:
  quarters_processed = 0
  while quarters_processed < 4:
    try:
        # Get the list of transcripts for the symbol
        list_url = "https://seeking-alpha.p.rapidapi.com/transcripts/v2/list"
        list_querystring = {"id": symbol, "size": "20", "number": "1"}
        list_response = requests.get(list_url, headers=headers, params=list_querystring)

        # Check if the response was successful
        if list_response.ok:
            print('okay')
            iteration_count += 1
            # If the counter reaches four, stop the loop
            if iteration_count > 4:
                print("Processed four earnings calls. Stopping.")
                break
            list_data = list_response.json()
            found_earnings_call = False  # Flag to indicate if the earnings call is found

            for item in list_data['data']:
                title = item['attributes']['title']

                # Check if the title matches the criteria
                if "Earnings" in title and "Call" in title and "Transcript" and symbol.upper() in title:
                    found_earnings_call = True  # Set the flag to True as we found the earnings call
                    print(title)
                    call_id = item['id']

                    # Extract quarter and year from the title
                    parts = title.split()
                    for part in parts:
                        if "Q" in part and part[1:].isdigit():
                            quarter = part  # This will be something like 'Q3'
                        if part.isdigit() and len(part) == 4:
                            year = part  # This will be something like '2023'

                                # Existing code to extract quarter and year...


                    output_file_path = f"Events/{symbol}/{quarter}-{year}/Behavioral/identified_speaker_data_{symbol}_{year}_{quarter}.json"

                    #Check if the processed call already exists in the bucket
                    if blob_exists("earningsedge", output_file_path):
                        print(f"Processed call {id} for symbol {symbol} already exists in the bucket. Continuing.")
                        continue  # Return false if blob exists

                    #Run Hume with call_id
                    print(f'Processing {title} call')
                    urls = [f"https://static.seekingalpha.com/cdn/s3/transcripts_audio/{call_id}.mp3"]

                    configs = [ProsodyConfig(granularity='sentence', identify_speakers=True, window={'length': 4.0, 'step': 1.0}), LanguageConfig(identify_speakers=True, granularity='sentence')]
                    job = client.submit_job(urls, configs)
                    job.await_complete(timeout=300000)
                    job.download_predictions(f"predictions_{id}.json")


                    with open(f'predictions_{id}.json', 'r') as file:
                        hume_data = json.load(file)

                    hume_speakers_text = defaultdict(str)

                    for result in hume_data:
                        predictions = result.get('results', {}).get('predictions', [])
                        for prediction in predictions:
                            models = prediction.get('models', {})
                            for model in models.values():
                                grouped_predictions = model.get('grouped_predictions', [])

                                for gp in grouped_predictions:
                                    speaker_id = gp.get('id')
                                    speaker_text = [p.get('text') for p in gp.get('predictions')]
                                    hume_speakers_text[speaker_id] += ' '.join(speaker_text)



                    # Get the details of the earnings call
                    details_url = "https://seeking-alpha.p.rapidapi.com/transcripts/v2/get-details"
                    details_querystring = {"id": call_id}
                    details_response = requests.get(details_url, headers=headers, params=details_querystring)

                    # Check if the details response was successful
                    if details_response.ok:
                        details_data = details_response.json()
                        # Extracting the raw html transcript
                        htmlTranscript = details_data['data']['attributes']['content']

                        # Use the function to find the CEO's name from the htmlTranscript
                        ceo_name = find_ceo_name(htmlTranscript)
                        if ceo_name:
                            print(f"CEO's Name: {ceo_name}")

                            # Extract the CEO's statements
                            ceo_statements = extract_ceo_statements(htmlTranscript, ceo_name)
                            # ceo_statements should be a concatenated string of all CEO statements
                            ceo_text = ' '.join(ceo_statements)
                            if ceo_statements:
                                print(f"Statements made by {ceo_name}:")
                                for statement in ceo_statements:
                                    print(statement)

                            else:
                                print(f"No statements found for {ceo_name}.")
                            # Define the range of characters to use for comparison
                            char_range_start = 50
                            char_range_end = 800

                            # Extract a slice of the CEO's statements within the specified range
                            # Ensure that the text is long enough for the slice
                            ceo_text_slice = ceo_text[char_range_start:char_range_end] if len(ceo_text) > char_range_end else ceo_text

                            # Store the most similar speaker and the highest similarity ratio found
                            most_similar_speaker_id = None
                            highest_similarity_ratio = 0

                            # Compare the sliced statements of each speaker with the CEO's sliced statements
                            for speaker_id, speaker_text in hume_speakers_text.items():
                                # Take a slice of the speaker's text
                                speaker_text_slice = speaker_text[char_range_start:char_range_end] if len(speaker_text) > char_range_end else speaker_text
                                # Calculate the similarity ratio for the sliced texts
                                ratio = similarity_ratio(ceo_text_slice, speaker_text_slice)
                                if ratio > highest_similarity_ratio:
                                    highest_similarity_ratio = ratio
                                    most_similar_speaker_id = speaker_id

                            identified_speaker_data = [gp for gp in grouped_predictions if gp.get('id') == most_similar_speaker_id]
                            # Write the identified speaker's data to a JSON file for this call
                            identified_speaker_dict = {
                                        "identified_speaker_data": identified_speaker_data
                                    }

                            os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

                            with open(output_file_path, "w") as output_file:
                                    json.dump(identified_speaker_dict, output_file)


                            bucket_name = 'earningsedge'
                            upload_to_bucket(output_file_path, output_file_path, bucket_name)
                            local_file_path = f"local_{output_file_path}"
                            blob_name = f"{output_file_path}"
                            upload_to_bucket(local_file_path, blob_name, bucket_name)

                            print(f"Identified speaker's data for {symbol} {year} {quarter} has been saved to: {output_file_path}")

                            # Initialize the Google Cloud Storage client
                            storage_client = storage.Client.from_service_account_json('lucky-history-383101-274a80771e41.json')

                            # Your bucket name
                            bucket_name = 'earningsedge'
                            json_filepath = output_file_path

                            # Generate the emotion timeline for the current speaker
                            process_json(storage_client, bucket_name, json_filepath, symbol, quarter, year)

                            # Load the emotion timeline JSON file
                            blob = storage_client.get_bucket(bucket_name).blob(json_filepath.replace(f'identified_speaker_data_{symbol}_{year}_{quarter}.json', f'{symbol}-EmotionTimeline-{quarter}-{year}.json'))
                            emotion_timeline_json = blob.download_as_text()
                            emotion_timeline = json.loads(emotion_timeline_json)

                             # Calculate the average emotion scores and push to GCS as JSON
                            calculate_average_emotion_scores(emotion_timeline, storage_client, bucket_name, json_filepath, symbol, quarter, year)



                            # After the loop, most_similar_speaker_id holds the ID of the most similar speaker
                            if most_similar_speaker_id is not None:
                                print(f"The speaker most similar to the CEO within the character range {char_range_start}-{char_range_end} is: Speaker {most_similar_speaker_id} with a similarity ratio of {highest_similarity_ratio:.2f}")
                            else:
                                print("No similar speaker found within the specified character range.")

                        else:
                            print("CEO's name not found in the transcript.")

                        break  # Stop iterating as we found the earnings call
                    else:
                        print(f"Failed to get details for {call_id}")
                        break  # Stop iterating as we found the earnings call but couldn't get details
                quarters_processed += 1
            if not found_earnings_call:
                # If we didn't find an earnings call, print a message
                print(f"No earnings call transcript found for {symbol}")

        else:
            print(f"Failed to get list for {symbol}")

    # Handle exceptions that may occur during the API requests
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")





#Hume Actual Code


In [None]:
import requests
from bs4 import BeautifulSoup
import requests
import json
import pandas as pd
import re
from difflib import SequenceMatcher
from collections import defaultdict
from google.cloud import storage
import requests
from hume import HumeBatchClient
from hume.models.config import FaceConfig
from hume.models.config import ProsodyConfig
from hume.models.config import ProsodyConfig, LanguageConfig
import os
import json
from google.cloud import storage



def process_json(storage_client, bucket_name, json_filepath, symbol, quarter, year):
    # Get the blob of the JSON file in GCS
    blob = storage_client.get_bucket(bucket_name).blob(json_filepath)
    json_data = json.loads(blob.download_as_text())

    # Initialize emotion timeline list
    emotion_timeline = []

    # Extract the speaker predictions
    try:
        speaker_predictions = json_data['identified_speaker_data'][0]['predictions']

        # Create an index and populate the emotion timeline
        for prediction in speaker_predictions:
            begin_time = prediction['time']['begin']
            end_time = prediction['time']['end']
            text = prediction['text']
            emotions = prediction['emotions']

            # Create a new object for each time frame
            time_frame = {
                'begin_time': begin_time,
                'end_time': end_time,
                'text': text,
                'emotions': emotions
            }

            emotion_timeline.append(time_frame)

        # Convert emotion timeline to JSON
        emotion_timeline_json = json.dumps(emotion_timeline)

        # Upload emotion timeline JSON to the same bucket
        storage_client.get_bucket(bucket_name).blob(json_filepath.replace(f'identified_speaker_data_{symbol}_{year}_{quarter}.json', f'{symbol}-EmotionTimeline-{quarter}-{year}.json')).upload_from_string(emotion_timeline_json)

        print('Emotion timeline JSON file uploaded successfully.')

        # Calculate the percent change in emotion scores
        calculate_percent_change(emotion_timeline, storage_client, bucket_name, json_filepath, symbol, quarter, year)

    except KeyError:
        print('Invalid JSON format')

def calculate_percent_change(emotion_timeline, storage_client, bucket_name, json_filepath, symbol, quarter, year):
    prev_values = {}
    percent_change_timeline = []

    for i in range(len(emotion_timeline)):
        current_time_frame = emotion_timeline[i]
        current_emotions = current_time_frame['emotions']
        current_text = current_time_frame['text']

        new_emotions = []

        for emotion in current_emotions:
            emotion_name = emotion['name']
            current_score = emotion['score']

            if emotion_name in prev_values:
                old_value = prev_values[emotion_name]
                percent_change = ((current_score - old_value) / old_value) * 100 if old_value != 0 else 0
                new_emotions.append({"name": emotion_name, "percent_change": percent_change})
            else:
                new_emotions.append({"name": emotion_name, "percent_change": 0})

            prev_values[emotion_name] = current_score

        percent_change_timeline.append({'text': current_text, 'percent_change_emotions': new_emotions})

    percent_change_timeline_json = json.dumps(percent_change_timeline)

    # Upload percent change timeline JSON to the same bucket
    storage_client.get_bucket(bucket_name).blob(json_filepath.replace(f'identified_speaker_data_{symbol}_{year}_{quarter}.json', f'{symbol}-PrcChgTimeline-{quarter}-{year}.json')).upload_from_string(percent_change_timeline_json)

    print('Percent change timeline JSON file uploaded successfully.')



import json
from google.cloud import storage

def calculate_average_emotion_scores(emotion_timeline, storage_client, bucket_name, json_filepath, symbol, quarter, year):
    emotion_scores = {}

    # Iterate over each time frame in the emotion timeline
    for time_frame in emotion_timeline:
        print(emotion_timeline)
        emotions = time_frame['emotions']

        # Aggregate the scores for each emotion
        for emotion in emotions:
            emotion_name = emotion['name']
            score = emotion['score']

            # If the emotion is encountered for the first time, initialize the score
            if emotion_name not in emotion_scores:
                emotion_scores[emotion_name] = score
            else:
                # Otherwise, accumulate the scores for the emotion
                emotion_scores[emotion_name] += score

    # Calculate the average score for each emotion
    num_time_frames = len(emotion_timeline)
    average_emotion_scores = {}

    for emotion_name, score in emotion_scores.items():
        average_score = score / num_time_frames
        average_emotion_scores[emotion_name] = average_score

    # Convert average emotion scores to JSON
    average_emotion_scores_json = json.dumps(average_emotion_scores)

    # Upload average emotion scores JSON to the same bucket
    storage_client.get_bucket(bucket_name).blob(json_filepath.replace(f'identified_speaker_data_{symbol}_{year}_{quarter}.json', f"{symbol}-AvgEmotions-{quarter}-{year}.json")).upload_from_string(average_emotion_scores_json)

    print('Average emotion scores JSON file uploaded successfully.')


def upload_to_bucket(blob_name, path_to_file, bucket_name):
    """ Upload data to a bucket"""
    # Explicitly use service account credentials by specifying the private key
    # file.
    storage_client = storage.Client.from_service_account_json('lucky-history-383101-274a80771e41.json')

    #print(blobs = storage_client.list_blobs(bucket_name))
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.upload_from_filename(path_to_file)

    #returns a public url
    return blob.public_url

client = HumeBatchClient("3QBfsJ4JPV5yadXAZRaehTfUXfVTYjUd3nqodLZXvbIdKnwn")
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "lucky-history-383101-274a80771e41.json"

def blob_exists(bucket_name, blob_name):
    storage_client = storage.Client.from_service_account_json('lucky-history-383101-274a80771e41.json')
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(blob_name)
    return blob.exists()


storage_client = storage.Client()
bucket = storage_client.bucket('earningsedge')

# Define headers once, as they don't change
headers = {
    "X-RapidAPI-Key": "780f2a96d1mshd6f358e08848cd1p18cd1ajsne72c995bab67",
    "X-RapidAPI-Host": "seeking-alpha.p.rapidapi.com"
}

from difflib import SequenceMatcher

def similarity_ratio(text1, text2):
    return SequenceMatcher(None, text1, text2).ratio()


stocks = ['AAL', 'AAPL', 'MSFT', 'GOOG', 'AMZN', 'NVDA', 'ET', 'TSLA','META', 'TSM','LLY', 'V', 'WMT', 'XOM', 'WMT', 'JPM', 'MA', 'PG', 'NVO', 'AVGO', 'ORCL', 'HD', 'CVX', 'ASML', 'MRK', 'KO', 'PEP', 'COST', 'ABBV', 'BABA', 'BAC', 'NOW', 'ADBE', 'AZN', 'MCD', 'NVS', 'CSCO', 'CRM', 'PFE', 'SHEL', 'ACN', 'NFLX', 'DIS', 'AMD', 'NKE', 'WFC', 'INTC', 'VZ', 'LOW', 'BA', 'GE', 'UBER', 'PANW', 'PM', 'QCOM', 'MRNA', 'LULU', 'F', 'SBUX', 'TGT', 'DAL', 'PYPL', 'GM', 'SQ', 'ZM', 'WBA', 'INTC', 'ON', 'ZM', 'PLTR', 'TSN', 'NIO', 'NEE']

# Function to find the CEO's name from the transcript
def find_ceo_name(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    titles_hierarchy = [
        'Chief Executive Officer',
        'CEO',
        'Co-Chief Executive Officer',
        'Co-CEO',
        'Chief Financial Officer',
        'CFO'
    ]
    # Look for the names in the 'Company Participants' section
    participants = soup.find_all('p')
    for participant in participants:
        parts = participant.text.split(' - ')
        for part in parts:
            for title in titles_hierarchy:
                if title in part:
                    # Assuming the name is the other part of the split
                    name_index = 1 - parts.index(part)
                    ceo_name = parts[name_index].strip()
                    return ceo_name
    return None


# Function to extract all statements made by the CEO from the transcript
def extract_ceo_statements(html_content, ceo_name):
    soup = BeautifulSoup(html_content, 'html.parser')
    ceo_statements = []
    # Find the tag that contains the CEO's name
    ceo_tag = soup.find(lambda tag: tag.name == 'strong' and ceo_name in tag.get_text())
    if ceo_tag:
        # Collect all the statements made by the CEO
        for tag in ceo_tag.find_all_next(['p', 'strong']):
            # If a strong tag is found, break the loop as it indicates a new speaker
            if tag.name == 'strong' and tag.get_text() != ceo_name:
                break
            if tag.name == 'p':
                ceo_statements.append(tag.get_text().strip())
    return ceo_statements

for symbol in symbols:
    try:
        # Get the list of transcripts for the symbol
        list_url = "https://seeking-alpha.p.rapidapi.com/transcripts/v2/list"
        list_querystring = {"id": symbol, "size": "20", "number": "1"}
        list_response = requests.get(list_url, headers=headers, params=list_querystring)

        # Check if the response was successful
        if list_response.ok:
            list_data = list_response.json()
            found_earnings_call = False  # Flag to indicate if the earnings call is found

            for item in list_data['data']:
                title = item['attributes']['title']

                # Check if the title matches the criteria
                if "Earnings" in title and "Call" in title and "Transcript" in title and symbol.upper() in title:
                    found_earnings_call = True  # Set the flag to True as we found the earnings call
                    call_id = item['id']

                    # Extract quarter and year from the title
                    parts = title.split()
                    for part in parts:
                        if "Q" in part and part[1:].isdigit():
                            quarter = part  # This will be something like 'Q3'
                        if part.isdigit() and len(part) == 4:
                            year = part  # This will be something like '2023'

                                # Existing code to extract quarter and year...


                    output_file_path = f"Events/{symbol}/{quarter}-{year}/Behavioral/identified_speaker_data_{symbol}_{year}_{quarter}.json"

                    #Check if the processed call already exists in the bucket
                    if blob_exists("earningsedge", output_file_path):
                        print(f"Processed call {id} for symbol {symbol} already exists in the bucket. Skipping.")
                        break  # Return false if blob exists

                    #Run Hume with call_id
                    urls = [f"https://static.seekingalpha.com/cdn/s3/transcripts_audio/{call_id}.mp3"]
                    configs = [ProsodyConfig(granularity='sentence', identify_speakers=True, window={'length': 4.0, 'step': 1.0}), LanguageConfig(identify_speakers=True, granularity='sentence')]
                    job = client.submit_job(urls, configs)
                    job.await_complete(timeout=3000)
                    job.download_predictions(f"predictions_{id}.json")


                    with open(f'predictions_{id}.json', 'r') as file:
                        hume_data = json.load(file)

                    hume_speakers_text = defaultdict(str)

                    for result in hume_data:
                        predictions = result.get('results', {}).get('predictions', [])
                        for prediction in predictions:
                            models = prediction.get('models', {})
                            for model in models.values():
                                grouped_predictions = model.get('grouped_predictions', [])

                                for gp in grouped_predictions:
                                    speaker_id = gp.get('id')
                                    speaker_text = [p.get('text') for p in gp.get('predictions')]
                                    hume_speakers_text[speaker_id] += ' '.join(speaker_text)



                    # Get the details of the earnings call
                    details_url = "https://seeking-alpha.p.rapidapi.com/transcripts/v2/get-details"
                    details_querystring = {"id": call_id}
                    details_response = requests.get(details_url, headers=headers, params=details_querystring)

                    # Check if the details response was successful
                    if details_response.ok:
                        details_data = details_response.json()
                        # Extracting the raw html transcript
                        htmlTranscript = details_data['data']['attributes']['content']

                        # Use the function to find the CEO's name from the htmlTranscript
                        ceo_name = find_ceo_name(htmlTranscript)
                        if ceo_name:
                            print(f"CEO's Name: {ceo_name}")

                            # Extract the CEO's statements
                            ceo_statements = extract_ceo_statements(htmlTranscript, ceo_name)
                            # ceo_statements should be a concatenated string of all CEO statements
                            ceo_text = ' '.join(ceo_statements)
                            if ceo_statements:
                                print(f"Statements made by {ceo_name}:")
                                for statement in ceo_statements:
                                    print(statement)

                            else:
                                print(f"No statements found for {ceo_name}.")
                            # Define the range of characters to use for comparison
                            char_range_start = 50
                            char_range_end = 800

                            # Extract a slice of the CEO's statements within the specified range
                            # Ensure that the text is long enough for the slice
                            ceo_text_slice = ceo_text[char_range_start:char_range_end] if len(ceo_text) > char_range_end else ceo_text

                            # Store the most similar speaker and the highest similarity ratio found
                            most_similar_speaker_id = None
                            highest_similarity_ratio = 0

                            # Compare the sliced statements of each speaker with the CEO's sliced statements
                            for speaker_id, speaker_text in hume_speakers_text.items():
                                # Take a slice of the speaker's text
                                speaker_text_slice = speaker_text[char_range_start:char_range_end] if len(speaker_text) > char_range_end else speaker_text
                                # Calculate the similarity ratio for the sliced texts
                                ratio = similarity_ratio(ceo_text_slice, speaker_text_slice)
                                if ratio > highest_similarity_ratio:
                                    highest_similarity_ratio = ratio
                                    most_similar_speaker_id = speaker_id

                            identified_speaker_data = [gp for gp in grouped_predictions if gp.get('id') == most_similar_speaker_id]
                            # Write the identified speaker's data to a JSON file for this call
                            identified_speaker_dict = {
                                        "identified_speaker_data": identified_speaker_data
                                    }

                            os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

                            with open(output_file_path, "w") as output_file:
                                    json.dump(identified_speaker_dict, output_file)


                            bucket_name = 'earningsedge'
                            upload_to_bucket(output_file_path, output_file_path, bucket_name)
                            local_file_path = f"local_{output_file_path}"
                            blob_name = f"{output_file_path}"
                            upload_to_bucket(local_file_path, blob_name, bucket_name)

                            print(f"Identified speaker's data for {symbol} {year} {quarter} has been saved to: {output_file_path}")

                            # Initialize the Google Cloud Storage client
                            storage_client = storage.Client.from_service_account_json('lucky-history-383101-274a80771e41.json')

                            # Your bucket name
                            bucket_name = 'earningsedge'
                            json_filepath = output_file_path

                            # Generate the emotion timeline for the current speaker
                            process_json(storage_client, bucket_name, json_filepath, symbol, quarter, year)

                            # Load the emotion timeline JSON file
                            blob = storage_client.get_bucket(bucket_name).blob(json_filepath.replace(f'identified_speaker_data_{symbol}_{year}_{quarter}.json', f'{symbol}-EmotionTimeline-{quarter}-{year}.json'))
                            emotion_timeline_json = blob.download_as_text()
                            emotion_timeline = json.loads(emotion_timeline_json)

                             # Calculate the average emotion scores and push to GCS as JSON
                            calculate_average_emotion_scores(emotion_timeline, storage_client, bucket_name, json_filepath, symbol, quarter, year)



                            # After the loop, most_similar_speaker_id holds the ID of the most similar speaker
                            if most_similar_speaker_id is not None:
                                print(f"The speaker most similar to the CEO within the character range {char_range_start}-{char_range_end} is: Speaker {most_similar_speaker_id} with a similarity ratio of {highest_similarity_ratio:.2f}")
                            else:
                                print("No similar speaker found within the specified character range.")

                        else:
                            print("CEO's name not found in the transcript.")

                        break  # Stop iterating as we found the earnings call
                    else:
                        print(f"Failed to get details for {call_id}")
                        break  # Stop iterating as we found the earnings call but couldn't get details

            if not found_earnings_call:
                # If we didn't find an earnings call, print a message
                print(f"No earnings call transcript found for {symbol}")

        else:
            print(f"Failed to get list for {symbol}")

    # Handle exceptions that may occur during the API requests
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")





Processed call <built-in function id> for symbol NFLX already exists in the bucket. Skipping.
Processed call <built-in function id> for symbol AAPL already exists in the bucket. Skipping.
Processed call <built-in function id> for symbol GOOG already exists in the bucket. Skipping.
Processed call <built-in function id> for symbol MSFT already exists in the bucket. Skipping.
Processed call <built-in function id> for symbol AI already exists in the bucket. Skipping.


In [None]:
import requests


tickers = ["AAPL", "MSFT", "AMZN"]
url = "https://seeking-alpha.p.rapidapi.com/transcripts/v2/list"
for ticker in tickers:
  querystring = {"id":f"{ticker}","size":"20","number":"1"}

  headers = {
    "X-RapidAPI-Key": "780f2a96d1mshd6f358e08848cd1p18cd1ajsne72c995bab67",
    "X-RapidAPI-Host": "seeking-alpha.p.rapidapi.com"
  }

  response = requests.get(url, headers=headers, params=querystring)

  print(response.json())

{'data': [{'id': '4646927', 'type': 'transcript', 'attributes': {'publishOn': '2023-11-02T21:32:21-04:00', 'isLockedPro': False, 'commentCount': 1, 'gettyImageUrl': None, 'videoPreviewUrl': None, 'themes': {}, 'title': 'Apple Inc. (AAPL) Q4 2023 Earnings Call Transcript', 'isPaywalled': False}, 'relationships': {'author': {'data': {'id': '44211', 'type': 'author'}}, 'sentiments': {'data': []}, 'primaryTickers': {'data': [{'id': '146', 'type': 'tag'}]}, 'secondaryTickers': {'data': []}, 'otherTags': {'data': [{'id': '49', 'type': 'tag'}]}}, 'links': {'self': '/article/4646927-apple-inc-aapl-q4-2023-earnings-call-transcript'}}, {'id': '4623702', 'type': 'transcript', 'attributes': {'publishOn': '2023-08-03T21:47:09-04:00', 'isLockedPro': False, 'commentCount': 3, 'gettyImageUrl': None, 'videoPreviewUrl': None, 'themes': {}, 'title': 'Apple Inc. (AAPL) Q3 2023 Earnings Call Transcript', 'isPaywalled': False}, 'relationships': {'author': {'data': {'id': '44211', 'type': 'author'}}, 'sentim

KeyboardInterrupt: ignored

#Moving Averages


In [None]:
import requests
import pandas as pd
import json
from datetime import datetime
from google.cloud import storage
import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "lucky-history-383101-274a80771e41.json"
# Function to get constituents of a given index
def get_index_constituents(api_endpoint, api_key):
    url = f"https://financialmodelingprep.com/api/v3/{api_endpoint}?apikey={api_key}"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        return pd.DataFrame(data)
    else:
        print(f"Failed to fetch data for {api_endpoint}:", response.status_code)
        return pd.DataFrame()

# Function to get the current price and average prices for a given stock
def get_stock_data(ticker, api_key):
    url = f"https://financialmodelingprep.com/api/v3/quote/{ticker}?apikey={api_key}"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        return pd.DataFrame(data)
    else:
        print(f"Failed to fetch data for {ticker}:", response.status_code)
        return pd.DataFrame()

# Function to calculate the percentages of stocks above their 50-day and 200-day MAs for a given index
def calculate_percentages_above_mas(api_key, api_endpoint):
    constituents_df = get_index_constituents(api_endpoint, api_key)
    tickers = constituents_df['symbol'].tolist()
    above_50_ma_count = 0
    above_200_ma_count = 0

    for ticker in tickers:
        stock_data = get_stock_data(ticker, api_key)
        if not stock_data.empty:
            if stock_data['price'].iloc[0] > stock_data['priceAvg50'].iloc[0]:
                above_50_ma_count += 1
            if stock_data['price'].iloc[0] > stock_data['priceAvg200'].iloc[0]:
                above_200_ma_count += 1

    percentage_above_50_ma = (above_50_ma_count / len(tickers)) * 100
    percentage_above_200_ma = (above_200_ma_count / len(tickers)) * 100
    return {
        '50_day_ma': percentage_above_50_ma,
        '200_day_ma': percentage_above_200_ma
    }

# Function to append data to a json file in GCS
def append_to_gcs_json(bucket_name, destination_blob_name, new_data, storage_client):
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    # Get today's date in UTC
    today_utc = datetime.utcnow().date()

    # Check if the blob exists and get its content
    if blob.exists(storage_client):
        existing_data = json.loads(blob.download_as_string())
    else:
        existing_data = []

    # Check if today's date is already in the existing data
    if existing_data and today_utc.isoformat() == existing_data[0]['date']:
        print("Today's data already exists. No new data appended.")
    else:
        # Append the new data with today's date
        new_data_entry = {
            'date': today_utc.isoformat(),
            'indices': {
                'SP500': new_data['S&P 500'],
                'NASDAQ': new_data['NASDAQ'],
                'DowJones': new_data['Dow Jones']
            }
        }
        existing_data.append(new_data_entry)

        # Upload the updated data
        blob.upload_from_string(json.dumps(existing_data, indent=4), content_type='application/json')
        print("New data appended.")

# Main execution logic
def main(api_key):
    # Initialize the GCS client
    storage_client = storage.Client()

    # Dictionary to hold the percentage of stocks above their 50-day and 200-day MAs for each index
    index_ma_percentages = {
        'S&P 500': calculate_percentages_above_mas(api_key, 'sp500_constituent'),
        'NASDAQ': calculate_percentages_above_mas(api_key, 'nasdaq_constituent'),
        'Dow Jones': calculate_percentages_above_mas(api_key, 'dowjones_constituent')
    }

    # Name of the bucket and the desired file path
    bucket_name = 'earningsedge'  # replace with your actual bucket name
    destination_blob_name = 'Insights/ma_data.json'  # replace with your actual file path

    # Append the new data to the existing JSON in the bucket
    append_to_gcs_json(bucket_name, destination_blob_name, index_ma_percentages, storage_client)

if __name__ == "__main__":
    api_key = '28d5e9c0188bdad19a8f0bded39a200a'  # replace with your actual API key
    main(api_key)





New data appended.


#Advancers Vs. Decliners

In [None]:
import json
import requests
import certifi
from urllib.request import urlopen
import os
from google.cloud import storage

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "lucky-history-383101-274a80771e41.json"

# Function to parse JSON data from the API response
def get_jsonparsed_data(url):
    response = urlopen(url, cafile=certifi.where())
    data = response.read().decode("utf-8")
    return json.loads(data)

# Function to get historical data for a ticker
def get_historical_data(ticker, api_key):
    url = f"https://financialmodelingprep.com/api/v3/historical-price-full/{ticker}?apikey={api_key}"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        return data.get('historical', [])
    else:
        print(f"Failed to fetch data for {ticker}: {response.status_code}")
        return []

# Replace 'your_api_key' with your actual API key
api_key = '28d5e9c0188bdad19a8f0bded39a200a'

# Get a list of tickers, classified by market cap
url = "https://financialmodelingprep.com/api/v3/stock-screener?marketCapMoreThan=300000000&exchange=nyse,nasdaq&limit=10000&apikey=" + api_key
data = get_jsonparsed_data(url)

# Define the market cap categories
smallCap = [stock['symbol'] for stock in data if 300000000 <= stock['marketCap'] < 2000000000 and '.' not in stock['symbol'] and '-' not in stock['symbol'] and len(stock['symbol']) <= 4]
mediumCap = [stock['symbol'] for stock in data if 2000000000 <= stock['marketCap'] < 10000000000 and '.' not in stock['symbol'] and '-' not in stock['symbol'] and len(stock['symbol']) <= 4]
largeCap = [stock['symbol'] for stock in data if 10000000000 <= stock['marketCap'] < 200000000000 and '.' not in stock['symbol'] and '-' not in stock['symbol'] and len(stock['symbol']) <= 4]
megaCap = [stock['symbol'] for stock in data if stock['marketCap'] >= 200000000000 and '.' not in stock['symbol'] and '-' not in stock['symbol'] and len(stock['symbol']) <= 4]

# Initialize the dictionary to hold the aggregated counts and up/down counts for each market cap category
aggregated_counts = {
    'smallCap': {},
    'mediumCap': {},
    'largeCap': {},
    'megaCap': {}
}

# Process each market cap category
for category_name, cap_list in [('smallCap', smallCap), ('mediumCap', mediumCap), ('largeCap', largeCap), ('megaCap', megaCap)]:
    # Use the first ticker as a reference for the master list of dates
    if cap_list:
        reference_ticker = cap_list[0]
        reference_data = get_historical_data(reference_ticker, api_key)
        reference_dates = [day['date'] for day in reference_data]

        # Initialize the dictionary for this category
        date_aggregated_counts = {date: {'aggregated_count': 0, 'up_count': 0, 'down_count': 0} for date in reference_dates}

        # Loop through each ticker and get its historical data
        for ticker in cap_list:
            historical_data = get_historical_data(ticker, api_key)

            # Loop over each date in the reference ticker's data
            for date in reference_dates:
                # Find the corresponding day's data for the current ticker
                day_data = next((item for item in historical_data if item['date'] == date), None)

                # If the day's data is found, perform the calculations
                if day_data:
                    change = day_data['change']
                    if change > 0:
                        date_aggregated_counts[date]['up_count'] += 1
                        date_aggregated_counts[date]['aggregated_count'] += 1
                    elif change < 0:
                        date_aggregated_counts[date]['down_count'] += 1
                        date_aggregated_counts[date]['aggregated_count'] -= 1
                # If there's no data for that date, skip it
                else:
                    continue

        # Reverse the order so the earliest date comes first
        graph_data = [{"date": date, "aggregated_count": count['aggregated_count'], "up_count": count['up_count'], "down_count": count['down_count']} for date, count in date_aggregated_counts.items()]
        graph_data.reverse()

        # Store the data for this market cap category
        aggregated_counts[category_name] = graph_data

bucket_name = 'earningsedge'

# Path in the bucket
destination_blob_name = 'Insights/advancers_and_decliners.json'
# Initialize the GCS client
storage_client = storage.Client()
# Create a bucket object
bucket = storage_client.bucket(bucket_name)

# Create a blob object
blob = bucket.blob(destination_blob_name)

# Upload the JSON data to the GCS bucket
blob.upload_from_string(
    data=json.dumps(aggregated_counts, indent=4),
    content_type='application/json'
)

print(f"Data uploaded to {destination_blob_name} in the bucket {bucket_name}")

