In [None]:
# In order to load environment variables, import these 2 libraries:
import os
from dotenv import load_dotenv

In [None]:
# Read the key/value pairs stored in the local file '_.env' into the process environment
load_dotenv('_.env')

# Look up both API keys; each name is None when the variable is not set
API_KEY_FMP = os.getenv('API_KEY_FMP')
API_KEY_FRED = os.getenv('API_KEY_FRED')

In [None]:
# HTTP client used for every API call below
import requests

# Shared pieces of every Financial Modeling Prep request
BASE_URL = 'https://financialmodelingprep.com/api/v3'
company_tick = "AAPL"

# Dividend history endpoint for the chosen ticker
endpoint_url_dividend = (
    f"{BASE_URL}/historical-price-full/stock_dividend/{company_tick}"
    f"?apikey={API_KEY_FMP}"
)

# Key-metrics endpoint for the chosen ticker
endpoint_url_ratios = (
    f"{BASE_URL}/key-metrics/{company_tick}"
    f"?apikey={API_KEY_FMP}"
)

In [None]:
# Send an HTTP GET request to the dividend-history endpoint URL and store the response
response = requests.get(endpoint_url_dividend)
# A 429 status is only reported here; execution continues with whatever was returned
if response.status_code == 429:
    print("FMP API limit reached")
# Show the type of the returned response object
print(type(response))

In [None]:
# Convert json to dictionary object and then a Pandas Dataframe
import pandas as pd
# Parse the JSON body of the dividend response
response_dict = response.json()
# Build the frame from the 'historical' entry of the payload.
# NOTE(review): this lookup fails if the payload has no 'historical' key — confirm what the
# API returns for tickers without dividends or for error responses.
dividends = pd.DataFrame(response_dict['historical'])

In [None]:
# Inspect the dividend frame built from the 'historical' records
print(dividends) 

In [None]:
# Dump the full parsed payload for inspection
print(response_dict)

In [None]:
# Preferred date range for the analysis (first and last year of interest)
start_year = 2013
end_year = 2022

# Shift the upper bound by one so it can be used as an exclusive range() stop
end_year += 1

# Preview the years covered
[*range(start_year, end_year)]

In [None]:
# Data Transformation
if dividends.shape != (0, 0):
    # Derive the calendar year of every dividend record from its date
    dividends['year'] = pd.to_datetime(dividends['date']).dt.year
    # Total dividend paid per year
    dividends = dividends.groupby("year").agg({"adjDividend": "sum"}).reset_index()
    # Scaffold holding every year of the window (one extra year on each side),
    # so that years without any dividend are not dropped
    all_years = pd.DataFrame({'year': list(range(start_year - 1, end_year + 1))})
    # Left-join the yearly totals onto the scaffold; years with no record become 0.0
    dividends = all_years.merge(dividends, on='year', how='left').fillna(0.0)
else:
    # The company never issued a dividend: fabricate an all-zero history
    # covering the same window (two more years than the requested range)
    dividends = pd.DataFrame({
        "year": list(range(start_year - 1, end_year + 1)),
        "adjDividend": [0.0] * len(list(range(start_year - 1, end_year + 1)))
    })

In [None]:
# Let's create our target variable
import numpy as np

# Dividend of the following year, aligned on the current year's row (last row becomes NaN)
dividends['next_year_dividend'] = dividends['adjDividend'].shift(-1)

# Comparisons against NaN are False, so rows without a following year match neither condition
conditions = [
    dividends['adjDividend'] <= dividends['next_year_dividend'],
    dividends['adjDividend'] > dividends['next_year_dividend']
]

choices = ['constant/increased', 'decreased']

# Create the target column 'dps_change_next_year' based on the conditions.
# The labels are strings, so the fallback must not be the float np.nan: recent NumPy
# refuses to find a common dtype for str and float, and older releases silently wrote
# the literal text 'nan'. default=None yields an object column whose unmatched rows are
# genuinely missing (pandas treats None as NA).
dividends['dps_change_next_year'] = np.select(conditions, choices, default=None)

In [None]:
# Predictor: absolute change in dividend versus the previous year
previous = dividends['adjDividend'].shift(periods=1)
dividends['last_year_dividend'] = previous
dividends['dps_growth'] = dividends['adjDividend'].sub(dividends['last_year_dividend'])
del previous

In [None]:
# Inspect the frame after adding the target and the growth columns
print(dividends)

In [None]:
# Predictor: dividend change versus the previous year, expressed as a percentage
dividends['dps_growth_rate'] = np.select(
    [
        # No dividend in either year: the change is 0
        (dividends['last_year_dividend'] == 0) & (dividends['adjDividend'] == 0),
        # A non-zero base exists: ordinary percentage change
        dividends['last_year_dividend'] != 0,
    ],
    [
        0,
        ((dividends['adjDividend'] / dividends['last_year_dividend']) - 1) * 100,
    ],
    # Dividend started from a zero base: flag with the sentinel 999
    default=999,
)

In [None]:
# Drop the extra first and last years (their shifted values are NaN)
# and keep only the columns needed downstream
dividends = dividends.loc[
    dividends['year'].between(start_year, end_year - 1),
    ["year", "adjDividend", "dps_growth", "dps_growth_rate", "dps_change_next_year"],
]

In [None]:
# Inspect the trimmed dividend frame
print(dividends)

In [None]:
# Engineer some other predictors.
# The scaffold starts one year before start_year so that year-over-year
# changes can be computed for the first year of interest.
predictor_years = list(range(start_year - 1, end_year))
predictors = pd.DataFrame({"year": predictor_years})
del predictor_years

In [None]:
# Inspect the year scaffold for the predictors
print(predictors)

In [None]:
# Add the company's industry and sector (taken from yfinance) to every predictor row
import yfinance as yf

company_data_raw = yf.Ticker(company_tick)
company_data = company_data_raw.info
# Each value is a scalar, so it is broadcast to all years
for profile_field in ("industry", "sector"):
    predictors[profile_field] = company_data[profile_field]
del profile_field

In [None]:
# Inspect the predictors after adding industry and sector
print(predictors)

In [None]:
# More predictors -- dividend payout ratio, return on equity,
# board composition (gender and title) and dividend yield

# Shared pieces of the API request endpoint urls
BASE_URL = 'https://financialmodelingprep.com/api/v3'
company_tick = "AAPL"

# One URL per resource: ratios, key executives, key metrics
endpoint_ratios, endpoint_ceo, endpoint_yield = (
    f"{BASE_URL}/{resource}/{company_tick}?apikey={API_KEY_FMP}"
    for resource in ("ratios", "key-executives", "key-metrics")
)

In [None]:
def _get_fmp(url):
    """Send an HTTP GET request to `url` and return the response.

    Prints a warning when the server answers 429 and then prints the type of
    the response object. The response is returned regardless of its status.
    """
    fmp_response = requests.get(url)
    # Check the status of the response that was just received (the original cell
    # kept re-checking the key-executives response for the later requests)
    if fmp_response.status_code == 429:
        print("FMP API limit reached")
    print(type(fmp_response))
    return fmp_response


# Send an HTTP GET request to each endpoint URL and store the responses
end_response = _get_fmp(endpoint_ratios)    # ratios endpoint
end_response1 = _get_fmp(endpoint_ceo)      # key-executives endpoint
end_response2 = _get_fmp(endpoint_yield)    # key-metrics endpoint

# The fourth response was a second request to the very same key-metrics URL.
# Reuse the response already in hand instead of spending another call against
# the rate-limited API; downstream code only reads its JSON body.
end_response3 = end_response2

In [None]:
# Parse every response body, then turn each parsed payload into a DataFrame
import pandas as pd

response_dicts, response_dicts2, response_dicts3, response_dicts4 = (
    api_response.json()
    for api_response in (end_response, end_response1, end_response2, end_response3)
)

payout = pd.DataFrame(response_dicts)            # from the ratios response
board_compose = pd.DataFrame(response_dicts2)    # from the key-executives response
div_yield = pd.DataFrame(response_dicts3)        # from the key-metrics response
roe = pd.DataFrame(response_dicts4)              # from the second key-metrics response

In [None]:
# Inspect the frame built from the ratios response
print(payout)

In [None]:
# Inspect the frame built from the key-executives response
print(board_compose)

In [None]:
# Inspect the frame built from the key-metrics response
print(div_yield)

In [None]:
# Inspect the second frame built from the key-metrics endpoint
print(roe)

In [None]:
def _yearly(frame, column):
    """Return `column` of `frame` as a Series indexed by the year of its 'date' column.

    When several rows share a year only the first one is kept, so the index is
    unique and can be used as a lookup table.
    Assumes `frame` has a 'date' column, like the dividend history — TODO confirm
    against the printed ratios / key-metrics frames.
    """
    report_year = pd.to_datetime(frame['date']).dt.year
    return (
        frame.assign(year=report_year)
        .drop_duplicates(subset='year')
        .set_index('year')[column]
    )


# Attach the yearly metrics by matching on the year. Plain column assignment aligns
# on the positional index instead, which pairs API row i with predictor row i no
# matter which year that row reports. Years absent from the API data stay NaN.
predictors['dividendPayoutRatio'] = predictors['year'].map(_yearly(payout, 'dividendPayoutRatio'))
predictors['dividendYield'] = predictors['year'].map(_yearly(div_yield, 'dividendYield'))
predictors['roe'] = predictors['year'].map(_yearly(roe, 'roe'))

# NOTE(review): the executives frame presumably holds one row per executive, not per
# year, so these positional assignments pair executive i with year row i. Left as-is;
# confirm the intended feature (e.g. the CEO's title/gender repeated for every year).
predictors['boardtitle'] = board_compose['title']
predictors['boardgender'] = board_compose['gender']

In [None]:
# Inspect the predictors after attaching the payout, board, yield and ROE columns
print(predictors)

In [None]:
def calculate_change(df, feature_name):
    """Add the row-over-row percentage change of a column, in place.

    A column named ``"<feature_name>_percentage_change"`` is inserted directly
    after ``feature_name``. It holds ``pct_change() * 100`` of that column, so
    the first row is NaN. If the derived column already exists (for example
    because the cell was run twice) it is replaced rather than raising.

    Parameters:
        df: DataFrame to modify in place.
        feature_name: name of an existing numeric column of ``df``.

    Returns:
        None. ``df`` is mutated.
    """
    percentage_change = df[feature_name].pct_change() * 100
    # Create new column name
    new_col_name = f"{feature_name}_percentage_change"
    # DataFrame.insert refuses duplicate column names, so drop a stale copy first
    if new_col_name in df.columns:
        df.drop(columns=new_col_name, inplace=True)
    # Find the index position of the original predictor column
    original_col_position = df.columns.get_loc(feature_name)
    # Insert the new column right after the original predictor column
    df.insert(original_col_position + 1, new_col_name, percentage_change)

In [None]:
# Start from every predictor column, then take out the ones that are not
# numeric metrics (identifier and categorical columns)
feature_list = list(predictors.columns)
for excluded in ('year', 'industry', 'sector', 'boardtitle', 'boardgender'):
    feature_list.remove(excluded)
del excluded

In [None]:
# Add a percentage-change column next to every numeric predictor
for feature in feature_list:
    calculate_change(predictors, feature)

# Infinite values become the sentinel 999, then remaining missing values become 0
predictors.replace([float('inf'), -float('inf')], 999, inplace=True)
predictors.fillna(0, inplace=True)

# Left-join the predictors onto the dividend data by year
dataset = dividends.merge(predictors, on='year', how='left')

# Reorder the columns so that the target ends up last
feature_list = [
    column for column in dataset.columns if column != 'dps_change_next_year'
] + ['dps_change_next_year']
dataset = dataset[feature_list]

# Display the assembled dataset
dataset