Macroeconomic variables from the World Bank?
Market volatility index?

In [37]:
import pandas as pd

In [38]:
# Load the raw financial statements table; the relative path is resolved
# against the notebook's current working directory.
df = pd.read_csv(filepath_or_buffer='financial_statements.csv')

In [39]:
# Print a summary of the loaded frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 15 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   Ticker                                               9 non-null      object 
 1   Date                                                 9 non-null      int64  
 2   CostOfGoodsAndServicesSold                           6 non-null      float64
 3   GrossProfit                                          9 non-null      int64  
 4   NetIncomeLoss                                        9 non-null      int64  
 5   OperatingIncomeLoss                                  9 non-null      int64  
 6   RevenueFromContractWithCustomerExcludingAssessedTax  9 non-null      int64  
 7   Assets                                               9 non-null      int64  
 8   AssetsCurrent                                        9 non-null      int64

In [40]:
# Short, human-readable labels for the verbose source column names.
# `rename` silently skips keys that are not present in the frame.
column_labels = {
    'RevenueFromContractWithCustomerExcludingAssessedTax': 'Revenues',
    'CostOfGoodsAndServicesSold': 'Cost of Goods Sold',
    'GrossProfit': 'Gross Profit',
    'OperatingIncomeLoss': 'Operating Income',
    'NetCashProvidedByUsedInFinancingActivities': 'Cash from Financing Activities',
    'NetCashProvidedByUsedInInvestingActivities': 'Cash from Investing Activities',
    'NetCashProvidedByUsedInOperatingActivities': 'Cash from Operating Activities',
}
# Reassign rather than mutate with `inplace=True`: same resulting columns,
# but the cell reads as a plain transformation and stays chainable.
df = df.rename(columns=column_labels)

# Calculate operating expenses as gross profit - operating income
df['Operating Expenses'] = df['Gross Profit'] - df['Operating Income']

# Calculate long-term assets as total assets - current assets
df['Long-Term Assets'] = df['Assets'] - df['AssetsCurrent']

# Calculate long-term liabilities as total liabilities - current liabilities
df['Long-Term Liabilities'] = df['Liabilities'] - df['LiabilitiesCurrent']

# Calculate equity as total assets - total liabilities
df['Equity'] = df['Assets'] - df['Liabilities']

# Snapshot of the column names at this point. Despite the name, this list is
# taken AFTER the four derived columns above were added, so it contains them
# as well as the renamed source columns.
original_columns = df.columns.tolist()
print("Original Columns:")
print(original_columns)

Original Columns:
['Ticker', 'Date', 'Cost of Goods Sold', 'Gross Profit', 'NetIncomeLoss', 'Operating Income', 'Revenues', 'Assets', 'AssetsCurrent', 'Liabilities', 'LiabilitiesAndStockholdersEquity', 'LiabilitiesCurrent', 'Cash from Financing Activities', 'Cash from Investing Activities', 'Cash from Operating Activities', 'Operating Expenses', 'Long-Term Assets', 'Long-Term Liabilities', 'Equity']


In [41]:
# Calculate financial ratios (element-wise, one value per ticker/period row)
df['Current Ratio'] = df['AssetsCurrent'] / df['LiabilitiesCurrent']
df['Debt to Equity Ratio'] = df['Liabilities'] / df['Equity']
df['Return on Assets'] = df['NetIncomeLoss'] / df['Assets']
df['Return on Equity'] = df['NetIncomeLoss'] / df['Equity']
df['Profit Margin'] = df['NetIncomeLoss'] / df['Revenues']
df['Operating Margin'] = df['Operating Income'] / df['Revenues']

# Calculate growth rates
# Rows must be ordered by date within each ticker so that pct_change compares
# every period with the one immediately preceding it for the same company.
df = df.sort_values(by=['Ticker', 'Date'])

# Source column -> name of its period-over-period growth column.
# Insertion order determines the order of the new columns in the frame.
growth_columns = {
    'Revenues': 'Revenue Growth',
    'Current Ratio': 'Current Ratio Growth',
    'Debt to Equity Ratio': 'Debt to Equity Ratio Growth',
    'Return on Assets': 'Return on Assets Growth',
    'Return on Equity': 'Return on Equity Growth',
    'Profit Margin': 'Profit Margin Growth',
    'Operating Margin': 'Operating Margin Growth',
    'Cash from Financing Activities': 'Cash from Financing Activities Growth',
    'Cash from Investing Activities': 'Cash from Investing Activities Growth',
    'Cash from Operating Activities': 'Cash from Operating Activities Growth',
}
# NOTE(review): pct_change divides by the previous value, so when the previous
# value is negative (possible for cash-flow columns -- confirm) the sign of the
# result is inverted relative to the actual direction of change.
by_ticker = df.groupby('Ticker')
# The first row of each ticker has no predecessor and therefore gets NaN.
df = df.assign(**{
    growth_name: by_ticker[source_name].pct_change()
    for source_name, growth_name in growth_columns.items()
})

# Keep Ticker, Date, Assets and every column created in this cell; everything
# listed in original_columns (defined in an earlier cell) is dropped.
cols = ['Ticker', 'Date', 'Assets'] + [col for col in df.columns if col not in original_columns]
# .copy() makes the subset an independent frame, so later column assignments
# (e.g. overwriting 'Assets') do not raise SettingWithCopyWarning.
df = df[cols].copy()
df

Unnamed: 0,Ticker,Date,Assets,Current Ratio,Debt to Equity Ratio,Return on Assets,Return on Equity,Profit Margin,Operating Margin,Revenue Growth,Current Ratio Growth,Debt to Equity Ratio Growth,Return on Assets Growth,Return on Equity Growth,Profit Margin Growth,Operating Margin Growth,Cash from Financing Activities Growth,Cash from Investing Activities Growth,Cash from Operating Activities Growth
2,AAPL,202209,352755000000,0.879356,5.961537,0.282924,1.969589,0.253096,0.302887,,,,,,,,,,
1,AAPL,202309,352583000000,0.988012,4.673462,0.275098,1.56076,0.253062,0.298214,-0.028005,0.123563,-0.216064,-0.027661,-0.207571,-0.000135,-0.015429,-0.020416,-1.165742,-0.09503
0,AAPL,202409,364980000000,0.867313,5.40878,0.256825,1.645935,0.239713,0.315102,0.02022,-0.122164,0.157339,-0.066425,0.054573,-0.052753,0.056631,0.124392,-0.207827,0.069756
5,MSFT,202206,364840000000,1.784607,1.190679,0.19937,0.436755,0.366863,0.420553,,,,,,,,,,
4,MSFT,202306,411976000000,1.769167,0.997721,0.175644,0.350887,0.341462,0.417729,0.06882,-0.008652,-0.162057,-0.119004,-0.196604,-0.069238,-0.006715,-0.253771,-0.251757,-0.016319
3,MSFT,202406,512163000000,1.274955,0.907661,0.172086,0.328281,0.35956,0.446443,0.1567,-0.279347,-0.090266,-0.020256,-0.064425,0.053,0.068739,-0.140617,3.275573,0.353566
8,TSLA,202212,82338000000,1.531956,0.793934,0.152493,0.273563,0.154133,0.167636,,,,,,,,,,
7,TSLA,202312,106618000000,1.725894,0.676146,0.140661,0.235769,0.154971,0.091875,0.187953,0.126595,-0.14836,-0.077592,-0.138157,0.005435,-0.45194,-1.734052,0.301595,-0.099701
6,TSLA,202412,122070000000,2.024912,0.656759,0.05809,0.09624,0.072587,0.072433,0.009476,0.173254,-0.028674,-0.587024,-0.591801,-0.53161,-0.21161,0.488219,0.205531,0.125754


In [42]:
# Replace Assets with its z-score: subtract the mean and divide by the
# standard deviation, both computed over all rows of the frame (every ticker
# and period pooled together).
df['Assets'] = df['Assets'].sub(df['Assets'].mean()).div(df['Assets'].std())
df

Unnamed: 0,Ticker,Date,Assets,Current Ratio,Debt to Equity Ratio,Return on Assets,Return on Equity,Profit Margin,Operating Margin,Revenue Growth,Current Ratio Growth,Debt to Equity Ratio Growth,Return on Assets Growth,Return on Equity Growth,Profit Margin Growth,Operating Margin Growth,Cash from Financing Activities Growth,Cash from Investing Activities Growth,Cash from Operating Activities Growth
2,AAPL,202209,0.365781,0.879356,5.961537,0.282924,1.969589,0.253096,0.302887,,,,,,,,,,
1,AAPL,202309,0.364659,0.988012,4.673462,0.275098,1.56076,0.253062,0.298214,-0.028005,0.123563,-0.216064,-0.027661,-0.207571,-0.000135,-0.015429,-0.020416,-1.165742,-0.09503
0,AAPL,202409,0.445558,0.867313,5.40878,0.256825,1.645935,0.239713,0.315102,0.02022,-0.122164,0.157339,-0.066425,0.054573,-0.052753,0.056631,0.124392,-0.207827,0.069756
5,MSFT,202206,0.444644,1.784607,1.190679,0.19937,0.436755,0.366863,0.420553,,,,,,,,,,
4,MSFT,202306,0.752239,1.769167,0.997721,0.175644,0.350887,0.341462,0.417729,0.06882,-0.008652,-0.162057,-0.119004,-0.196604,-0.069238,-0.006715,-0.253771,-0.251757,-0.016319
3,MSFT,202406,1.406029,1.274955,0.907661,0.172086,0.328281,0.35956,0.446443,0.1567,-0.279347,-0.090266,-0.020256,-0.064425,0.053,0.068739,-0.140617,3.275573,0.353566
8,TSLA,202212,-1.398877,1.531956,0.793934,0.152493,0.273563,0.154133,0.167636,,,,,,,,,,
7,TSLA,202312,-1.240433,1.725894,0.676146,0.140661,0.235769,0.154971,0.091875,0.187953,0.126595,-0.14836,-0.077592,-0.138157,0.005435,-0.45194,-1.734052,0.301595,-0.099701
6,TSLA,202412,-1.139598,2.024912,0.656759,0.05809,0.09624,0.072587,0.072433,0.009476,0.173254,-0.028674,-0.587024,-0.591801,-0.53161,-0.21161,0.488219,0.205531,0.125754


In [43]:
# Fill missing growth rates with the mean growth of the same company.
# A column counts as a growth rate when its name contains 'Growth'.
growth_rate_columns = [name for name in df.columns if 'Growth' in name]
for growth_col in growth_rate_columns:
    # Per-row series holding the ticker-level mean of this column
    # (NaN entries are skipped when the mean is taken).
    company_mean = df.groupby('Ticker')[growth_col].transform('mean')
    # Only NaN cells are replaced; existing values are kept as they are.
    df[growth_col] = df[growth_col].fillna(company_mean)

df

Unnamed: 0,Ticker,Date,Assets,Current Ratio,Debt to Equity Ratio,Return on Assets,Return on Equity,Profit Margin,Operating Margin,Revenue Growth,Current Ratio Growth,Debt to Equity Ratio Growth,Return on Assets Growth,Return on Equity Growth,Profit Margin Growth,Operating Margin Growth,Cash from Financing Activities Growth,Cash from Investing Activities Growth,Cash from Operating Activities Growth
2,AAPL,202209,0.365781,0.879356,5.961537,0.282924,1.969589,0.253096,0.302887,-0.003892,0.0007,-0.029363,-0.047043,-0.076499,-0.026444,0.020601,0.051988,-0.686785,-0.012637
1,AAPL,202309,0.364659,0.988012,4.673462,0.275098,1.56076,0.253062,0.298214,-0.028005,0.123563,-0.216064,-0.027661,-0.207571,-0.000135,-0.015429,-0.020416,-1.165742,-0.09503
0,AAPL,202409,0.445558,0.867313,5.40878,0.256825,1.645935,0.239713,0.315102,0.02022,-0.122164,0.157339,-0.066425,0.054573,-0.052753,0.056631,0.124392,-0.207827,0.069756
5,MSFT,202206,0.444644,1.784607,1.190679,0.19937,0.436755,0.366863,0.420553,0.11276,-0.144,-0.126161,-0.06963,-0.130514,-0.008119,0.031012,-0.197194,1.511908,0.168623
4,MSFT,202306,0.752239,1.769167,0.997721,0.175644,0.350887,0.341462,0.417729,0.06882,-0.008652,-0.162057,-0.119004,-0.196604,-0.069238,-0.006715,-0.253771,-0.251757,-0.016319
3,MSFT,202406,1.406029,1.274955,0.907661,0.172086,0.328281,0.35956,0.446443,0.1567,-0.279347,-0.090266,-0.020256,-0.064425,0.053,0.068739,-0.140617,3.275573,0.353566
8,TSLA,202212,-1.398877,1.531956,0.793934,0.152493,0.273563,0.154133,0.167636,0.098714,0.149925,-0.088517,-0.332308,-0.364979,-0.263088,-0.331775,-0.622916,0.253563,0.013027
7,TSLA,202312,-1.240433,1.725894,0.676146,0.140661,0.235769,0.154971,0.091875,0.187953,0.126595,-0.14836,-0.077592,-0.138157,0.005435,-0.45194,-1.734052,0.301595,-0.099701
6,TSLA,202412,-1.139598,2.024912,0.656759,0.05809,0.09624,0.072587,0.072433,0.009476,0.173254,-0.028674,-0.587024,-0.591801,-0.53161,-0.21161,0.488219,0.205531,0.125754


In [44]:
# Persist the engineered feature table; the row index is not written out.
df.to_csv(path_or_buf='financial_statements_enhanced.csv', index=False)