In [1]:
import os

import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import quandl
import seaborn as sns
import statsmodels.api as sm
import tensorflow as tf
import yfinance as yf
from fredapi import Fred
from keras.wrappers.scikit_learn import KerasRegressor
from matplotlib import pyplot as plt
from pandas_datareader.data import DataReader
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM,  Dense, Dropout
from tensorflow.keras.models import Sequential



pd.set_option("display.max_rows",200)
sns.set(rc={'figure.figsize':(16,10)})


def _require_env(name):
    """Return the value of environment variable `name`, failing loudly if it is unset or empty."""
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} must hold an API key before running this notebook"
        )
    return value


# Credentials are read from the environment instead of being committed with the
# notebook. NOTE(review): the keys that used to be hard-coded here should be
# treated as leaked and rotated.
fred_key = _require_env("FRED_API_KEY")
fred = Fred(api_key = fred_key)
quandl.ApiConfig.api_key = _require_env("QUANDL_API_KEY")


In [2]:
def plot(prediction, target):
    """Draw `prediction` as line(s) and shade the periods flagged by `target`.

    Parameters
    ----------
    prediction : pandas.DataFrame or array-like
        A DataFrame is plotted as-is and labelled prediction / lower / upper.
        Anything else is wrapped in a DataFrame indexed like `target`.
    target : pandas.Series
        Indicator passed to ``fill_between`` as its fourth positional argument
        (the ``where`` mask); its index supplies the x values.

    Returns
    -------
    (fig, ax) : the matplotlib figure and axes that were created.
    """
    fig, ax = plt.subplots(figsize=(10, 3), dpi=300)

    if isinstance(prediction, pd.DataFrame):
        # Frames are drawn unchanged; the shaded band is capped just above 1.
        prediction.plot(ax=ax, legend=True)
        ylim = (0, 1)
        legend_list = ["Prediction", 'lower', 'upper', "NBER recession indicator"]
    else:
        Results = pd.DataFrame(prediction, index=target.index)
        Results.plot(ax=ax, legend=True)
        ylim = (Results.min().min(), Results.max().max())
        legend_list = ["Prediction", "NBER recession indicator"]

    # The band runs from 0 to slightly above the tallest plotted value.
    band_top = ylim[1] + 1e-2
    ax.fill_between(target.index, 0, band_top, target, facecolor='k', alpha=0.1)
    # ax.fill_between(target.shift(-250).index, 0, ylim[1] + 1e-2, target.shift(-250), facecolor='r', alpha=0.1)

    ax.legend(legend_list)
    return fig, ax


In [3]:
# Catalogue of the series to download. It is read again later for its
# 'Transformation' column. A forward slash keeps the path portable
# (the backslash form only resolved on Windows).
MacroCode=pd.read_csv('Data/Macro Variables.csv')
MacroCode.replace({'Average HOUST':'HOUST','S&P 500':'SP500'},inplace=True)

errors=[]       # codes FRED could not serve (ValueError from fredapi)
indicators={}   # series name -> pandas Series at its native frequency
freq={}
for code in MacroCode['Variable']:
    # The 'S&P ...' entries are not FRED codes; they are fetched from
    # yfinance / Quandl after the loop.
    if 'S&P' in code:
        continue
    try:
        # Every series is stored unresampled. The earlier per-code branches
        # (CPFF, ICSA, SP500, other) all performed this same assignment;
        # they differed only in commented-out resampling.
        indicators[code]=fred.get_series(code).to_frame(code).squeeze()
    except ValueError:
        errors.append(code)
indicators['S&P: indust']=(yf.download('^SP500-20')['Close'].to_frame('S&P: indust').squeeze()) # .resample("MS").last()
indicators['S&P div yield']=(quandl.get("MULTPL/SP500_DIV_YIELD_MONTH").squeeze().to_frame('S&P div yield').squeeze()) # .resample("MS").last()
indicators['S&P PE ratio']=(quandl.get("MULTPL/SP500_PE_RATIO_MONTH").squeeze().to_frame('S&P PE ratio').squeeze()) # .resample("MS").first()



[*********************100%***********************]  1 of 1 completed


In [4]:
# Report the sampling frequency pandas infers for each downloaded series.
for series_name, series in indicators.items():
    inferred = pd.infer_freq(series.index)
    print(series_name, inferred)

RPI MS
W875RX1 MS
DPCERA3M086SBEA MS
CMRMTSPL MS
RSXFS MS
INDPRO MS
IPFPNSS MS
IPFINAL MS
IPCONGD MS
IPDCONGD MS
IPNCONGD MS
IPBUSEQ MS
IPMAT MS
IPDMAT MS
IPNMAT MS
IPMANSICS MS
IPB51222S MS
IPFUELS MS
CUMFNS MS
CLF16OV MS
CE16OV MS
UNRATE MS
UEMPMEAN MS
UEMPLT5 MS
UEMP5TO14 MS
UEMP15OV MS
UEMP15T26 MS
UEMP27OV MS
ICSA W-SAT
PAYEMS MS
USGOOD MS
CES1021000001 MS
USCONS MS
MANEMP MS
DMANEMP MS
NDMANEMP MS
SRVPRD MS
USTPU MS
USWTRADE MS
USTRADE MS
USFIRE MS
USGOVT MS
CES0600000007 MS
AWOTMAN MS
AWHMAN MS
HOUST MS
HOUSTNE MS
HOUSTMW MS
HOUSTS MS
HOUSTW MS
PERMIT MS
PERMITNE MS
PERMITMW MS
PERMITS MS
PERMITW MS
ACOGNO MS
DGORDER MS
NEWORDER MS
AMDMUO MS
BUSINV MS
ISRATIO MS
M1SL MS
M2SL MS
M2REAL MS
AMBSL MS
TOTRESNS MS
NONBORRES MS
BUSLOANS MS
REALLN MS
NONREVSL MS
TOTALSL MS
SP500 B
FEDFUNDS MS
TB3MS MS
TB6MS MS
GS1 MS
GS5 MS
GS10 MS
AAA MS
BAA MS
CPFF B
TB3SMFFM MS
TB6SMFFM MS
T1YFFM MS
T5YFFM MS
T10YFFM MS
AAAFFM MS
BAAFFM MS
TWEXMMTH MS
EXSZUS MS
EXJPUS MS
EXUSUK MS
EXCAUS MS
WPSFD4920

In [18]:
# Abort before building the panel if any FRED download failed, and say which.
if errors:
    raise RuntimeError(f"Errors found while downloading series: {errors}")

# One column per indicator; rows are the union of every series' dates.
data=pd.concat(indicators,axis=1).copy() #.fillna(method='ffill') #.loc["1971":].dropna(how='all').fillna(method='ffill')
# data.to_csv('Data/HistoricalVariables.csv')
# print(data.shape)
# data.isna().sum().sort_values()/data.shape[0]

In [19]:
# Carry the last observation forward so lower-frequency series have a value on
# every row. `ffill()` replaces the deprecated `fillna(method='ffill')`.
data=data.ffill() #.resample('D').last()

In [20]:
def deltaX(data):
    """First difference: x_t - x_{t-1}."""
    return data.diff()


def delta2X(data):
    """Second difference: x_t - 2*x_{t-1} + x_{t-2}."""
    lag1 = data.shift(1)
    lag2 = data.shift(2)
    return data - 2 * lag1 + lag2


def logdata(data):
    """Natural logarithm of the data."""
    return np.log(data)


def difflog(data):
    """First difference of the natural logarithm."""
    return deltaX(logdata(data))


def difflog2(data):
    """Second difference of the natural logarithm."""
    logged = np.log(data)
    return logged - 2 * logged.shift(1) + logged.shift(2)


def diffpercent(data):
    """First difference of the one-period growth rate (x_t / x_{t-1} - 1)."""
    growth = data / data.shift() - 1
    return growth - growth.shift()


def _identity(data):
    """Return the data untouched (transformation code 1)."""
    return data


# Transformation code -> function. Codes are looked up per variable in the
# 'Transformation' column of MacroCode.
# NOTE(review): the numbering appears to follow the FRED-MD tcode convention - confirm.
transformation = dict(
    enumerate(
        [_identity, deltaX, delta2X, logdata, difflog, difflog2, diffpercent],
        start=1,
    )
)

# Apply to every column the transformation whose code is listed for it in
# MacroCode. The lookup keeps the first row per variable, matching the former
# `.iloc[0]`. Building the frame in one go avoids the per-column copy of a
# growing DataFrame, and no longer rebinds the builtin name `type` at module level.
tcode_by_variable = MacroCode.drop_duplicates('Variable').set_index('Variable')['Transformation']

df_transformed = pd.DataFrame(
    {
        column: transformation[tcode_by_variable[column]](data[column])
        for column in data.columns
    },
    index=data.index,
)

In [21]:
# df=yf.download("GE")
# df.head()

# Window geometry consumed by createXY / align_data further down:
# rows of history per sample, and rows of target per sample.
n_lookback = 30
n_forecast = 1

# df=pd.read_csv('Data/Transformed HistoricalVariables.csv',index_col=0,parse_dates=True)
# Work on an independent copy of the transformed panel from 1971 onwards.
df = df_transformed.loc['1971':].copy()
df.head()

Unnamed: 0,RPI,W875RX1,DPCERA3M086SBEA,CMRMTSPL,RSXFS,INDPRO,IPFPNSS,IPFINAL,IPCONGD,IPDCONGD,...,CES2000000008,CES3000000008,UMCSENT,MZMSL,DTCOLNVHFNM,DTCTHFNM,INVEST,S&P: indust,S&P div yield,S&P PE ratio
1971-01-01,0.008594,0.0096,0.013628,0.00975,,0.007663,0.003694,0.003908,0.014828,0.039006,...,0.014213,0.011976,0.0,0.007963,-0.003878,-0.00768,0.023135,,0.0,0.031962
1971-01-02,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,-0.014213,-0.011976,0.0,-0.007963,0.003878,0.00768,-0.023135,,0.0,0.0
1971-01-09,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0
1971-01-16,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0
1971-01-23,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0


In [22]:

recessions =  DataReader('USRECDM', 'fred', start='1800')
# df[f'in a recession']=recessions

# The label at row t is the recession flag found 250 rows later in `recessions`.
df['target'] = recessions.shift(-250)

# `ffill()` replaces the deprecated `fillna(method='ffill', inplace=True)`.
# NOTE(review): this also forward-fills `target` over the trailing rows where
# the shifted indicator is not known yet - confirm that is intended.
# df=df.fillna(-1)
df = df.ffill().loc['1971':] #.resample('MS').last()
print(df.shape)
df.head()

(11121, 125)


Unnamed: 0,RPI,W875RX1,DPCERA3M086SBEA,CMRMTSPL,RSXFS,INDPRO,IPFPNSS,IPFINAL,IPCONGD,IPDCONGD,...,CES3000000008,UMCSENT,MZMSL,DTCOLNVHFNM,DTCTHFNM,INVEST,S&P: indust,S&P div yield,S&P PE ratio,target
1971-01-01,0.008594,0.0096,0.013628,0.00975,,0.007663,0.003694,0.003908,0.014828,0.039006,...,0.011976,0.0,0.007963,-0.003878,-0.00768,0.023135,,0.0,0.031962,0.0
1971-01-02,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,-0.011976,0.0,-0.007963,0.003878,0.00768,-0.023135,,0.0,0.0,0.0
1971-01-09,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0
1971-01-16,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0
1971-01-23,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0


In [23]:
# Class balance of the label column.
df.target.value_counts()


target
0.0    10099
1.0     1022
Name: count, dtype: int64

In [24]:
# (rows, columns) of the modelling frame, label column included.
df.shape

(11121, 125)

In [25]:
# Length of the label column.
df.target.shape

(11121,)

## Preprocessing


In [28]:
import pandas as pd
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor
from joblib import Parallel, delayed

# Design matrix for the collinearity analysis: the `data` panel with the
# remaining gaps set to 0.
# NOTE(review): this uses `data` (untransformed), not `df` - confirm that is intended.
X = data.fillna(0) #.drop('target', axis=1)  # Drop the target variable column

def calculate_vif(variable, X):
    """Return (variable, VIF of that column of X); small enough to ship to a joblib worker."""
    vif = variance_inflation_factor(X.values, X.columns.get_loc(variable))
    return variable, vif

# Number of CPU cores to utilize
num_cores = 4  # Adjust this value as needed

# Calculate VIF in parallel (one regression per column)
vif_results = Parallel(n_jobs=num_cores)(
    delayed(calculate_vif)(variable, X) for variable in X.columns
)

# Create VIF dataframe
vif = pd.DataFrame(vif_results, columns=['Variable', 'VIF'])

# Sort variables by VIF score
vif = vif.sort_values(by="VIF", ascending=False).reset_index(drop=True)

# Group collinear variables together
groups = []
group_threshold = 5          # VIF above which a variable seeds a group
correlation_threshold = 0.7  # |corr| above which a variable joins the seed's group

# The correlation matrix does not change inside the loop, so compute it once.
abs_corr = X.corr().abs()

while not vif.empty and vif["VIF"].max() > group_threshold:
    max_vif_variable = vif.loc[vif["VIF"].idxmax(), "Variable"]

    # Other variables ordered by absolute correlation with the seed
    correlated_vars = (
        abs_corr[max_vif_variable]
        .drop(max_vif_variable)
        .sort_values(ascending=False)
    )
    group = [max_vif_variable] + list(
        correlated_vars[correlated_vars > correlation_threshold].index
    )

    # Remove the whole group, seed included, from further consideration.
    # Leaving the seed in `vif` made this loop spin forever whenever the seed
    # had no partner above the correlation threshold.
    vif = vif[~vif["Variable"].isin(group)]

    groups.append(group)

# Print the groups of collinear variables
for i, group in enumerate(groups):
    print(f"Group {i+1}: {group}")


KeyboardInterrupt: 

In [None]:
# Chronological split by calendar year: train up to 1999, validate 2000-2003,
# test from 2004 onwards.
df_for_training = df.loc[:"1999"]
df_for_validating = df.loc["2000":"2003"]
df_for_testing = df.loc["2004":]

for _split_label, _split_frame in (
    ('df_for_training', df_for_training),
    ('df_for_validating', df_for_validating),
    ('df_for_testing', df_for_testing),
):
    print(f'{_split_label} shape: ', _split_frame.shape)


# without recessions 

# GradientBoostingClassifier

## Logit

In [None]:
# # loading the training dataset 
# # Xtrain=df_for_training.drop(["target"],axis=1).fillna(0)
# Xtrain=data.loc[df_for_training.index].fillna(0)
# ytrain=(df_for_training['target']+1)/2
   
# # building the model and fitting the data
# log_reg = sm.Logit(ytrain, Xtrain).fit()

## CNN-LSTM

In [None]:
def createXY(dataset,n_lookback = 60,n_forecast = 3,target_col=-1):
    """Cut a 2-D array into supervised (look-back window, forecast) samples.

    Sample k uses rows [k, k + n_lookback) of every non-target column as
    features and rows [k + n_lookback, k + n_lookback + n_forecast) of the
    target column as labels.

    Parameters
    ----------
    dataset : array-like, shape (n_rows, n_cols)
    n_lookback : int
        Rows of history in each feature window.
    n_forecast : int
        Rows of the target in each label vector.
    target_col : int
        Column holding the target (negative values count from the end). That
        column is excluded from the features; previously the last column was
        always dropped regardless of this argument.

    Returns
    -------
    X : ndarray, shape (n_samples, n_lookback, n_cols - 1)
    Y : ndarray, shape (n_samples, n_forecast)
        Both keep these ranks (with n_samples == 0) when `dataset` is too
        short to yield a single window.

    Raises
    ------
    IndexError
        If `target_col` is not a valid column position.
    """
    dataset = np.asarray(dataset)
    n_rows, n_cols = dataset.shape
    if not -n_cols <= target_col < n_cols:
        raise IndexError(f"target_col {target_col} is out of range for {n_cols} columns")
    target_idx = target_col % n_cols

    # Split features and target once instead of re-slicing columns per window.
    features = dataset[:, np.arange(n_cols) != target_idx]
    target = dataset[:, target_idx]

    X = []
    Y = []
    for i in range(n_lookback, n_rows - n_forecast + 1):
        X.append(features[i - n_lookback: i])
        Y.append(target[i: i + n_forecast])

    if not X:
        return (
            np.empty((0, n_lookback, n_cols - 1), dtype=dataset.dtype),
            np.empty((0, n_forecast), dtype=dataset.dtype),
        )
    return np.array(X),np.array(Y)

In [None]:

# Scaling: the min/max are learned on the training frame only and then reused
# unchanged for the validation and test frames. Gaps are set to 0 beforehand.
scaler = MinMaxScaler(feature_range=(0,1))
df_for_training_scaled = scaler.fit_transform(df_for_training.fillna(0))
df_for_validating_scaled = scaler.transform(df_for_validating.fillna(0))
df_for_testing_scaled = scaler.transform(df_for_testing.fillna(0))

print()

# Every split is windowed with the same geometry.
_window_kwargs = dict(n_lookback=n_lookback, n_forecast=n_forecast)
trainX, trainY = createXY(df_for_training_scaled, **_window_kwargs)
validX, validY = createXY(df_for_validating_scaled, **_window_kwargs)
testX, testY = createXY(df_for_testing_scaled, **_window_kwargs)
trainX.shape,validX.shape,testX.shape

In [None]:
def CustomLoss(labels, pred,w=0.2):
    """Weighted binary cross-entropy computed from probabilities.

    Parameters
    ----------
    labels : tensor of 0/1 targets.
    pred : tensor of predicted probabilities (e.g. sigmoid outputs).
    w : float
        Passed as ``pos_weight`` to
        ``tf.nn.weighted_cross_entropy_with_logits``.

    Returns
    -------
    Tensor of element-wise losses.
    """
    # Keep probabilities strictly inside (0, 1): a saturated sigmoid would
    # otherwise turn the logit below into +/-inf and the loss into inf/NaN.
    eps = tf.keras.backend.epsilon()
    pred = tf.clip_by_value(pred, eps, 1.0 - eps)
    logits = tf.math.log(pred / (1.0 - pred))
    # The TF op requires labels and logits to share a dtype.
    labels = tf.cast(labels, logits.dtype)
    return tf.nn.weighted_cross_entropy_with_logits(labels, logits, w)

In [None]:
from tensorflow.keras.layers import LSTM,  Dense, Dropout, Conv1D, MaxPool1D,Flatten,RepeatVector
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
# Where the best checkpoint is written, and read back from later.
PT_path='Model/CNN2-LSTM-final'
# A loss improves by decreasing, so early stopping must track its minimum;
# with mode='max' every decrease was counted as "no improvement".
f1 = EarlyStopping(monitor='loss', mode='min', verbose=1, patience=30)
# Keep only the checkpoint with the highest validation recall.
f2 = ModelCheckpoint(filepath=PT_path, monitor='val_recall', mode='max', verbose=0, save_best_only=True)
# f3 = ModelCheckpoint(filepath=PT_path+"/fbeta", monitor='val_fbeta_score', mode='max', verbose=0, save_best_only=True)



In [None]:
import tensorflow_addons as tfa


In [None]:
# Two 1-D convolutions over the look-back window, then a single-step LSTM and
# a sigmoid head with one unit per forecast step.
model = Sequential([
    Conv1D(filters=32, kernel_size=(3,), activation='relu',
           input_shape=(trainX.shape[1], trainX.shape[2])),
    Conv1D(filters=32, kernel_size=(3,), activation='relu'),
    MaxPool1D(1),
    Dropout(0.1),
    Flatten(),
    RepeatVector(1),
    LSTM(100, return_sequences=False),
    Dense((n_forecast), activation='sigmoid'),
])

model.summary()

# Metrics get explicit names: string aliases are auto-numbered ('recall_1', ...)
# when another instance is created in the same session, which would break the
# 'val_recall' checkpoint monitor and the history['recall'] look-ups.
model.compile(optimizer='adam',
              metrics=[tf.keras.metrics.Recall(name='recall'),
                       tf.keras.metrics.Precision(name='precision')],
              loss=tfa.losses.SigmoidFocalCrossEntropy()
            #   loss='BinaryCrossentropy'
              )

# fit the model
history = model.fit(trainX, trainY, epochs=100,
                    batch_size=320,
                    # validation_split=0.1,
                    validation_data=(validX, validY),
                    verbose=1,
                    callbacks=[
                        #  f1,
                        f2,
                        # f3
                    ],
                    )

In [None]:
# Assuming you have imported necessary libraries and defined the `history` object

# Creating a single figure with subplots
# One panel per tracked quantity, training and validation curves overlaid.
fig, axes = plt.subplots(2, 1, figsize=(10,8))

_panels = (
    # (axes, history key, legend wording, y-axis label)
    (axes[0], 'loss', 'loss', 'Loss'),
    (axes[1], 'recall', 'Recall', 'Recall'),
)
for _axis, _history_key, _legend_word, _y_label in _panels:
    _axis.plot(history.history[_history_key], label=f'Training {_legend_word}')
    _axis.plot(history.history[f'val_{_history_key}'], label=f'Validation {_legend_word}')
    _axis.set_xlabel('Epochs')
    _axis.set_ylabel(_y_label)
    _axis.legend()

plt.tight_layout()  # keeps the two panels from overlapping
plt.show()


In [None]:
# Fresh copy of the architecture (clone_model does not copy weights).
best_model=tf.keras.models.clone_model(model)
best_model.load_weights(PT_path)
# NOTE(review): this second load replaces the weights loaded just above.
# Forward slash: the former "Model\C..." literal relied on an invalid escape
# sequence and only resolved as a path on Windows.
best_model.load_weights("Model/CNN2-LSTM-110-test")
# best_model_f2=tf.keras.models.clone_model(model)
# best_model_f2.load_weights(PT_path+"/fbeta")

In [None]:
def align_data(pred, origin):
    """Place windowed predictions on the dates of `origin` they refer to.

    Prediction i covers `n_forecast` consecutive rows starting at row i of the
    output. The output has ``len(pred) + n_forecast - 1`` rows and is indexed
    by the *last* that-many labels of `origin.index`: windows produced by
    createXY on `origin` skip its first `n_lookback` rows, so the predictions
    line up with the tail of the frame. (The previous version indexed them by
    a synthetic daily range starting at ``origin.index[0]``, which ignored
    both that offset and the real, irregular dates.)

    Parameters
    ----------
    pred : array-like, shape (n_samples, n_forecast)
    origin : pandas object whose index carries the dates of the source rows.

    Returns
    -------
    pandas.DataFrame with columns 'm' (row mean over the forecast columns),
    'lc' and 'uc' (mean -/+ 1.96 * std / sqrt(n_forecast)).

    Notes
    -----
    Reads the module-level `n_forecast`. With a single forecast column the
    standard deviation, and therefore 'lc'/'uc', is NaN.
    """
    pred = np.asarray(pred, dtype=float)
    n_rows = pred.shape[0] + n_forecast - 1

    if n_rows <= len(origin.index):
        index = origin.index[len(origin.index) - n_rows:]
    else:
        # More predictions than source rows: fall back to the former synthetic range.
        index = pd.date_range(start=origin.index[0], freq='D', periods=n_rows)

    # Successive predictions rotate through the columns, so the n_forecast
    # overlapping forecasts for one row never overwrite each other.
    grid = np.full((n_rows, n_forecast), np.nan)
    for i in range(pred.shape[0]):
        grid[i:i + n_forecast, i % n_forecast] = pred[i]

    aligned = pd.DataFrame(grid, index=index,
                           columns=[f'pred{i}' for i in range(n_forecast)])
    CI = aligned.std(axis=1) * 1.96 / np.sqrt(aligned.shape[1])
    mean = aligned.mean(axis=1)
    return pd.concat([mean, mean - CI, mean + CI], keys=['m', 'lc', "uc"], axis=1)

# using best Recall model

In [None]:
# Forward slash: the former "Model\C..." literal relied on an invalid escape
# sequence and only resolved as a path on Windows.
best_model.load_weights("Model/CNN2-LSTM-final")

# Predicted probabilities for every window of each split.
prediction_train=best_model.predict(trainX)
prediction_valid=best_model.predict(validX)
prediction_test=best_model.predict(testX)


In [None]:
# Put the test predictions beside the last column of the test frame. The row
# count follows the predictions instead of the hard-coded 6116, and the
# explicit copy avoids assigning into a slice of df_for_testing.
TT = df_for_testing.iloc[-len(prediction_test):, [-1]].copy()
TT['Pred'] = prediction_test
TT.plot()

In [None]:
# prediction_train_=model.predict(trainX)
# min_length = min(len(df_for_training), len(prediction_train_))
# trainResults= pd.DataFrame(index=df_for_training.iloc[-min_length:,-1].index)

# trainResults['Actual']=recessions
# trainResults['Pred']=align_data(prediction_train_, df_for_training)['m']
# plot(trainResults['Pred'],trainResults["Actual"])


In [None]:

def _evaluate_split(predictions, frame):
    """Build the Actual/Pred table for one split, plot it, and return it."""
    n_rows = min(len(frame), len(predictions))
    results = pd.DataFrame(index=frame.iloc[-n_rows:, -1].index)
    results['Actual'] = recessions
    results['Pred'] = align_data(predictions[-n_rows:], frame)['m']
    plot(results['Pred'], results['Actual'])
    return results


# The same steps for each split replace three pasted copies. The stray
# top-level `break` that ended this cell was a SyntaxError ('break' outside
# loop) which stopped the whole cell from executing; it has been removed.
trainResults = _evaluate_split(prediction_train, df_for_training)
validResults = _evaluate_split(prediction_valid, df_for_validating)
testResults = _evaluate_split(prediction_test, df_for_testing)

In [None]:

# Stack the three per-split tables, drop incomplete rows, then overwrite the
# 'Actual' column with the recession indicator.
_all_splits = [trainResults, validResults, testResults]
FullResults = pd.concat(_all_splits).dropna()
FullResults['Actual'] = recessions


In [None]:
def metrics(Results,use_recession=None, threshold=0.5):
    """Compute F2 score, recall and precision for thresholded predictions.

    Parameters
    ----------
    Results : pandas.DataFrame
        Needs a 'Pred' column and, unless `use_recession` is given, an
        'Actual' column. Rows containing NaN are dropped first.
    use_recession : optional
        If given, replaces the 'Actual' column (assigned after the NaN drop).
    threshold : float, default 0.5
        Predictions strictly above this value count as positive.

    Returns
    -------
    (f2_score, recall, precision) as floats. A ratio whose denominator is zero
    (e.g. no positive prediction at all) is reported as 0.0 instead of
    NaN-with-a-warning.
    """
    def _ratio(numerator, denominator):
        return float(numerator / denominator) if denominator else 0.0

    # Copy so that overwriting 'Actual' never touches the caller's frame.
    Results = Results.dropna().copy()
    predicted = Results['Pred']
    if use_recession is not None:
        Results['Actual'] = use_recession
    actual = np.asarray(Results['Actual'])

    # Convert the predicted values to binary
    predicted_positive = np.asarray(predicted) > threshold

    # Confusion-matrix counts. Rows whose actual value is neither 0 nor 1
    # fall in none of them.
    TP = int(np.sum((actual == 1) & predicted_positive))
    FP = int(np.sum((actual == 0) & predicted_positive))
    FN = int(np.sum((actual == 1) & ~predicted_positive))

    precision = _ratio(TP, TP + FP)
    recall = _ratio(TP, TP + FN)
    # F-beta with beta = 2: recall weighs four times as much as precision.
    f2_score = _ratio(5 * precision * recall, (4 * precision) + recall)
    return f2_score,recall,precision


In [None]:
# Quick look at the columns of the test-split results.
testResults.plot()

In [None]:

# One row of scores per split. Each is evaluated against the `recessions`
# series passed as `use_recession`.
Metrics_df = pd.DataFrame(index=['train',"validation",'test'],
                          columns=['f2_score','recall','precision'])
for _split_name, _split_results in (
    ('train', trainResults),
    ('validation', validResults),
    ('test', testResults),
):
    Metrics_df.loc[_split_name] = metrics(_split_results, recessions)

Metrics_df

# Feature importance with permutation


In [None]:
# Rows from 2004 onwards are used for the importance analysis.
Feauture_data = df.loc["2004":]

print('Feauture_data shape: ',Feauture_data.shape)

# Scaling with the already-fitted scaler (no refit), gaps set to 0.
Feauture_data_scaled = scaler.transform(Feauture_data.fillna(0))

Feat_X, Feat_Y = createXY(
    Feauture_data_scaled,
    n_lookback=n_lookback,
    n_forecast=n_forecast,
)
Feat_X.shape,Feat_Y.shape

In [None]:
from tqdm.notebook import tqdm


In [None]:
def var_importance(model,scaler,df_for_testing,mode='perturbation',verbose=False,random_state=None):
    """Score each input column by how much disturbing it changes the predictions.

    Parameters
    ----------
    model : object with ``predict(X, verbose=...)``.
    scaler : fitted transformer with ``transform``.
    df_for_testing : pandas.DataFrame
        Every column except the last is treated as a feature.
    mode : str
        'perturbation' adds N(0, 0.2) noise to the column; any other value
        shuffles the column's windows across samples.
    verbose : bool
        Forwarded to ``model.predict``; also prints one line per column.
    random_state : int, optional
        Seed for a private random generator, making the result reproducible.
        When None the global ``np.random`` state is used, as before.

    Returns
    -------
    pandas.DataFrame indexed by 'Base' plus the feature names. Despite the
    'MSE' labels both columns hold root-mean-squared differences:
    'MSE_pred' = disturbed vs. undisturbed predictions,
    'MSE'      = disturbed predictions vs. the true target.
    In the 'Base' row both columns hold undisturbed predictions vs. target.

    Notes
    -----
    Reads the module-level `n_lookback`, `n_forecast`, `createXY` and `tqdm`.
    """
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    metric= lambda x,y: ((x-y) ** 2).mean() ** 0.5   # RMSE
    Cols=df_for_testing.columns[:-1]
    df_for_testing_scaled=scaler.transform(df_for_testing.fillna(0))
    testX_,testY_=createXY(df_for_testing_scaled,n_lookback=n_lookback,n_forecast=n_forecast)
    orig_out = model.predict(testX_,verbose=verbose)
    Output=pd.DataFrame()
    Output.loc['Base',['MSE_pred','MSE']]=metric(orig_out ,testY_)
    for i in tqdm(range(len(Cols))):  # one pass per feature column
        new_x = testX_.copy()

        if mode == "perturbation":
            perturbation = rng.normal(0.0, 0.2, size=new_x.shape[:2])
            new_x[:, :, i] = new_x[:, :, i] + perturbation
        else:
            # In-place shuffle along the sample axis of this feature's slice.
            rng.shuffle(new_x[:, :, i])

        perturbed_out = model.predict(new_x,verbose=verbose)
        effect = metric(orig_out ,perturbed_out)
        effect2 = metric(testY_ ,perturbed_out)
        Output.loc[Cols[i],['MSE_pred','MSE']]=[effect,effect2]

        if verbose:
            print(f'Variable {Cols[i]}, perturbation effect: {effect:.4f}')
    return Output


In [None]:
# Perturbation importance on the post-2004 rows.
# NOTE(review): this passes `model` rather than the checkpoint-restored `best_model` - confirm.
R=var_importance(model,scaler,Feauture_data,verbose=False)

In [None]:
# Display the importance table.
R

## Model with full data

In [None]:
# Two-way split this time: the training frame runs through 2003, the test
# frame starts in 2004.
df_for_training = df.loc[:"2003"]
df_for_testing = df.loc["2004":]

print('df_for_training shape: ',df_for_training.shape)
print('df_for_testing shape: ',df_for_testing.shape)

# Scaling: refit on the enlarged training frame, reuse for the test frame.
scaler = MinMaxScaler(feature_range=(0,1))
df_for_training_scaled = scaler.fit_transform(df_for_training.fillna(0))
df_for_testing_scaled = scaler.transform(df_for_testing.fillna(0))

print()

_window_kwargs = dict(n_lookback=n_lookback, n_forecast=n_forecast)
trainX, trainY = createXY(df_for_training_scaled, **_window_kwargs)
testX, testY = createXY(df_for_testing_scaled, **_window_kwargs)
# NOTE(review): validX is not rebuilt in this cell; the shape shown for it
# comes from whatever earlier cell last defined it.
trainX.shape,validX.shape,testX.shape

In [None]:
# Same architecture as before, trained without a validation set.
model = Sequential()
model.add(Conv1D(filters=32,kernel_size=(3,),activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]),))
model.add(Conv1D(filters=32,kernel_size=(3,),activation='relu'))
model.add(MaxPool1D(1))
model.add(Dropout(0.1))
model.add(Flatten())
model.add(RepeatVector(1))
model.add(LSTM(100, return_sequences=False))
model.add(Dense((n_forecast),activation='sigmoid'))

model.summary()

# Explicit metric names: string aliases are auto-numbered ('recall_1', ...)
# when a model was already compiled in this session, which would break the
# history['recall'] look-up and the checkpoint monitor below.
model.compile(optimizer='adam',
              metrics=[tf.keras.metrics.Recall(name='recall'),
                       tf.keras.metrics.Precision(name='precision')],
              loss=tfa.losses.SigmoidFocalCrossEntropy()
            #   loss='BinaryCrossentropy'
              )

# No validation data is passed to fit(), so a checkpoint that monitors
# 'val_recall' (the shared `f2` callback) never sees its metric and never
# saves. Monitor the training recall instead, so PT_path really holds this
# model's best weights.
# NOTE(review): this overwrites the checkpoint written by the validated run - confirm.
full_data_checkpoint = ModelCheckpoint(filepath=PT_path, monitor='recall', mode='max',
                                       verbose=0, save_best_only=True)

# fit the model
history = model.fit(trainX, trainY, epochs=100,
                    batch_size=320,
                    # validation_split=0.1,
                    verbose=1,
                    callbacks=[
                        #  f1,
                        full_data_checkpoint,
                        # f3
                    ],
                    )

In [None]:
# Assuming you have imported necessary libraries and defined the `history` object

# Creating a single figure with subplots
# Training-only curves (this run has no validation data): loss on top, recall below.
fig, axes = plt.subplots(2, 1, figsize=(10,8))

for _axis, _history_key, _curve_label, _y_label in (
    (axes[0], 'loss', 'Training loss', 'Loss'),
    (axes[1], 'recall', 'Training Recall', 'Recall'),
):
    _axis.plot(history.history[_history_key], label=_curve_label)
    _axis.set_xlabel('Epochs')
    _axis.set_ylabel(_y_label)
    _axis.legend()

plt.tight_layout()  # keeps the two panels from overlapping
plt.show()


In [None]:
# Rebuild the architecture without weights, then restore the checkpoint stored at PT_path.
# NOTE(review): verify that PT_path was written by the fit of *this* model.
best_model_=tf.keras.models.clone_model(model)
best_model_.load_weights(PT_path)
prediction_train=best_model_.predict(trainX)
# NOTE(review): validX / df_for_validating are not rebuilt by the two-way split above,
# so they presumably still come from the earlier three-way split - confirm.
prediction_valid=best_model_.predict(validX)
prediction_test=best_model_.predict(testX)


# --- training split: keep as many trailing rows as there are predictions ---
min_length = min(len(df_for_training), len(prediction_train))
trainResults= pd.DataFrame(index=df_for_training.iloc[-min_length:,-1].index)

# 'Actual' is the indicator shifted by the same 250 rows used to build df['target'].
trainResults['Actual']=recessions.shift(-250)
trainResults['Pred']=align_data(prediction_train, df_for_training)['m']

plot(trainResults['Pred'],trainResults["Actual"])
# --- validation split ---
min_length = min(len(df_for_validating), len(prediction_valid))
validResults= pd.DataFrame(index=df_for_validating.iloc[-min_length:,-1].index)

validResults['Actual']=recessions.shift(-250)
validResults['Pred']=align_data(prediction_valid[-min_length:], df_for_validating)['m']
plot(validResults['Pred'],validResults['Actual'])
# --- test split ---
min_length = min(len(df_for_testing), len(prediction_test))
testResults= pd.DataFrame(index=df_for_testing.iloc[-min_length:,-1].index)

testResults['Actual']=recessions.shift(-250)
testResults['Pred']=align_data(prediction_test[-min_length:], df_for_testing)['m']

plot(testResults['Pred'],testResults['Actual'])

In [None]:
# A bare top-level `break` used to open this cell. That is a SyntaxError
# ('break' outside loop), so nothing below it ever ran and `load_model`,
# which the next cell needs, was never defined.
Model_path= 'Model/CNN2-LSTM-test'
# Same architecture as `model`, with weights restored from Model_path.
load_model=tf.keras.models.clone_model(model)
load_model.load_weights(Model_path)

In [None]:
# Predictions of the model restored from disk, for each split.
prediction_train=load_model.predict(trainX)
prediction_valid=load_model.predict(validX)
prediction_test=load_model.predict(testX)

# --- training split: keep as many trailing rows as there are predictions ---
min_length = min(len(df_for_training), len(prediction_train))
trainResults= pd.DataFrame(index=df_for_training.iloc[-min_length:,-1].index)

# NOTE(review): 'Actual' is the unshifted indicator here, whereas df['target']
# was built from recessions.shift(-250) - confirm which one is intended.
trainResults['Actual']=recessions
trainResults['Pred']=align_data(prediction_train, df_for_training)['m']

plot(trainResults['Pred'],trainResults["Actual"])
# --- validation split ---
min_length = min(len(df_for_validating), len(prediction_valid))
validResults= pd.DataFrame(index=df_for_validating.iloc[-min_length:,-1].index)

validResults['Actual']=recessions
validResults['Pred']=align_data(prediction_valid[-min_length:], df_for_validating)['m']
plot(validResults['Pred'],validResults['Actual'])
# --- test split ---
min_length = min(len(df_for_testing), len(prediction_test))
testResults= pd.DataFrame(index=df_for_testing.iloc[-min_length:,-1].index)

testResults['Actual']=recessions
testResults['Pred']=align_data(prediction_test[-min_length:], df_for_testing)['m']

plot(testResults['Pred'],testResults['Actual'])

# Tuning
    

In [None]:
# Deliberate stop so that "Run All" does not continue into the tuning section.
# A bare top-level `break` is a SyntaxError; raising makes the stop explicit
# and self-explanatory. Run the cells below by hand when tuning is wanted.
raise RuntimeError("Execution intentionally stopped before the hyper-parameter tuning section.")

In [None]:
import keras_tuner as kt
from tensorflow.keras.regularizers import l1, l2,l1_l2

# Define the model-building function
def build_model(hp):
    """Build and compile one CNN-LSTM candidate for the hyper-parameter search.

    Parameters
    ----------
    hp : keras_tuner hyper-parameter container. Tuned here: the kernel size
        of each convolution (odd values 1..9), the regularisation strength of
        every trainable layer, and whether the LSTM returns sequences.

    Returns
    -------
    A compiled model using `CustomLoss`, with metrics named 'recall',
    'precision', 'auc' and 'mse'.

    Notes
    -----
    Reads the module-level `trainX` (input shape) and `n_forecast`.
    Hyper-parameter names are kept exactly as before so that results already
    stored by the tuner still match. This includes 'CNN1_l2_regularization',
    which applies to the second convolution.
    NOTE(review): with return_sequences=True the LSTM output gains a time
    axis - confirm the Dense output still matches the label shape.
    """
    odd_kernel_sizes = [k for k in range(10) if k%2==1]
    penalty_choices = [0.0, 1e-3, 1e-2]

    model = Sequential()
    model.add(Conv1D(filters=32, # hp.Int('CNN_1_filters', min_value=16, max_value=256, step=64),
                     kernel_size=hp.Choice('CNN_1_kernel_size', values=odd_kernel_sizes),
                     activation='relu',
                     input_shape=(trainX.shape[1], trainX.shape[2]),
                     kernel_regularizer=l1(hp.Choice('CNN1_l1_regularization', values=penalty_choices))))
    model.add(Conv1D(filters=32, # hp.Int('CNN_2_filters', min_value=16, max_value=256, step=64),
                     kernel_size=hp.Choice('CNN_2_kernel_size', values=odd_kernel_sizes),
                     activation='relu',
                     kernel_regularizer=l2(hp.Choice('CNN1_l2_regularization', values=penalty_choices))))
    model.add(MaxPool1D(pool_size=2))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(RepeatVector(n_forecast))
    model.add(LSTM(units=100,
                   return_sequences=hp.Boolean("return_sequences",),
                   kernel_regularizer=l1_l2(hp.Choice('LSTM_l1_l2_regularization', values=penalty_choices))))
    model.add(Dense(units=n_forecast, activation='sigmoid',
                    kernel_regularizer=l2(hp.Choice('dense_L2_regularization', values=penalty_choices))))

    # Explicit metric names keep the tuner objective 'val_recall' valid even
    # when several models are built in one session (string aliases would be
    # auto-numbered: 'recall_1', 'recall_2', ...).
    model.compile(optimizer='adam',
                  metrics=[tf.keras.metrics.Recall(name='recall'),
                           tf.keras.metrics.Precision(name='precision'),
                           tf.keras.metrics.AUC(name='auc'),
                           'mse'],
                  loss=CustomLoss)

    return model


In [None]:
# Instantiate the tuner
# Hyperband search that maximises validation recall. With overwrite=False any
# results already stored under directory/project_name are reused.
_recall_objective = kt.Objective("val_recall", direction="max")

tuner = kt.Hyperband(
    build_model,
    objective=_recall_objective,
    max_epochs=175,
    factor=2,
    # executions_per_trial=5,
    directory="with Custom Loss",
    project_name='model_tuning_withFbeta',
    overwrite=False,
)
tuner.search_space_summary()

In [None]:

# Get the best model
# The two get_best_models calls are independent requests to the tuner.
# NOTE(review): presumably `best_model` and `top10_model[0]` are separate
# instances - confirm before training one and evaluating the other.
best_model = tuner.get_best_models(num_models=1)[0]
top10_model = tuner.get_best_models(num_models=10)

# Build a fresh model from the best hyper-parameter set.
best_hp = tuner.get_best_hyperparameters()[0]
best_hp_model = tuner.hypermodel.build(best_hp)

# Compile the best model
# This re-compiles with binary cross-entropy, replacing the CustomLoss set inside build_model.
best_hp_model.compile(optimizer='adam',
                   metrics=['Recall', 'Precision', 'AUC', 'mse'],
                   loss="BinaryCrossentropy")


In [None]:
# model = Sequential()
# model.add(Conv1D(filters=32,kernel_size=(3,),activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]),))
# model.add(Conv1D(filters=32,kernel_size=(3,),activation='relu'))
# # model.add(Conv1D(filters=32,kernel_size=(3,),activation='relu'))
# model.add(MaxPool1D(1))
# model.add(Dropout(0.1))
# model.add(Flatten())
# model.add(RepeatVector(1))
# model.add(LSTM(100, return_sequences=False))
# model.add(Dense((n_forecast)
#             ,activation='sigmoid'
#             ))


# Fixed-architecture variant (pool size 2, dropout 0.25) trained with plain
# binary cross-entropy.
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]),),
    Conv1D(filters=32, kernel_size=3, activation='relu'),
    MaxPool1D(pool_size=2),
    Dropout(0.25),
    Flatten(),
    RepeatVector(n_forecast),
    LSTM(100, return_sequences=False),
    Dense(units=n_forecast, activation='sigmoid')
])

# The metric list used to start with a bare `metric`, a name that is not
# defined at module level anywhere in this notebook (NameError on a fresh
# kernel); it has been dropped.
# NOTE(review): if a custom metric was intended there, add it back explicitly.
# Explicit names keep 'val_recall' (monitored by the checkpoint callback)
# stable across re-compiles.
model.compile(optimizer='adam',
              metrics=[tf.keras.metrics.Recall(name='recall'),
                       tf.keras.metrics.Precision(name='precision'),
                       tf.keras.metrics.AUC(name='auc')],
            #   loss=CustomLoss
              loss='BinaryCrossentropy'
              )

model.summary()
# fit the model
history = model.fit(trainX, trainY, epochs=350, batch_size=32,
                    # validation_split=0.1,
                    validation_data=(validX, validY),
                    verbose=1,
                    callbacks=[
                        #  f1,
                        f2
                    ],
                    )

In [None]:
import matplotlib.pyplot as plt

def plot_metric(history, metric_name, label):
    """Plot the training and validation curves of one metric and show the figure.

    Parameters
    ----------
    history : object exposing a ``history`` dict of per-epoch values.
    metric_name : str
        Key of the training curve; the validation curve is read from
        ``'val_' + metric_name``. Also used, capitalised, as the y-axis label.
    label : str
        Wording used in the legend entries.
    """
    curves = history.history
    for split_word, key in (('Training', metric_name), ('Validation', f'val_{metric_name}')):
        plt.plot(curves[key], label=f'{split_word} {label}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_name.capitalize())
    plt.legend()
    plt.show()

# Assuming you have the `history` object containing the metrics history


In [None]:
# Re-compile the tuner's best model with the custom weighted loss.
_tuned_metrics = ['Recall', 'Precision', 'AUC', 'mse']
best_model.compile(
    loss=CustomLoss,
    metrics=_tuned_metrics,
    optimizer='adam',
)

In [None]:
# Continue training the tuner's best model. The f2 callback keeps the
# checkpoint with the highest validation recall.
history = best_model.fit(
    trainX, trainY,
    validation_data=(validX, validY),
    # validation_split=0.1,
    epochs=250,
    batch_size=320,
    verbose=1,
    callbacks=[f2],  # f1 (early stopping) left disabled
)

In [None]:
def _evaluate_split(predictions, frame):
    """Build the Actual/Pred table for one split, plot it, and return it."""
    n_rows = min(len(frame), len(predictions))
    results = pd.DataFrame(index=frame.iloc[-n_rows:, -1].index)
    results['Actual'] = recessions
    results['Pred'] = align_data(predictions[-n_rows:], frame)['m']
    plot(results['Pred'], results['Actual'])
    return results


# Evaluate the n-th best tuner model (1-based).
# NOTE(review): `n` is not defined in any cell visible here; set it before running.
# The loop variable is no longer called `model`, so the notebook-level `model`
# is not overwritten, and the printed rank is now the real one (it used to
# print 1 for every n).
for rank, candidate in enumerate(top10_model[n-1:n], start=n):
    prediction_train = candidate.predict(trainX)
    prediction_valid = candidate.predict(validX)
    prediction_test = candidate.predict(testX)
    print(f'result for {rank} th model ')

    trainResults = _evaluate_split(prediction_train, df_for_training)
    validResults = _evaluate_split(prediction_valid, df_for_validating)
    testResults = _evaluate_split(prediction_test, df_for_testing)
    