In [1]:
import os
import pandas as pd
data_daily = pd.read_csv('data_for_forecasting.csv', encoding = 'utf-8-sig')

In [2]:
data_daily.head()

Unnamed: 0,log_ret,equally_weighted_index,equally_weighted_volume,date,sentiment_LM,sentiment_extend,Component1extend,Component2extend,Component3extend,Component4extend,...,Component1LM,Component2LM,Component3LM,Component4LM,Component5LM,Component6LM,Component7LM,Component8LM,Component9LM,Component10LM
0,0.024653,123.982993,955224.563689,2016-01-13,0.003378,0.002483,0.915952,0.198014,0.14871,0.144552,...,1.669572,-0.10476,-0.040353,0.042248,0.455844,0.089934,0.093672,-0.672846,-0.219977,0.186504
1,-0.032606,120.005545,850090.878743,2016-01-14,0.003378,0.002483,0.915952,0.198014,0.14871,0.144552,...,1.669572,-0.10476,-0.040353,0.042248,0.455844,0.089934,0.093672,-0.672846,-0.219977,0.186504
2,0.004484,120.544918,759009.187526,2016-01-15,0.003378,0.002483,0.915952,0.198014,0.14871,0.144552,...,1.669572,-0.10476,-0.040353,0.042248,0.455844,0.089934,0.093672,-0.672846,-0.219977,0.186504
3,-0.028255,117.186612,438230.369985,2016-01-18,0.006187,0.006968,3.278263,-0.527159,-0.984497,1.777233,...,4.347198,-0.908943,2.394316,-0.003131,0.014896,-1.534704,1.364388,0.121622,0.972589,-0.302632
4,0.047632,122.903516,597259.92384,2016-01-19,-0.002157,0.000638,0.768733,0.020885,0.008943,0.303239,...,-0.573484,-0.299402,-1.087171,0.172037,0.295554,0.732241,-0.741438,-0.244923,-0.427908,-0.176453


In [3]:
import numpy as np

def classify_return(row, lower, upper):
    if row < lower:
        return 0
    elif lower <= row <= upper:
        return 1
    else:
        return 2

def label_returns(df):
    lower_quantile = df['log_ret'].quantile(0.25)
    upper_quantile = df['log_ret'].quantile(0.75)

    df['labels'] = df['log_ret'].apply(classify_return, args=(lower_quantile, upper_quantile))
    return df

data_daily = label_returns(data_daily)

## Prices only

In [4]:
y = data_daily['labels'][1:]
columns = ['equally_weighted_index', 'equally_weighted_volume']
X = data_daily[columns].iloc[:-1]

In [5]:
from sklearn.preprocessing import StandardScaler

# Determine the split point for 90% of the data
split_point = int(len(X) * 0.90)

# Split the data without shuffling
X_train, X_test = X[:split_point], X[split_point:]
y_train, y_test = y[:split_point], y[split_point:]

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
%%time
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Define the parameter ranges
C_range = [10**i for i in np.arange(0, 3.25, 0.25)]
degree_range = [2, 3, 4, 5]

# Create the parameter grid
param_grid = {'C': C_range, 'degree': degree_range}

# Create the SVC model with a polynomial kernel
svc = SVC(kernel='poly', class_weight='balanced')

# Create the GridSearchCV model
grid_search = GridSearchCV(svc, param_grid, cv=10, scoring='f1_weighted')

# Fit the GridSearchCV model
grid_search.fit(X_train_scaled, y_train)

# Print the best parameters
print('Best parameters: ', grid_search.best_params_)

Best parameters:  {'C': 5.623413251903491, 'degree': 3}
CPU times: total: 1h 50min 1s
Wall time: 1h 50min 7s


In [7]:
best_c = grid_search.best_params_['C']
best_d = grid_search.best_params_['degree']

# Create a SVC model with the best parameters
svc_model_best = SVC(C=best_c, kernel='poly', degree=best_d, decision_function_shape='ovo')

# Train the model
svc_model_best.fit(X_train_scaled, y_train)

# Compute predicted labels for the test set
predictions = svc_model_best.predict(X_test_scaled)

In [8]:
from sklearn.metrics import classification_report, confusion_matrix 

# Print out classification report
print("Classification Report:")
print(classification_report(y_test, predictions, zero_division=0))

# Compute and print out confusion matrix
conf_mat = confusion_matrix(y_test, predictions)
print("Confusion Matrix:")
print(conf_mat.transpose())

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        40
           1       0.45      1.00      0.62        74
           2       0.00      0.00      0.00        49

    accuracy                           0.45       163
   macro avg       0.15      0.33      0.21       163
weighted avg       0.21      0.45      0.28       163

Confusion Matrix:
[[ 0  0  0]
 [40 74 49]
 [ 0  0  0]]


In [9]:
from sklearn.metrics import accuracy_score, f1_score

# Calculate average accuracy
accuracy_p = accuracy_score(y_test, predictions)
print(f"Average Accuracy: {accuracy_p}")

# Get classification report as a dictionary
classification_report_dict = classification_report(y_test, predictions, zero_division=0, output_dict=True)

# Extract weighted macro F1-score
f1_p = classification_report_dict['weighted avg']['f1-score']
print(f"Weighted Macro F1-Score: {f1_p}")

Average Accuracy: 0.4539877300613497
Weighted Macro F1-Score: 0.28350288628303694


## Prices and sentiment LM

In [10]:
y = data_daily['labels'][1:]
columns = ['equally_weighted_index', 'equally_weighted_volume', 'sentiment_LM']
X = data_daily[columns].iloc[:-1]

In [11]:
# Determine the split point for 90% of the data
split_point = int(len(X) * 0.90)

# Split the data without shuffling
X_train, X_test = X[:split_point], X[split_point:]
y_train, y_test = y[:split_point], y[split_point:]

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
%%time
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Define the parameter ranges
C_range = [10**i for i in np.arange(0, 3.25, 0.25)]
degree_range = [2, 3, 4, 5]

# Create the parameter grid
param_grid = {'C': C_range, 'degree': degree_range}

# Create the SVC model with a polynomial kernel
svc = SVC(kernel='poly', class_weight='balanced')

# Create the GridSearchCV model
grid_search = GridSearchCV(svc, param_grid, cv=10, scoring='f1_weighted')

# Fit the GridSearchCV model
grid_search.fit(X_train_scaled, y_train)

# Print the best parameters
print('Best parameters: ', grid_search.best_params_)

Best parameters:  {'C': 3.1622776601683795, 'degree': 2}
CPU times: total: 1h 37min 55s
Wall time: 1h 38min 1s


In [13]:
best_c = grid_search.best_params_['C']
best_d = grid_search.best_params_['degree']

# Create a SVC model with the best parameters
svc_model_best = SVC(C=best_c, kernel='poly', degree=best_d, decision_function_shape='ovo')

# Train the model
svc_model_best.fit(X_train_scaled, y_train)

# Compute predicted labels for the test set
predictions = svc_model_best.predict(X_test_scaled)

In [14]:
from sklearn.metrics import classification_report, confusion_matrix 

# Print out classification report
print("Classification Report:")
print(classification_report(y_test, predictions, zero_division=0))

# Compute and print out confusion matrix
conf_mat = confusion_matrix(y_test, predictions)
print("Confusion Matrix:")
print(conf_mat.transpose())

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        40
           1       0.45      1.00      0.62        74
           2       0.00      0.00      0.00        49

    accuracy                           0.45       163
   macro avg       0.15      0.33      0.21       163
weighted avg       0.21      0.45      0.28       163

Confusion Matrix:
[[ 0  0  0]
 [40 74 49]
 [ 0  0  0]]


In [15]:
# Calculate average accuracy
accuracy_p_lm = accuracy_score(y_test, predictions)
print(f"Average Accuracy: {accuracy_p_lm}")

# Get classification report as a dictionary
classification_report_dict = classification_report(y_test, predictions, zero_division=0, output_dict=True)

# Extract weighted macro F1-score
f1_p_lm = classification_report_dict['weighted avg']['f1-score']
print(f"Weighted Macro F1-Score: {f1_p_lm}")

Average Accuracy: 0.4539877300613497
Weighted Macro F1-Score: 0.28350288628303694


In [16]:
delta_lm = (accuracy_p_lm - accuracy_p) / accuracy_p
delta_lm_f1 = (f1_p_lm - f1_p) / f1_p

print(f"Relative improvement for accuracy: {delta_lm}")
print(f"Relative improvement for F1 score: {delta_lm_f1}")

Relative improvement for accuracy: 0.0
Relative improvement for F1 score: 0.0


## Prices and components based on topics*LM sentiment

In [17]:
def add_lags(df, column_name, max_lag):
    """
    This function adds lagged values of a specified column to a copy of the dataframe.
    
    Args:
    df: The dataframe.
    column_name: The name of the column for which lagged values are to be added.
    max_lag: The maximum number of lags to be added.
    
    Returns:
    df_copy: A copy of the dataframe with added lagged value columns.
    """
    df_copy = df.copy()  # Create a copy of the DataFrame
    for lag in range(1, max_lag + 1):
        df_copy[f'{column_name}_lag_{lag}'] = df_copy[column_name].shift(lag)
    return df_copy

In [18]:
# Split the dataset into train and test first
mask = (data_daily['date'] <= '2021-11-12')
train_data = data_daily.loc[mask]
mask = (data_daily['date'] >= '2021-11-12')
test_data = data_daily.loc[mask]

# Add lags to 'equally_weighted_index', 'equally_weighted_volume' and 'ComponentiLM' columns in train set
train_data = add_lags(train_data, 'equally_weighted_index', 1)
train_data = add_lags(train_data, 'equally_weighted_volume', 1)

for i in range(1, 11):
    train_data = add_lags(train_data, f'Component{i}LM', 10)

# Select the required columns
required_columns = ['equally_weighted_index_lag_1', 'equally_weighted_volume_lag_1'] + [f'Component{i}LM_lag_{j}' for i in range(1, 11) for j in range(1, 11)]
X = train_data[required_columns]
y = train_data['labels'].shift(-10)
X_train = X.dropna().reset_index(drop=True)
y_train = y.dropna().reset_index(drop=True).iloc[:len(X_train)]

# Add lags to 'equally_weighted_index', 'equally_weighted_volume' and 'ComponentiLM' columns in test set
test_data = add_lags(test_data, 'equally_weighted_index', 1)
test_data = add_lags(test_data, 'equally_weighted_volume', 1)

for i in range(1, 11):
    test_data = add_lags(test_data, f'Component{i}LM', 10)

# Select the required columns
required_columns = ['equally_weighted_index_lag_1', 'equally_weighted_volume_lag_1'] + [f'Component{i}LM_lag_{j}' for i in range(1, 11) for j in range(1, 11)]
X = test_data[required_columns]
y = test_data['labels'].shift(-10)
X_test = X.dropna().reset_index(drop=True)
y_test = y.dropna().reset_index(drop=True).iloc[:len(X_test)]

In [19]:
# Drop the specified columns
X_train_dropped = X_train.drop(['equally_weighted_index_lag_1', 'equally_weighted_volume_lag_1'], axis=1)

# Calculate correlations of all remaining variables with `train_data['log_ret'].iloc[10:]`
correlations = X_train_dropped.corrwith(train_data['log_ret'].iloc[10:])

# Sort correlations in descending order
sorted_correlations = correlations.sort_values(ascending=False)

# Select top 5
top_5_variables = sorted_correlations[:5]

# Print top 5 variables along with their correlation coefficients
for variable, correlation in top_5_variables.items():
    print(f"{variable}: {correlation}")

Component10LM_lag_7: 0.051230184241513294
Component8LM_lag_6: 0.04940627003969241
Component6LM_lag_9: 0.04731174252128467
Component2LM_lag_5: 0.0466248893319868
Component6LM_lag_8: 0.038765315105760235


In [20]:
# Select top 5 correlated variable names
top_5_variable_names = top_5_variables.index.tolist()

# Combine with 'equally_weighted_index_lag_1' and 'equally_weighted_volume_lag_1'
selected_columns = ['equally_weighted_index_lag_1', 'equally_weighted_volume_lag_1'] + top_5_variable_names + ['Component1LM_lag_1']

# Select these columns from X_train and X_test
X_train_selected = X_train[selected_columns]
X_test_selected = X_test[selected_columns]

In [21]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train_selected)
X_test_scaled = scaler.transform(X_test_selected)

In [22]:
%%time
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Define the parameter ranges
C_range = [10**i for i in np.arange(0, 3.25, 0.25)]
degree_range = [2, 3, 4, 5]

# Create the parameter grid
param_grid = {'C': C_range, 'degree': degree_range}

# Create the SVC model with a polynomial kernel
svc = SVC(kernel='poly', class_weight='balanced')

# Create the GridSearchCV model
grid_search = GridSearchCV(svc, param_grid, cv=10, scoring='f1_weighted')

# Fit the GridSearchCV model
grid_search.fit(X_train_scaled, y_train)

# Print the best parameters
print('Best parameters: ', grid_search.best_params_)

Best parameters:  {'C': 562.341325190349, 'degree': 5}
CPU times: total: 24min 52s
Wall time: 24min 52s


In [23]:
best_c = grid_search.best_params_['C']
best_d = grid_search.best_params_['degree']

# Create a SVC model with the best parameters
svc_model_best = SVC(C=best_c, kernel='poly', degree=best_d, decision_function_shape='ovo')

# Train the model
svc_model_best.fit(X_train_scaled, y_train)

# Compute predicted labels for the test set
predictions = svc_model_best.predict(X_test_scaled)

In [24]:
from sklearn.metrics import classification_report, confusion_matrix 

# Print out classification report
print("Classification Report:")
print(classification_report(y_test, predictions, zero_division=0))

# Compute and print out confusion matrix
conf_mat = confusion_matrix(y_test, predictions)
print("Confusion Matrix:")
print(conf_mat.transpose())

Classification Report:
              precision    recall  f1-score   support

         0.0       0.26      0.32      0.29        37
         1.0       0.42      0.52      0.46        69
         2.0       0.19      0.08      0.12        48

    accuracy                           0.34       154
   macro avg       0.29      0.31      0.29       154
weighted avg       0.31      0.34      0.31       154

Confusion Matrix:
[[12 22 13]
 [19 36 31]
 [ 6 11  4]]


In [25]:
# Calculate average accuracy
accuracy_p_lm_comp = accuracy_score(y_test, predictions)
print(f"Average Accuracy: {accuracy_p_lm_comp}")

# Get classification report as a dictionary
classification_report_dict = classification_report(y_test, predictions, zero_division=0, output_dict=True)

# Extract weighted macro F1-score
f1_p_lm_comp = classification_report_dict['weighted avg']['f1-score']
print(f"Weighted Macro F1-Score: {f1_p_lm_comp}")

Average Accuracy: 0.33766233766233766
Weighted Macro F1-Score: 0.3129107718568749


In [26]:
delta_lm_comp = (accuracy_p_lm_comp - accuracy_p) / accuracy_p
delta_lm_comp_f1 = (f1_p_lm_comp - f1_p) / f1_p

print(f"Relative improvement for accuracy: {delta_lm_comp}")
print(f"Relative improvement for F1 score: {delta_lm_comp_f1}")

Relative improvement for accuracy: -0.2562302562302562
Relative improvement for F1 score: 0.10373046271027517


## Prices and extended sentiment

In [27]:
y = data_daily['labels'][1:]
columns = ['equally_weighted_index', 'equally_weighted_volume', 'sentiment_extend']
X = data_daily[columns].iloc[:-1]

In [28]:
# Determine the split point for 90% of the data
split_point = int(len(X) * 0.90)

# Split the data without shuffling
X_train, X_test = X[:split_point], X[split_point:]
y_train, y_test = y[:split_point], y[split_point:]

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [29]:
%%time
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Define the parameter ranges
C_range = [10**i for i in np.arange(0, 3.25, 0.25)]
degree_range = [2, 3, 4, 5]

# Create the parameter grid
param_grid = {'C': C_range, 'degree': degree_range}

# Create the SVC model with a polynomial kernel
svc = SVC(kernel='poly', class_weight='balanced')

# Create the GridSearchCV model
grid_search = GridSearchCV(svc, param_grid, cv=10, scoring='f1_weighted')

# Fit the GridSearchCV model
grid_search.fit(X_train_scaled, y_train)

# Print the best parameters
print('Best parameters: ', grid_search.best_params_)

Best parameters:  {'C': 3.1622776601683795, 'degree': 2}
CPU times: total: 2h 15min 22s
Wall time: 2h 15min 28s


In [30]:
best_c = grid_search.best_params_['C']
best_d = grid_search.best_params_['degree']

# Create a SVC model with the best parameters
svc_model_best = SVC(C=best_c, kernel='poly', degree=best_d, decision_function_shape='ovo')

# Train the model
svc_model_best.fit(X_train_scaled, y_train)

# Compute predicted labels for the test set
predictions = svc_model_best.predict(X_test_scaled)

In [31]:
from sklearn.metrics import classification_report, confusion_matrix 

# Print out classification report
print("Classification Report:")
print(classification_report(y_test, predictions, zero_division=0))

# Compute and print out confusion matrix
conf_mat = confusion_matrix(y_test, predictions)
print("Confusion Matrix:")
print(conf_mat.transpose())

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        40
           1       0.45      1.00      0.62        74
           2       0.00      0.00      0.00        49

    accuracy                           0.45       163
   macro avg       0.15      0.33      0.21       163
weighted avg       0.21      0.45      0.28       163

Confusion Matrix:
[[ 0  0  0]
 [40 74 49]
 [ 0  0  0]]


In [32]:
# Calculate average accuracy
accuracy_p_extend = accuracy_score(y_test, predictions)
print(f"Average Accuracy: {accuracy_p_extend}")

# Get classification report as a dictionary
classification_report_dict = classification_report(y_test, predictions, zero_division=0, output_dict=True)

# Extract weighted macro F1-score
f1_p_extend = classification_report_dict['weighted avg']['f1-score']
print(f"Weighted Macro F1-Score: {f1_p_extend}")

Average Accuracy: 0.4539877300613497
Weighted Macro F1-Score: 0.28350288628303694


In [33]:
delta_extend = (accuracy_p_extend - accuracy_p) / accuracy_p
delta_extend_f1 = (f1_p_extend - f1_p) / f1_p

print(f"Relative improvement for accuracy: {delta_extend}")
print(f"Relative improvement for F1 score: {delta_extend_f1}")

Relative improvement for accuracy: 0.0
Relative improvement for F1 score: 0.0


## Prices and components based on topics*extended sentiment

In [34]:
# Split the dataset into train and test first
mask = (data_daily['date'] <= '2021-11-12')
train_data = data_daily.loc[mask]
mask = (data_daily['date'] >= '2021-11-12')
test_data = data_daily.loc[mask]

# Add lags to 'equally_weighted_index', 'equally_weighted_volume' and 'Componentiextend' columns in train set
train_data = add_lags(train_data, 'equally_weighted_index', 1)
train_data = add_lags(train_data, 'equally_weighted_volume', 1)

for i in range(1, 11):
    train_data = add_lags(train_data, f'Component{i}extend', 10)

# Select the required columns
required_columns = ['equally_weighted_index_lag_1', 'equally_weighted_volume_lag_1'] + [f'Component{i}extend_lag_{j}' for i in range(1, 11) for j in range(1, 11)]
X = train_data[required_columns]
y = train_data['labels'].shift(-10)
X_train = X.dropna().reset_index(drop=True)
y_train = y.dropna().reset_index(drop=True).iloc[:len(X_train)]

# Add lags to 'equally_weighted_index', 'equally_weighted_volume' and 'Componentiextend' columns in test set
test_data = add_lags(test_data, 'equally_weighted_index', 1)
test_data = add_lags(test_data, 'equally_weighted_volume', 1)

for i in range(1, 11):
    test_data = add_lags(test_data, f'Component{i}extend', 10)

# Select the required columns
required_columns = ['equally_weighted_index_lag_1', 'equally_weighted_volume_lag_1'] + [f'Component{i}extend_lag_{j}' for i in range(1, 11) for j in range(1, 11)]
X = test_data[required_columns]
y = test_data['labels'].shift(-10)
X_test = X.dropna().reset_index(drop=True)
y_test = y.dropna().reset_index(drop=True).iloc[:len(X_test)]

In [35]:
# Drop the specified columns
X_train_dropped = X_train.drop(['equally_weighted_index_lag_1', 'equally_weighted_volume_lag_1'], axis=1)

# Calculate correlations of all remaining variables with `train_data['log_ret'].iloc[10:]`
correlations = X_train_dropped.corrwith(train_data['log_ret'].iloc[10:])

# Sort correlations in descending order
sorted_correlations = correlations.sort_values(ascending=False)

# Select top 5
top_5_variables = sorted_correlations[:5]

# Print top 5 variables along with their correlation coefficients
for variable, correlation in top_5_variables.items():
    print(f"{variable}: {correlation}")

Component6extend_lag_1: 0.06625118490229949
Component6extend_lag_2: 0.05931828806761272
Component9extend_lag_10: 0.0587815940867539
Component4extend_lag_5: 0.041587725346739095
Component7extend_lag_7: 0.038547576458769536


In [36]:
# Select top 5 correlated variable names
top_5_variable_names = top_5_variables.index.tolist()

# Combine with 'equally_weighted_index_lag_1' and 'equally_weighted_volume_lag_1'
selected_columns = ['equally_weighted_index_lag_1', 'equally_weighted_volume_lag_1'] + top_5_variable_names + ['Component1extend_lag_1']

# Select these columns from X_train and X_test
X_train_selected = X_train[selected_columns]
X_test_selected = X_test[selected_columns]

In [37]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train_selected)
X_test_scaled = scaler.transform(X_test_selected)

In [38]:
%%time
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Define the parameter ranges
C_range = [10**i for i in np.arange(0, 3.25, 0.25)]
degree_range = [2, 3, 4, 5]

# Create the parameter grid
param_grid = {'C': C_range, 'degree': degree_range}

# Create the SVC model with a polynomial kernel
svc = SVC(kernel='poly', class_weight='balanced')

# Create the GridSearchCV model
grid_search = GridSearchCV(svc, param_grid, cv=10, scoring='f1_weighted')

# Fit the GridSearchCV model
grid_search.fit(X_train_scaled, y_train)

# Print the best parameters
print('Best parameters: ', grid_search.best_params_)

Best parameters:  {'C': 562.341325190349, 'degree': 5}
CPU times: total: 23min 7s
Wall time: 23min 7s


In [39]:
best_c = grid_search.best_params_['C']
best_d = grid_search.best_params_['degree']

# Create a SVC model with the best parameters
svc_model_best = SVC(C=best_c, kernel='poly', degree=best_d, decision_function_shape='ovo')

# Train the model
svc_model_best.fit(X_train_scaled, y_train)

# Compute predicted labels for the test set
predictions = svc_model_best.predict(X_test_scaled)

In [40]:
# Print out classification report
print("Classification Report:")
print(classification_report(y_test, predictions, zero_division=0))

# Compute and print out confusion matrix
conf_mat = confusion_matrix(y_test, predictions)
print("Confusion Matrix:")
print(conf_mat.transpose())

Classification Report:
              precision    recall  f1-score   support

         0.0       0.21      0.16      0.18        37
         1.0       0.49      0.67      0.56        69
         2.0       0.32      0.21      0.25        48

    accuracy                           0.40       154
   macro avg       0.34      0.35      0.33       154
weighted avg       0.37      0.40      0.38       154

Confusion Matrix:
[[ 6 13 10]
 [20 46 28]
 [11 10 10]]


In [41]:
# Calculate average accuracy
accuracy_p_extend_comp = accuracy_score(y_test, predictions)
print(f"Average Accuracy: {accuracy_p_extend_comp}")

# Get classification report as a dictionary
classification_report_dict = classification_report(y_test, predictions, zero_division=0, output_dict=True)

# Extract weighted macro F1-score
f1_p_extend_comp = classification_report_dict['weighted avg']['f1-score']
print(f"Weighted Macro F1-Score: {f1_p_extend_comp}")

Average Accuracy: 0.4025974025974026
Weighted Macro F1-Score: 0.3754802385553478


In [42]:
delta_extend_comp = (accuracy_p_extend_comp - accuracy_p) / accuracy_p
delta_extend_comp_f1 = (f1_p_extend_comp - f1_p) / f1_p

print(f"Relative improvement for accuracy: {delta_extend_comp}")
print(f"Relative improvement for F1 score: {delta_extend_comp_f1}")

Relative improvement for accuracy: -0.11319761319761312
Relative improvement for F1 score: 0.32443180201165456


In [43]:
# Create a dictionary where the keys are the metric names and the values are the metric values
metrics_dict = {
    'accuracy_p': accuracy_p,
    'f1_p': f1_p,
    'accuracy_p_lm': accuracy_p_lm,
    'f1_p_lm': f1_p_lm,
    'accuracy_p_lm_comp': accuracy_p_lm_comp,
    'f1_p_lm_comp': f1_p_lm_comp,
    'accuracy_p_extend': accuracy_p_extend,
    'f1_p_extend': f1_p_extend,
    'accuracy_p_extend_comp': accuracy_p_extend_comp,
    'f1_p_extend_comp': f1_p_extend_comp
}

# Convert the dictionary to a DataFrame
metrics_df = pd.DataFrame(metrics_dict, index=[0])

# Save the DataFrame to a csv file
metrics_df.to_csv('metrics_final_experiment_4.csv', index=False)

In [44]:
# Create a dictionary where the keys are the delta names and the values are the delta values
deltas_dict = {
    'delta_lm': delta_lm,
    'delta_lm_f1': delta_lm_f1,
    'delta_lm_comp': delta_lm_comp,
    'delta_lm_comp_f1': delta_lm_comp_f1,
    'delta_extend': delta_extend,
    'delta_extend_f1': delta_extend_f1,
    'delta_extend_comp': delta_extend_comp,
    'delta_extend_comp_f1': delta_extend_comp_f1
}

# Convert the dictionary to a DataFrame
deltas_df = pd.DataFrame(deltas_dict, index=[0])

# Save the DataFrame to a csv file
deltas_df.to_csv('deltas_final_experiment_4.csv', index=False)