<a href="https://colab.research.google.com/github/ThalyaGIT/UK-Music-Index-Returns/blob/main/3_data-analysis_notebooks/UK_Music_Happiness_and_Index_Returns.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Import packages
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from tabulate import tabulate


In [6]:
# Locations of the gold-layer CSV files, one per horizon (1, 3, 5, 10 and 20 days)
url_1_day, url_3_day, url_5_day, url_10_day, url_20_day = (
    'https://raw.githubusercontent.com/ThalyaGIT/UK-Music-Index-Returns/main/0-data-gold/data_1_days.csv',
    'https://raw.githubusercontent.com/ThalyaGIT/UK-Music-Index-Returns/main/0-data-gold/data_3_days.csv',
    'https://raw.githubusercontent.com/ThalyaGIT/UK-Music-Index-Returns/main/0-data-gold/data_5_days.csv',
    'https://raw.githubusercontent.com/ThalyaGIT/UK-Music-Index-Returns/main/0-data-gold/data_10_days.csv',
    'https://raw.githubusercontent.com/ThalyaGIT/UK-Music-Index-Returns/main/0-data-gold/data_20_days.csv',
)

# Read every CSV into its own DataFrame, in the same order as the URLs above
df_1_day, df_3_day, df_5_day, df_10_day, df_20_day = (
    pd.read_csv(csv_url)
    for csv_url in (url_1_day, url_3_day, url_5_day, url_10_day, url_20_day)
)

# Parse the 1-day frame's 'Date' column as datetimes; values that cannot be
# parsed become NaT instead of raising
df_1_day['Date'] = pd.to_datetime(df_1_day['Date'], errors='coerce')



# **Next Period**

In [7]:
# For every (horizon, index) pair, regress '% <index> Change' on
# 'Change in SWAV', a set of control columns and month-of-year dummies, then
# collect the 'Change in SWAV' coefficient and p-value into one table row per
# horizon.
results = []

indices = ['FTSE100', 'MSCIUK', 'FTSEAllShare', 'FTSE250', 'FTSESmallCap', 'FTSEAIM']
days_list = [1, 3, 5, 10, 20]

# Explicit horizon -> DataFrame mapping (instead of looking names up in
# globals()), so a missing frame fails here with a clear NameError
frames_by_days = {
    1: df_1_day,
    3: df_3_day,
    5: df_5_day,
    10: df_10_day,
    20: df_20_day,
}

for days in days_list:
    df = frames_by_days[days]

    # Date parsing and the month dummies depend only on the frame, not on the
    # index being modelled, so build them once per horizon.
    # NOTE: this still writes the parsed 'Date' and a 'Month' column back onto
    # the shared frame, exactly as before.
    df['Date'] = pd.to_datetime(df['Date'])
    df['Month'] = df['Date'].dt.month

    # One 0/1 column per month; the first month level is dropped to avoid
    # perfect collinearity with the constant term
    month_dummies = pd.get_dummies(df['Month'], prefix='Month', drop_first=True).astype(int)

    result_row = [days]
    for index in indices:
        # Dependent variable, with non-numeric entries coerced to NaN
        y = pd.to_numeric(df[f'% {index} Change'], errors='coerce')

        # Independent variables: SWAV change, controls, then month dummies
        X = df[['Change in SWAV',
                'ADS_Change',
                'EPU_Change',
                f'Previous % {index} Change',
                '% MSCI Change',
                'Vix Close',
                'Rolling_Avg_Change_in_DCC']]
        X = pd.concat([X, month_dummies], axis=1)
        X = X.apply(pd.to_numeric, errors='coerce')

        # Keep only rows that are complete in BOTH X and y. Dropping NaNs from
        # X alone would let a NaN in y reach OLS, which does no missing-value
        # handling by default.
        complete_rows = X.notna().all(axis=1) & y.notna()
        X = X.loc[complete_rows]
        y = y.loc[complete_rows]

        if X.empty:
            # No usable observations for this horizon/index combination
            result_row.extend([None, None])
            continue

        # Add the intercept and fit ordinary least squares
        X = sm.add_constant(X)
        model = sm.OLS(y, X).fit()

        # Report the coefficient and p-value for 'Change in SWAV'
        coef = round(model.params['Change in SWAV'], 2)
        p_value = round(model.pvalues['Change in SWAV'], 5)
        result_row.extend([coef, p_value])

    # One finished row per horizon: [days, coef, p, coef, p, ...]
    results.append(result_row)

# Column names mirror the row layout: 'Days', then a Coef/P-value pair per index
columns = ['Days'] + [
    f'{index_name} {stat}'
    for index_name in indices
    for stat in ('Coef', 'P-value')
]

# Convert the results list to a DataFrame
results_df = pd.DataFrame(results, columns=columns)

# Define a function to apply the styling
def color_rows(row, alpha=0.1):
    """Build the per-cell CSS for one row of the results table.

    Intended for ``results_df.style.apply(color_rows, axis=1)``. The row is
    read positionally as ``[Days, coef, p-value, coef, p-value, ...]``.

    Parameters
    ----------
    row : pandas.Series or sequence
        One row of the results table.
    alpha : float, default 0.1
        Significance threshold; a p-value strictly below it is significant.

    Returns
    -------
    list of str
        Exactly ``len(row)`` CSS strings. Only coefficient cells are coloured:
        green for a significant positive coefficient, red for a significant
        negative one, light grey when the p-value is missing or not
        significant. 'Days' and p-value cells are always left unstyled.
    """
    # Work on plain positional values: integer indexing into a string-labelled
    # Series (row[i]) is deprecated in pandas and ambiguous by label.
    cells = list(row)

    styles = ['']  # No coloring for Days
    for i in range(1, len(cells) - 1, 2):
        coef = cells[i]
        p_value = cells[i + 1]

        # Test for "missing" explicitly rather than by truthiness, so that a
        # p-value that rounded to exactly 0.0 still counts as significant.
        significant = pd.notna(p_value) and p_value < alpha

        if not significant:
            coef_style = 'background-color: #d3d3d3'  # lighter grey
        elif coef > 0:
            coef_style = 'background-color: green'
        elif coef < 0:
            coef_style = 'background-color: red'
        else:
            # Significant but the coefficient is 0 (or NaN): no direction to
            # show. Still emit a style so the list stays aligned with the row.
            coef_style = ''

        styles.extend([coef_style, ''])

    # Styler requires one entry per cell; pad if a trailing cell had no pair
    styles.extend([''] * (len(cells) - len(styles)))
    return styles

# Style the results table one row at a time using color_rows
styled_df = results_df.style.apply(color_rows, axis='columns')

# Leave the Styler as the cell's final expression so the notebook renders it
styled_df

  coef = row[i]
  p_value = row[i+1]


Unnamed: 0,Days,FTSE100 Coef,FTSE100 P-value,MSCIUK Coef,MSCIUK P-value,FTSEAllShare Coef,FTSEAllShare P-value,FTSE250 Coef,FTSE250 P-value,FTSESmallCap Coef,FTSESmallCap P-value,FTSEAIM Coef,FTSEAIM P-value
0,1,-0.53,0.70223,0.9,0.4464,-0.28,0.83016,0.56,0.69836,2.05,0.09,2.34,0.05501
1,3,-4.16,0.01486,-0.65,0.68345,-3.51,0.02672,-1.15,0.52792,1.24,0.42661,-0.83,0.64199
2,5,-2.83,0.20136,0.63,0.76302,-2.27,0.26526,-0.47,0.83439,2.87,0.15767,-2.62,0.25865
3,10,-3.06,0.23783,-1.74,0.49529,-2.78,0.24112,-1.59,0.52797,4.15,0.09381,-7.0,0.01445
4,20,5.6,0.05788,-8.84,0.00634,4.68,0.08642,-0.17,0.95434,6.77,0.02865,-6.65,0.05349


In [8]:
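# NOTE: every line of this cell is commented out, so it does not run.
# It is a variant of the regression above that also includes 'GILT2' in the
# index list and 'Stringency_Change' as a regressor, and that has no month
# dummies and no numeric coercion / NaN dropping before fitting.
# import pandas as pd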
# from tabulate import tabulate

# # Initialize an empty list to store results
# results = []

# indices = ['GILT2', 'FTSE100', 'MSCIUK', 'FTSEAllShare', 'FTSE250', 'FTSESmallCap','FTSEAIM']
# days_list = [1, 3, 5, 10, 20]

# for days in days_list:
#     result_row = [days]
#     for index in indices:
#         df = globals()[f'df_{days}_day']  # Dynamically access each DataFrame

#         # Ensure 'Date' column is in datetime format
#         df['Date'] = pd.to_datetime(df['Date'])

#         y = df[f'% {index} Change']
#         X = df[['Change in SWAV',
#                 'ADS_Change',
#                 'EPU_Change',
#                 f'Previous % {index} Change',
#                 '% MSCI Change',
#                 'Vix Close',
#                 'Rolling_Avg_Change_in_DCC',
#                 'Stringency_Change']]

#         X = sm.add_constant(X)
#         model = sm.OLS(y, X).fit()

#         coef = round(model.params['Change in SWAV'], 2)
#         p_value = round(model.pvalues['Change in SWAV'], 5)

#         result_row.extend([coef, p_value])  # Make sure you add both coef and p-value

#     # Append the result row for this combination of days
#     results.append(result_row)

# # Define column names dynamically
# columns = ['Days']
# for index in indices:
#     columns.extend([f'{index} Coef', f'{index} P-value'])  # Ensure you have columns for both coef and p-value

# # Convert the results list to a DataFrame
# results_df = pd.DataFrame(results, columns=columns)

# # Define a function to apply the styling
# def color_rows(row):
#     colors = []
#     for i in range(1, len(row), 2):  # Skip Days, then iterate through Coefs and P-values
#         coef = row[i]
#         p_value = row[i+1]
#         if p_value < 0.1:  # Consider p-values < 0.1 as significant
#             if coef > 0:
#                 colors.extend(['background-color: green', ''])
#             elif coef < 0:
#                 colors.extend(['background-color: red', ''])
#         else:
#             colors.extend(['background-color: #d3d3d3', ''])  # Use a lighter grey color
#     return [''] * 1 + colors  # No coloring for Days

# # Apply the function to each row of the DataFrame
# styled_df = results_df.style.apply(color_rows, axis=1)

# # Display the styled DataFrame
# styled_df

In [9]:
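# NOTE: every line of this cell is commented out, so it does not run.
# It is a per-year variant of the regression: rows are filtered to one
# calendar year at a time (range(2017, 2023)), 'GILT2' and 'Stringency_Change'
# are included, and coefficients are highlighted at p < 0.05 instead of p < 0.1.
# import pandas as pd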
# from tabulate import tabulate

# # Initialize an empty list to store results
# results = []

# indices = ['GILT2', 'FTSE100', 'MSCIUK', 'FTSEAllShare', 'FTSE250', 'FTSESmallCap','FTSEAIM']
# days_list = [1, 3, 5, 10, 20]
# years_list = range(2017, 2023)  # Years from 2017 to 2022

# for year in years_list:
#     for days in days_list:
#         result_row = [year, days]
#         for index in indices:
#             df = globals()[f'df_{days}_day']  # Dynamically access each DataFrame

#             # Ensure 'Date' column is in datetime format
#             df['Date'] = pd.to_datetime(df['Date'])
#             # Filter for the specific year
#             df_filtered = df[df['Date'].dt.year == year]

#             y = df_filtered[f'% {index} Change']
#             X = df_filtered[['Change in SWAV',
#                              'ADS_Change',
#                              'EPU_Change',
#                              f'Previous % {index} Change',
#                              '% MSCI Change',
#                              'Vix Close',
#                              'Rolling_Avg_Change_in_DCC',
#                              'Stringency_Change']]

#             X = sm.add_constant(X)
#             model = sm.OLS(y, X).fit()

#             coef = round(model.params['Change in SWAV'], 2)
#             p_value = round(model.pvalues['Change in SWAV'], 5)

#             result_row.extend([coef, p_value])

#         # Append the result row for this combination of year and days
#         results.append(result_row)

# # Define column names dynamically
# columns = ['Year', 'Days']
# for index in indices:
#     columns.extend([f'{index} Coef', f'{index} P-value'])

# # Convert the results list to a DataFrame
# results_df = pd.DataFrame(results, columns=columns)

# # Define a function to apply the styling
# def color_rows(row):
#     colors = []
#     for i in range(2, len(row), 2):  # Skip Year and Days, then iterate through Coefs and P-values
#         coef = row[i]
#         p_value = row[i+1]
#         if p_value < 0.05:  # Consider p-values < 0.05 as significant
#             if coef > 0:
#                 colors.extend(['background-color: green', ''])
#             elif coef < 0:
#                 colors.extend(['background-color: red', ''])
#         else:
#             colors.extend(['background-color: grey', ''])
#     return [''] * 2 + colors  # No coloring for Year and Days

# # Apply the function to each row of the DataFrame
# styled_df = results_df.style.apply(color_rows, axis=1)

# # Display the styled DataFrame
# styled_df