<a href="https://colab.research.google.com/github/ThalyaGIT/UK-Music-Index-Returns/blob/main/3_data-analysis_notebooks/UK_Music_Happiness_and_Index_Returns_By_Year.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
# Import packages
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from tabulate import tabulate


In [36]:
# Load the gold datasets into DataFrames: one CSV per horizon (1, 3, 5, 10 and 20 days)
url_1_day = 'https://raw.githubusercontent.com/ThalyaGIT/UK-Music-Index-Returns/main/0-data-gold/data_1_days.csv'
url_3_day = 'https://raw.githubusercontent.com/ThalyaGIT/UK-Music-Index-Returns/main/0-data-gold/data_3_days.csv'
url_5_day = 'https://raw.githubusercontent.com/ThalyaGIT/UK-Music-Index-Returns/main/0-data-gold/data_5_days.csv'
url_10_day = 'https://raw.githubusercontent.com/ThalyaGIT/UK-Music-Index-Returns/main/0-data-gold/data_10_days.csv'
url_20_day = 'https://raw.githubusercontent.com/ThalyaGIT/UK-Music-Index-Returns/main/0-data-gold/data_20_days.csv'

df_1_day = pd.read_csv(url_1_day)
df_3_day = pd.read_csv(url_3_day)
df_5_day = pd.read_csv(url_5_day)
df_10_day = pd.read_csv(url_10_day)
df_20_day = pd.read_csv(url_20_day)

# Convert the 'Date' column to datetime in every frame (not only the 1-day one)
# so all horizons leave this cell with the same dtype. Values that cannot be
# parsed are coerced to NaT (Not a Time) instead of raising; NaT rows never
# match a `.dt.year == year` filter.
for _frame in (df_1_day, df_3_day, df_5_day, df_10_day, df_20_day):
    _frame['Date'] = pd.to_datetime(_frame['Date'], errors='coerce')



# **MSCI UK with No Control Variables**

In [37]:
# Baseline model: for each horizon, regress '% MSCIUK Change' on
# 'Change in SWAV' alone (plus an intercept) and print the slope and p-value.
days_list = [1, 3, 5, 10, 20]

# Explicit horizon -> DataFrame mapping. This replaces the name-based
# globals() lookup, so a missing frame fails right here with a clear NameError.
frames_by_days = {
    1: df_1_day,
    3: df_3_day,
    5: df_5_day,
    10: df_10_day,
    20: df_20_day,
}

for days in days_list:
    df = frames_by_days[days]

    y = df['% MSCIUK Change']
    X = sm.add_constant(df[['Change in SWAV']])  # adds the intercept column
    model = sm.OLS(y, X).fit()

    # Rounded for display only: a printed p-value of 0.0 means "below 0.000005",
    # not exactly zero.
    coef = round(model.params['Change in SWAV'], 2)
    p_value = round(model.pvalues['Change in SWAV'], 5)

    print(f"{days} Days")
    print(f"Change in SWAV: {coef}")
    print(f"P-value: {p_value}\n")

1 Days
Change in SWAV: -3.07
P-value: 0.30591

3 Days
Change in SWAV: -8.0
P-value: 0.04961

5 Days
Change in SWAV: -13.66
P-value: 0.00249

10 Days
Change in SWAV: -24.2
P-value: 0.0

20 Days
Change in SWAV: -32.13
P-value: 0.0



# **MSCIUK**

In [61]:
import pandas as pd
from tabulate import tabulate

# Per-year regressions of '% MSCIUK Change' on 'Change in SWAV' plus controls:
# one OLS fit per (year, horizon) pair; only the SWAV slope and p-value are kept.
results = []

days_list = [1, 3, 5, 10, 20]
years_list = range(2017, 2023)  # Years from 2017 to 2022

# Explicit horizon -> DataFrame mapping instead of a name-based globals() lookup
frames_by_days = {
    1: df_1_day,
    3: df_3_day,
    5: df_5_day,
    10: df_10_day,
    20: df_20_day,
}

# Ensure 'Date' is datetime once per frame. The conversion does not depend on
# the year or horizon being fitted, so it is hoisted out of the nested loop.
for df in frames_by_days.values():
    df['Date'] = pd.to_datetime(df['Date'])

target_col = '% MSCIUK Change'
regressor_cols = [
    'Change in SWAV',
    'ADS_Change',
    'EPU_Change',
    'Previous % MSCIUK Change',
    '% MSCI Change',
    'Vix Close',
    'Rolling_Avg_Change_in_DCC',
    'Stringency_Change',
    'TED',
]

for year in years_list:
    for days in days_list:
        df = frames_by_days[days]

        # Keep only the rows dated in the current year
        df_filtered = df[df['Date'].dt.year == year]

        y = df_filtered[target_col]
        X = sm.add_constant(df_filtered[regressor_cols])  # adds the intercept column
        model = sm.OLS(y, X).fit()

        # Values are rounded before being stored, so anything that reads
        # `results` (including the row styling) sees the rounded numbers.
        coef = round(model.params['Change in SWAV'], 2)
        p_value = round(model.pvalues['Change in SWAV'], 5)

        results.append([year, days, coef, p_value])

# One row per (year, horizon), in year-major order
results_df = pd.DataFrame(results, columns=['Year', 'Days', 'Change in SWAV', 'P-value'])

# Define a function to apply the styling
def color_rows(row):
    """Return one CSS background declaration per cell of ``row``.

    A row whose 'P-value' is below 0.1 is painted green when its
    'Change in SWAV' value is positive and red when it is negative.
    Every other row (including a value of exactly zero) is painted grey.
    """
    css = 'background-color: grey'
    if row['P-value'] < 0.1:
        slope = row['Change in SWAV']
        if slope > 0:
            css = 'background-color: green'
        elif slope < 0:
            css = 'background-color: red'
    # Same declaration repeated for every cell so the whole row is shaded
    return [css for _ in range(len(row))]

# Shade the results table row by row using color_rows
styled_df = results_df.style.apply(color_rows, axis='columns')

# Leave the Styler as the cell's last expression so the notebook renders it
styled_df

Unnamed: 0,Year,Days,Change in SWAV,P-value
0,2017,1,1.44,0.61022
1,2017,3,-4.63,0.1704
2,2017,5,-9.71,0.02436
3,2017,10,-0.2,0.96532
4,2017,20,20.96,0.00037
5,2018,1,3.2,0.26551
6,2018,3,6.48,0.10983
7,2018,5,-7.29,0.08733
8,2018,10,-9.44,0.06823
9,2018,20,-19.07,0.00387


## **Next Period's MSCI UK Change**

In [62]:
import pandas as pd
from tabulate import tabulate

# Per-year regressions of 'Next % MSCIUK Change' on 'Change in SWAV' plus controls:
# one OLS fit per (year, horizon) pair; only the SWAV slope and p-value are kept.
results = []

days_list = [1, 3, 5, 10, 20]
years_list = range(2017, 2023)  # Years from 2017 to 2022

# Explicit horizon -> DataFrame mapping instead of a name-based globals() lookup
frames_by_days = {
    1: df_1_day,
    3: df_3_day,
    5: df_5_day,
    10: df_10_day,
    20: df_20_day,
}

# Ensure 'Date' is datetime once per frame. The conversion does not depend on
# the year or horizon being fitted, so it is hoisted out of the nested loop.
for df in frames_by_days.values():
    df['Date'] = pd.to_datetime(df['Date'])

target_col = 'Next % MSCIUK Change'
regressor_cols = [
    'Change in SWAV',
    'ADS_Change',
    'EPU_Change',
    '% MSCIUK Change',
    'Next % MSCI Change',
    'Vix Close',
    'Rolling_Avg_Change_in_DCC',
    'Stringency_Change',
]

for year in years_list:
    for days in days_list:
        df = frames_by_days[days]

        # Keep only the rows dated in the current year
        df_filtered = df[df['Date'].dt.year == year]

        y = df_filtered[target_col]
        X = sm.add_constant(df_filtered[regressor_cols])  # adds the intercept column
        model = sm.OLS(y, X).fit()

        # Values are rounded before being stored, so anything that reads
        # `results` (including the row styling) sees the rounded numbers.
        coef = round(model.params['Change in SWAV'], 2)
        p_value = round(model.pvalues['Change in SWAV'], 5)

        results.append([year, days, coef, p_value])

# One row per (year, horizon), in year-major order
results_df = pd.DataFrame(results, columns=['Year', 'Days', 'Change in SWAV', 'P-value'])

# Define a function to apply the styling
def color_rows(row):
    """Return one CSS background declaration per cell of ``row``.

    A row whose 'P-value' is below 0.1 is painted green when its
    'Change in SWAV' value is positive and red when it is negative.
    Every other row (including a value of exactly zero) is painted grey.
    """
    css = 'background-color: grey'
    if row['P-value'] < 0.1:
        slope = row['Change in SWAV']
        if slope > 0:
            css = 'background-color: green'
        elif slope < 0:
            css = 'background-color: red'
    # Same declaration repeated for every cell so the whole row is shaded
    return [css for _ in range(len(row))]

# Shade the results table row by row using color_rows
styled_df = results_df.style.apply(color_rows, axis='columns')

# Leave the Styler as the cell's last expression so the notebook renders it
styled_df

Unnamed: 0,Year,Days,Change in SWAV,P-value
0,2017,1,-1.85,0.5065
1,2017,3,-1.74,0.60849
2,2017,5,7.98,0.072
3,2017,10,23.33,0.0
4,2017,20,12.85,0.01013
5,2018,1,-0.12,0.9678
6,2018,3,-4.41,0.26927
7,2018,5,1.19,0.79116
8,2018,10,8.21,0.12931
9,2018,20,37.33,0.0


# **FTSE 100**

In [63]:
import pandas as pd
from tabulate import tabulate

# Per-year regressions of '% FTSE100 Change' on 'Change in SWAV' plus controls:
# one OLS fit per (year, horizon) pair; only the SWAV slope and p-value are kept.
results = []

days_list = [1, 3, 5, 10, 20]
years_list = range(2017, 2023)  # Years from 2017 to 2022

# Explicit horizon -> DataFrame mapping instead of a name-based globals() lookup
frames_by_days = {
    1: df_1_day,
    3: df_3_day,
    5: df_5_day,
    10: df_10_day,
    20: df_20_day,
}

# Ensure 'Date' is datetime once per frame. The conversion does not depend on
# the year or horizon being fitted, so it is hoisted out of the nested loop.
for df in frames_by_days.values():
    df['Date'] = pd.to_datetime(df['Date'])

target_col = '% FTSE100 Change'
regressor_cols = [
    'Change in SWAV',
    'ADS_Change',
    'EPU_Change',
    'Previous % FTSE100 Change',
    '% MSCI Change',
    'Vix Close',
    'Rolling_Avg_Change_in_DCC',
    'Stringency_Change',
    'TED',
]

for year in years_list:
    for days in days_list:
        df = frames_by_days[days]

        # Keep only the rows dated in the current year
        df_filtered = df[df['Date'].dt.year == year]

        y = df_filtered[target_col]
        X = sm.add_constant(df_filtered[regressor_cols])  # adds the intercept column
        model = sm.OLS(y, X).fit()

        # Values are rounded before being stored, so anything that reads
        # `results` (including the row styling) sees the rounded numbers.
        coef = round(model.params['Change in SWAV'], 2)
        p_value = round(model.pvalues['Change in SWAV'], 5)

        results.append([year, days, coef, p_value])

# One row per (year, horizon), in year-major order
results_df = pd.DataFrame(results, columns=['Year', 'Days', 'Change in SWAV', 'P-value'])

# Define a function to apply the styling
def color_rows(row):
    """Return one CSS background declaration per cell of ``row``.

    A row whose 'P-value' is below 0.1 is painted green when its
    'Change in SWAV' value is positive and red when it is negative.
    Every other row (including a value of exactly zero) is painted grey.
    """
    css = 'background-color: grey'
    if row['P-value'] < 0.1:
        slope = row['Change in SWAV']
        if slope > 0:
            css = 'background-color: green'
        elif slope < 0:
            css = 'background-color: red'
    # Same declaration repeated for every cell so the whole row is shaded
    return [css for _ in range(len(row))]

# Shade the results table row by row using color_rows
styled_df = results_df.style.apply(color_rows, axis='columns')

# Leave the Styler as the cell's last expression so the notebook renders it
styled_df

Unnamed: 0,Year,Days,Change in SWAV,P-value
0,2017,1,0.99,0.73824
1,2017,3,-0.67,0.85451
2,2017,5,0.53,0.90624
3,2017,10,6.65,0.16263
4,2017,20,-1.71,0.75831
5,2018,1,5.89,0.08913
6,2018,3,4.57,0.27901
7,2018,5,-3.27,0.4713
8,2018,10,-9.6,0.07674
9,2018,20,-5.59,0.3911


# **Next FTSE 100**




In [64]:
import pandas as pd
from tabulate import tabulate

# Per-year regressions of 'Next % FTSE100 Change' on 'Change in SWAV' plus controls:
# one OLS fit per (year, horizon) pair; only the SWAV slope and p-value are kept.
results = []

days_list = [1, 3, 5, 10, 20]
years_list = range(2017, 2023)  # Years from 2017 to 2022

# Explicit horizon -> DataFrame mapping instead of a name-based globals() lookup
frames_by_days = {
    1: df_1_day,
    3: df_3_day,
    5: df_5_day,
    10: df_10_day,
    20: df_20_day,
}

# Ensure 'Date' is datetime once per frame. The conversion does not depend on
# the year or horizon being fitted, so it is hoisted out of the nested loop.
for df in frames_by_days.values():
    df['Date'] = pd.to_datetime(df['Date'])

target_col = 'Next % FTSE100 Change'
regressor_cols = [
    'Change in SWAV',
    'ADS_Change',
    'EPU_Change',
    '% FTSE100 Change',
    'Next % MSCI Change',
    'Vix Close',
    'Rolling_Avg_Change_in_DCC',
    'Stringency_Change',
]

for year in years_list:
    for days in days_list:
        df = frames_by_days[days]

        # Keep only the rows dated in the current year
        df_filtered = df[df['Date'].dt.year == year]

        y = df_filtered[target_col]
        X = sm.add_constant(df_filtered[regressor_cols])  # adds the intercept column
        model = sm.OLS(y, X).fit()

        # Values are rounded before being stored, so anything that reads
        # `results` (including the row styling) sees the rounded numbers.
        coef = round(model.params['Change in SWAV'], 2)
        p_value = round(model.pvalues['Change in SWAV'], 5)

        results.append([year, days, coef, p_value])

# One row per (year, horizon), in year-major order
results_df = pd.DataFrame(results, columns=['Year', 'Days', 'Change in SWAV', 'P-value'])

# Define a function to apply the styling
def color_rows(row):
    """Return one CSS background declaration per cell of ``row``.

    A row whose 'P-value' is below 0.1 is painted green when its
    'Change in SWAV' value is positive and red when it is negative.
    Every other row (including a value of exactly zero) is painted grey.
    """
    css = 'background-color: grey'
    if row['P-value'] < 0.1:
        slope = row['Change in SWAV']
        if slope > 0:
            css = 'background-color: green'
        elif slope < 0:
            css = 'background-color: red'
    # Same declaration repeated for every cell so the whole row is shaded
    return [css for _ in range(len(row))]

# Shade the results table row by row using color_rows
styled_df = results_df.style.apply(color_rows, axis='columns')

# Leave the Styler as the cell's last expression so the notebook renders it
styled_df

Unnamed: 0,Year,Days,Change in SWAV,P-value
0,2017,1,-2.99,0.30182
1,2017,3,7.18,0.04286
2,2017,5,18.86,1e-05
3,2017,10,15.84,0.00074
4,2017,20,36.33,0.0
5,2018,1,-3.13,0.3505
6,2018,3,-2.82,0.51287
7,2018,5,0.93,0.85171
8,2018,10,13.11,0.04365
9,2018,20,32.95,2e-05


# **FTSE 250**

In [65]:
import pandas as pd
from tabulate import tabulate

# Per-year regressions of '% FTSE250 Change' on 'Change in SWAV' plus controls:
# one OLS fit per (year, horizon) pair; only the SWAV slope and p-value are kept.
results = []

days_list = [1, 3, 5, 10, 20]
years_list = range(2017, 2023)  # Years from 2017 to 2022

# Explicit horizon -> DataFrame mapping instead of a name-based globals() lookup
frames_by_days = {
    1: df_1_day,
    3: df_3_day,
    5: df_5_day,
    10: df_10_day,
    20: df_20_day,
}

# Ensure 'Date' is datetime once per frame. The conversion does not depend on
# the year or horizon being fitted, so it is hoisted out of the nested loop.
for df in frames_by_days.values():
    df['Date'] = pd.to_datetime(df['Date'])

target_col = '% FTSE250 Change'
# NOTE(review): the '% MSCIUK Change' and '% FTSE100 Change' models in this
# notebook also control for 'TED'; it is absent here. Confirm this is intended.
regressor_cols = [
    'Change in SWAV',
    'ADS_Change',
    'EPU_Change',
    'Previous % FTSE250 Change',
    '% MSCI Change',
    'Vix Close',
    'Rolling_Avg_Change_in_DCC',
    'Stringency_Change',
]

for year in years_list:
    for days in days_list:
        df = frames_by_days[days]

        # Keep only the rows dated in the current year
        df_filtered = df[df['Date'].dt.year == year]

        y = df_filtered[target_col]
        X = sm.add_constant(df_filtered[regressor_cols])  # adds the intercept column
        model = sm.OLS(y, X).fit()

        # Values are rounded before being stored, so anything that reads
        # `results` (including the row styling) sees the rounded numbers.
        coef = round(model.params['Change in SWAV'], 2)
        p_value = round(model.pvalues['Change in SWAV'], 5)

        results.append([year, days, coef, p_value])

# One row per (year, horizon), in year-major order
results_df = pd.DataFrame(results, columns=['Year', 'Days', 'Change in SWAV', 'P-value'])

# Define a function to apply the styling
def color_rows(row):
    """Return one CSS background declaration per cell of ``row``.

    A row whose 'P-value' is below 0.1 is painted green when its
    'Change in SWAV' value is positive and red when it is negative.
    Every other row (including a value of exactly zero) is painted grey.
    """
    css = 'background-color: grey'
    if row['P-value'] < 0.1:
        slope = row['Change in SWAV']
        if slope > 0:
            css = 'background-color: green'
        elif slope < 0:
            css = 'background-color: red'
    # Same declaration repeated for every cell so the whole row is shaded
    return [css for _ in range(len(row))]

# Shade the results table row by row using color_rows
styled_df = results_df.style.apply(color_rows, axis='columns')

# Leave the Styler as the cell's last expression so the notebook renders it
styled_df

Unnamed: 0,Year,Days,Change in SWAV,P-value
0,2017,1,-1.62,0.54632
1,2017,3,-1.35,0.67646
2,2017,5,1.49,0.72909
3,2017,10,4.2,0.3741
4,2017,20,16.15,0.00149
5,2018,1,6.27,0.05154
6,2018,3,7.41,0.06462
7,2018,5,0.37,0.93214
8,2018,10,-8.4,0.12148
9,2018,20,11.27,0.09088


# **Next FTSE 250**

In [66]:
import pandas as pd
from tabulate import tabulate

# Per-year regressions of 'Next % FTSE250 Change' on 'Change in SWAV' plus controls:
# one OLS fit per (year, horizon) pair; only the SWAV slope and p-value are kept.
results = []

days_list = [1, 3, 5, 10, 20]
years_list = range(2017, 2023)  # Years from 2017 to 2022

# Explicit horizon -> DataFrame mapping instead of a name-based globals() lookup
frames_by_days = {
    1: df_1_day,
    3: df_3_day,
    5: df_5_day,
    10: df_10_day,
    20: df_20_day,
}

# Ensure 'Date' is datetime once per frame. The conversion does not depend on
# the year or horizon being fitted, so it is hoisted out of the nested loop.
for df in frames_by_days.values():
    df['Date'] = pd.to_datetime(df['Date'])

target_col = 'Next % FTSE250 Change'
regressor_cols = [
    'Change in SWAV',
    'ADS_Change',
    'EPU_Change',
    '% FTSE250 Change',
    'Next % MSCI Change',
    'Vix Close',
    'Rolling_Avg_Change_in_DCC',
    'Stringency_Change',
]

for year in years_list:
    for days in days_list:
        df = frames_by_days[days]

        # Keep only the rows dated in the current year
        df_filtered = df[df['Date'].dt.year == year]

        y = df_filtered[target_col]
        X = sm.add_constant(df_filtered[regressor_cols])  # adds the intercept column
        model = sm.OLS(y, X).fit()

        # Values are rounded before being stored, so anything that reads
        # `results` (including the row styling) sees the rounded numbers.
        coef = round(model.params['Change in SWAV'], 2)
        p_value = round(model.pvalues['Change in SWAV'], 5)

        results.append([year, days, coef, p_value])

# One row per (year, horizon), in year-major order
results_df = pd.DataFrame(results, columns=['Year', 'Days', 'Change in SWAV', 'P-value'])

# Define a function to apply the styling
def color_rows(row):
    """Return one CSS background declaration per cell of ``row``.

    A row whose 'P-value' is below 0.1 is painted green when its
    'Change in SWAV' value is positive and red when it is negative.
    Every other row (including a value of exactly zero) is painted grey.
    """
    css = 'background-color: grey'
    if row['P-value'] < 0.1:
        slope = row['Change in SWAV']
        if slope > 0:
            css = 'background-color: green'
        elif slope < 0:
            css = 'background-color: red'
    # Same declaration repeated for every cell so the whole row is shaded
    return [css for _ in range(len(row))]

# Shade the results table row by row using color_rows
styled_df = results_df.style.apply(color_rows, axis='columns')

# Leave the Styler as the cell's last expression so the notebook renders it
styled_df

Unnamed: 0,Year,Days,Change in SWAV,P-value
0,2017,1,-0.69,0.7962
1,2017,3,6.76,0.03288
2,2017,5,10.72,0.01151
3,2017,10,9.88,0.02756
4,2017,20,21.54,1e-05
5,2018,1,-2.43,0.42437
6,2018,3,-4.93,0.21121
7,2018,5,0.2,0.96579
8,2018,10,17.57,0.00136
9,2018,20,36.41,0.0
