### Basic imports & helper functions

In [56]:
import pickle
import re
import pandas as pd
import numpy as np
import random


In [6]:
# Relative path of the pickled ratings file that the next cell opens and unpickles.
file_path = 'data/ratings_retail_indus.pkl'

In [8]:
# Deserialize the raw ratings object from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(file_path, mode='rb') as pickle_file:
    data = pickle.load(pickle_file)

In [11]:
# Build the frame and flip it in one step; in the displayed result the rows
# are company tickers and the columns are quarters.
df = pd.DataFrame(data).transpose()
df

Unnamed: 0,2010Q1,2010Q2,2010Q3,2010Q4,2011Q1,2011Q2,2011Q3,2011Q4,2012Q1,2012Q2,...,2021Q4,2022Q1,2022Q2,2022Q3,2022Q4,2023Q1,2023Q2,2023Q3,2023Q4,2024Q1
AAL,B-,B-,B-,B-,B-,B-,B-,B-,D,D,...,BB-,BB-,BB-,BB-,BB-,BB-,BB-,BB-,BB-,BB-
AAP,BB+,BB+,BBB-,BBB-,BBB-,BBB-,BBB-,BBB-,BBB-,BBB-,...,BBB-,BBB-,BBB-,BBB-,BBB-,BBB-,BBB-,BBB-,BBB-,BBB-
ABG,B+,B+,B+,B+,B+,B+,B+,B+,B+,B+,...,BB+,BB+,BB+,BB+,BB+,BB+,BB+,BB+,BB+,BB+
ACCO,B+,B+,B+,B+,B+,B+,B+,B+,B+,B+,...,BB-,BB-,BB-,BB-,BB-,BB-,BB-,BB-,BB-,BB-
ACM,,,,,,,,,,,...,BB,BB,BB,BB,BB,BB,BB,BB,BB,BB
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WWW,,,,,,,,,,,...,BB+,BB+,BB+,BB+,BB+,BB+,BB+,BB+,BB+,BB+
WYNN,BB,BB,BB,BB,BB,BB,BB+,BB+,BB+,BB+,...,BB-,BB-,BB-,BB-,BB-,BB-,BB-,BB-,BB-,BB-
XPO,,,,,,,,,,,...,B+,B+,B+,B+,B+,B+,B+,B+,B+,B+
XYL,,,,,,,,BBB,BBB,BBB,...,BBB,BBB,BBB,BBB,BBB,BBB,BBB,BBB,BBB,BBB


In [14]:
# Letter ratings ordered from worst (index 0) to best (last index).
# 'CC' belongs *below* the entire 'CCC' band on the agency letter scale, so it
# must come right after 'D'. Placing it after 'CCC+' would rank CC above CCC+
# and flip the sign of any change into or out of CC.
rating_order = [
    'D', 'CC', 'CCC-', 'CCC', 'CCC+',
    'B-', 'B', 'B+', 'BB-', 'BB', 'BB+',
    'BBB-', 'BBB', 'BBB+', 'A-', 'A', 'A+',
    'AA-', 'AA', 'AA+', 'AAA'
]

In [16]:
# Lookup table: rating label -> its position in rating_order (0 = first entry).
rating_to_num = dict(zip(rating_order, range(len(rating_order))))

In [18]:
def rating_to_numeric(rating):
    """Return the rank of a single rating label.

    Missing values (anything ``pd.isna`` reports as null) and labels that are
    not keys of ``rating_to_num`` both yield ``None``.
    """
    return None if pd.isna(rating) else rating_to_num.get(rating)

In [19]:
# Convert every letter rating to its numeric rank, column by column.
# DataFrame.applymap is deprecated since pandas 2.1 (FutureWarning); mapping
# each column with Series.map applies the same element-wise helper and works
# on both older and newer pandas versions.
df_num = df.apply(lambda column: column.map(rating_to_numeric))

In [None]:
# Change in numeric rating between each column and the column to its left
# (row-wise difference); the first column has no predecessor and becomes NaN.
df_diff = df_num.diff(periods=1, axis="columns")

In [22]:
# The first column has no previous quarter to compare with, so diff() left it
# all-NaN. Overwrite it: companies that already have a rating in the first
# column get a change of 0, the rest stay missing.
# NOTE(review): this counts the first observation as "no change" only for
# companies rated in the very first column — confirm that is intended.
first_quarter = df.iloc[:, 0]
has_initial_rating = first_quarter.notna()
df_diff.iloc[:, 0] = has_initial_rating.map(lambda rated: 0 if rated else None)

AAL       B-
AAP      BB+
ABG       B+
ACCO      B+
ACM      NaN
        ... 
WWW      NaN
WYNN      BB
XPO      NaN
XYL      NaN
YUM     BBB-
Name: 2010Q1, Length: 392, dtype: object

In [25]:
def count_changes(diff_df):
    """Tally the cells of ``diff_df`` by sign.

    Returns a ``(no_change, increase, decrease)`` tuple holding the number of
    cells equal to zero, greater than zero and less than zero respectively.
    NaN cells satisfy none of the comparisons and are therefore not counted.
    """
    sign_masks = (diff_df.eq(0), diff_df.gt(0), diff_df.lt(0))
    no_change, increase, decrease = (mask.sum().sum() for mask in sign_masks)
    return no_change, increase, decrease

In [26]:
# Overall tallies of unchanged / upgraded / downgraded cells in df_diff.
change_totals = count_changes(df_diff)
no_change, increase, decrease = change_totals
print(
    f"No Change: {no_change}",
    f"Increase: {increase}",
    f"Decrease: {decrease}",
    sep="\n",
)

No Change: 19962
Increase: 357
Decrease: 168


In [52]:
def extract_ratings_by_change_type(df, df_diff):
    """Group (company, quarter) pairs by the sign of their rating change.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index (companies) and columns (quarters) define which
        cells are inspected and in which order.
    df_diff : pandas.DataFrame
        Numeric change per cell; must contain every label of ``df``
        (a missing label raises ``KeyError``).

    Returns
    -------
    tuple[list, list, list]
        ``(increase, decrease, no_change)`` — lists of ``(company, quarter)``
        tuples for cells whose change is > 0, < 0 and == 0 respectively.
        Pairs are ordered company by company and, within a company, quarter
        by quarter. NaN cells appear in none of the lists.
    """
    companies = df.index
    quarters = df.columns

    # Align df_diff to df's labels once, then compare whole arrays at a time
    # instead of doing up to three .loc scalar lookups per cell.
    deltas = df_diff.loc[companies, quarters].to_numpy(dtype=float)

    def _pairs_where(mask):
        # np.nonzero walks the mask in row-major order, which matches the
        # company-then-quarter order of a nested loop over index and columns.
        rows, cols = np.nonzero(mask)
        return [(companies[r], quarters[c]) for r, c in zip(rows, cols)]

    increase = _pairs_where(deltas > 0)
    decrease = _pairs_where(deltas < 0)
    no_change = _pairs_where(deltas == 0)

    return increase, decrease, no_change

In [53]:
# Collect the (company, quarter) pairs for upgrades, downgrades and unchanged
# ratings. These names now refer to lists of pairs.
change_groups = extract_ratings_by_change_type(df, df_diff)
increase, decrease, no_change = change_groups


In [54]:
# Size of the smallest of the three groups; used as the per-group sample size.
num_samples = min(map(len, (increase, decrease, no_change)))


In [57]:
# Draw num_samples pairs from each group without replacement.
# A dedicated, seeded generator makes the draw reproducible under
# Restart & Run All; the previous unseeded random.sample calls produced a
# different sample on every run.
SAMPLE_SEED = 42
_sampler = random.Random(SAMPLE_SEED)
selected_increase = _sampler.sample(increase, num_samples)
selected_decrease = _sampler.sample(decrease, num_samples)
selected_no_change = _sampler.sample(no_change, num_samples)

In [59]:
# Single list of sampled pairs: increases first, then decreases, then no-change.
selected_data = [*selected_increase, *selected_decrease, *selected_no_change]


In [62]:
# Nested mapping {company: {quarter: letter rating}} for every sampled pair.
final_ratings = {}
for company, quarter in selected_data:
    # setdefault creates the per-company dict on first sight of the company.
    final_ratings.setdefault(company, {})[quarter] = df.loc[company, quarter]

In [64]:
# Plain-text dump of the sampled ratings mapping.
summary_label = "Final Ratings:"
print(summary_label, final_ratings)

Final Ratings: {'AL': {'2017Q1': 'BBB', '2020Q3': 'BBB'}, 'CAL': {'2010Q2': 'B', '2015Q1': 'BB-', '2014Q1': 'B+', '2010Q4': 'B+', '2012Q1': 'B'}, 'GT': {'2015Q3': 'BB'}, 'DAN': {'2014Q2': 'BB+', '2012Q2': 'BB', '2010Q3': 'B+'}, 'GBX': {'2012Q2': 'B', '2013Q1': 'B+', '2020Q3': 'BB', '2023Q4': 'BB'}, 'SRI': {'2011Q3': 'BB-', '2014Q4': 'BB', '2019Q2': 'BB'}, 'UNP': {'2012Q3': 'A-', '2011Q1': 'BBB+'}, 'HA': {'2015Q3': 'B+', '2016Q3': 'BB-'}, 'LEVI': {'2014Q1': 'BB-', '2017Q1': 'BB+'}, 'RCL': {'2015Q4': 'BB+', '2011Q1': 'BB'}, 'LII': {'2014Q2': 'BBB', '2014Q4': 'BBB', '2014Q3': 'BBB'}, 'ENOV': {'2015Q2': 'BB+'}, 'CAR': {'2010Q2': 'B+', '2014Q3': 'BB-', '2017Q1': 'BB'}, 'LKQ': {'2010Q3': 'BB', '2011Q3': 'BB+', '2016Q2': 'BB'}, 'ITT': {'2016Q3': 'BBB', '2012Q1': 'BBB-'}, 'WAB': {'2015Q3': 'BBB', '2011Q3': 'BB+'}, 'OSK': {'2010Q4': 'BB-', '2014Q1': 'BB+'}, 'IHG': {'2011Q3': 'BBB'}, 'GLDD': {'2015Q1': 'B', '2013Q4': 'B-', '2016Q1': 'B-', '2015Q3': 'B'}, 'CPS': {'2013Q2': 'BB-'}, 'M': {'2011Q3':

In [65]:
# Bare expression: shows the {company: {quarter: rating}} mapping as the cell's output.
final_ratings

{'AL': {'2017Q1': 'BBB', '2020Q3': 'BBB'},
 'CAL': {'2010Q2': 'B',
  '2015Q1': 'BB-',
  '2014Q1': 'B+',
  '2010Q4': 'B+',
  '2012Q1': 'B'},
 'GT': {'2015Q3': 'BB'},
 'DAN': {'2014Q2': 'BB+', '2012Q2': 'BB', '2010Q3': 'B+'},
 'GBX': {'2012Q2': 'B', '2013Q1': 'B+', '2020Q3': 'BB', '2023Q4': 'BB'},
 'SRI': {'2011Q3': 'BB-', '2014Q4': 'BB', '2019Q2': 'BB'},
 'UNP': {'2012Q3': 'A-', '2011Q1': 'BBB+'},
 'HA': {'2015Q3': 'B+', '2016Q3': 'BB-'},
 'LEVI': {'2014Q1': 'BB-', '2017Q1': 'BB+'},
 'RCL': {'2015Q4': 'BB+', '2011Q1': 'BB'},
 'LII': {'2014Q2': 'BBB', '2014Q4': 'BBB', '2014Q3': 'BBB'},
 'ENOV': {'2015Q2': 'BB+'},
 'CAR': {'2010Q2': 'B+', '2014Q3': 'BB-', '2017Q1': 'BB'},
 'LKQ': {'2010Q3': 'BB', '2011Q3': 'BB+', '2016Q2': 'BB'},
 'ITT': {'2016Q3': 'BBB', '2012Q1': 'BBB-'},
 'WAB': {'2015Q3': 'BBB', '2011Q3': 'BB+'},
 'OSK': {'2010Q4': 'BB-', '2014Q1': 'BB+'},
 'IHG': {'2011Q3': 'BBB'},
 'GLDD': {'2015Q1': 'B', '2013Q4': 'B-', '2016Q1': 'B-', '2015Q3': 'B'},
 'CPS': {'2013Q2': 'BB-'},
 'M