In [23]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import os

# Define a function to read and preprocess the text files, returning a string
def preprocess_text(file_path):
    with open(file_path, 'r') as file:
        text = file.read().lower()
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(text)
    tokens = [token for token in tokens if token not in stop_words and token.isalpha()]
    return ' '.join(tokens)

# Specify the folder name
folder = 'FED'

# For each file in the folder
def process_file(file_name):
    # Preprocess the text
    file_path = os.path.join(folder, file_name)
    document = preprocess_text(file_path)

    # Create a TF-IDF Vectorizer instance
    vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english')

    # Apply the vectorizer to the document
    tfidf = vectorizer.fit_transform([document])

    # Create an NMF instance
    nmf = NMF(n_components=5, random_state=1)

    # Apply the NMF model to your TF-IDF matrix
    W = nmf.fit_transform(tfidf)
    H = nmf.components_

    # Print the topics
    print(f"File: {file_name}")
    for i, topic in enumerate(H):
        print(f"Topic {i}:")
        print(" ".join([vectorizer.get_feature_names_out()[i] for i in topic.argsort()[-10:]]))
    print("\n")

# Specify the file name
file_name = '2020F.txt'

process_file(file_name)

In [31]:
FED2020F = process_file('2020F.txt')
FED2020S = process_file('2020S.txt')

  return np.sqrt(res * 2)


File: 2020F.txt
Topic 0:
market economic funds year growth inflation policy percent federal rate
Topic 1:
influences significant middle events ip output unchanged governments robust monetary
Topic 2:
principles continued historically disruptions coronavirus half confidence indicate faced lfprs
Topic 3:
intervals past changes elevated objective result declined groups banks changed
Topic 4:
tax house surrounding directly payments considerable periods broad measure gross


File: 2020S.txt
Topic 0:
financial securities economic credit businesses treasury percent market reserve federal
Topic 1:
uncertainty additional years reducing actions conditions substantial legislation insurance associated
Topic 2:
targeted annual late firms borrowers smooth enacted fed percentage sectors
Topic 3:
quickly congress goods falling level available compensation payroll subsequent especially
Topic 4:
cost markets unemployed notable implemented temporarily dealers volatility commercial mbs




In [32]:
FED2021F = process_file('2021F.txt')
FED2021S = process_file('2021S.txt')

  return np.sqrt(res * 2)


File: 2021F.txt
Topic 0:
pandemic monetary employment federal market economic rate percent inflation policy
Topic 1:
achieve estimated rules recently volatility monitoring work provided nominal increase
Topic 2:
told fall primarily policymakers error afe nontraditional main result discussion
Topic 3:
earlier residential shows consistent rate measure patterns million actions rise
Topic 4:
concluded longerterm burdens descriptions considerably backed mutual ramped indicatorsthe stay


File: 2021S.txt
Topic 0:
labor levels prices percent year market policy rate federal inflation
Topic 1:
led figure line broad appropriate data temporarily reflects trade contrast
Topic 2:
moved wage trillion upward lower vaccinations liabilities tips product continuing
Topic 3:
expansion population older important countries elb return including prepandemic situation
Topic 4:
rising result effr spike outcome ahead funding american excludes months




In [33]:
FED2022F = process_file('2022F.txt')
FED2022S = process_file('2022S.txt')

File: 2022F.txt
Topic 0:
rate prices market half supply labor federal percent year inflation
Topic 1:
ongoing amid gains led unemployment shift rrp observed share shows
Topic 2:
business listed years partially united declines growth earnings start motor
Topic 3:
infrastructure mmfs aid leisure accelerated rebounded leverage estimates january remain
Topic 4:
reference set activity effective canada equipment related advanced personal leverage


File: 2022S.txt
Topic 0:
continued levels market prices federal year percent inflation rate policy
Topic 1:
associated deficit sector uncertainty markets leverage recently reduce months provided
Topic 2:
emerging forward level number concentrated board chinese holdings china rapidly
Topic 3:
weighing effects hiring principles eme meeting agency wealth developments support
Topic 4:
delivery issuance illustrate modestly contacts labor governments approach riksbank different




  return np.sqrt(res * 2)


In [34]:
# Specify the folder name
folder = 'PBOC'

# For each file in the folder
def process_file(file_name):
    # Preprocess the text
    file_path = os.path.join(folder, file_name)
    document = preprocess_text(file_path)

    # Create a TF-IDF Vectorizer instance
    vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english')

    # Apply the vectorizer to the document
    tfidf = vectorizer.fit_transform([document])

    # Create an NMF instance
    nmf = NMF(n_components=5, random_state=1)

    # Apply the NMF model to your TF-IDF matrix
    W = nmf.fit_transform(tfidf)
    H = nmf.components_

    # Print the topics
    print(f"File: {file_name}")
    for i, topic in enumerate(H):
        print(f"Topic {i}:")
        print(" ".join([vectorizer.get_feature_names_out()[i] for i in topic.argsort()[-10:]]))
    print("\n")

In [35]:
PBOC2020F = process_file('2020F.txt')
PBOC2020S = process_file('2020S.txt')

  return np.sqrt(res * 2)


File: 2020F.txt
Topic 0:
rates pbc central points bank rate market financial year percent
Topic 1:
maintaining outstanding requirements industrial remaining total catering direct taken efficiency
Topic 2:
conducted featuring fully achievements expectations china鈥檚 marginal onyear progress risk
Topic 3:
steps repayments supplies significant guiding repo good micro remain august
Topic 4:
chinadevelopment yielding trendingdownward buy era fines interestrates carried increased export


File: 2020S.txt
Topic 0:
central policy exchange trillion pbc rate market financial year percent
Topic 1:
arrangements governance equipment high brought range giving managed chains achieved
Topic 2:
expanded allowed line closed declined tools stabilizer payments receipts march
Topic 3:
registering boost showed rebound initial fairly beginning protectionism remained reform
Topic 4:
rebounding improving purchases created parities going properly building outstanding coordinated




In [36]:
PBOC2021F = process_file('2021F.txt')
PBOC2021S = process_file('2021S.txt')

  return np.sqrt(res * 2)


File: 2021F.txt
Topic 0:
monetary growth pbc rates policy financial market rate year percent
Topic 1:
certain institutions inflation total needs equilibrium adaptive operations performance lpr
Topic 2:
internal integrated state amounted needs assets significantly normal relevant strengthening
Topic 3:
achieve jinping crude omos decreasing enhancing accelerating bonds improve different
Topic 4:
safe russia communications alleviation goals likely financial usd boost floating


File: 2021S.txt
Topic 0:
rates institutions growth policy rate pbc market financial year percent
Topic 1:
month rapid period trading rapidly fifth framework price kept maintain
Topic 2:
reducing year began application important averaged yields significant play guided
Topic 3:
output federal stabilizer deepened seven normal reduction phased way core
Topic 4:
transparent removed appreciate paper vitality demanding upwards strengthens interconnected april




In [37]:
PBOC2022F = process_file('2022F.txt')
PBOC2022S = process_file('2022S.txt')

  return np.sqrt(res * 2)


File: 2022F.txt
Topic 0:
rates trillion growth institutions market rate pbc financial year percent
Topic 1:
msmes area kept inflation insurance imports appreciated overseas point structure
Topic 2:
preparations regulation relief practice expected rapid affiliated help actually posted
Topic 3:
allocations changes supports mix weakened orderly cpi favorable increment directions
Topic 4:
aggregates access featuring upgrading msmes mlf care bilateral second focusing


File: 2022S.txt
Topic 0:
loans policy institutions exchange trillion market rate financial year percent
Topic 1:
representing revitalization role investments savings funds conducting shocks locally fully
Topic 2:
elderly million food operating real businesses early factors structural shanghai
Topic 3:
qualified enterprises focused room areas amid agriculture efficiency progress stimulus
Topic 4:
price advancing committee previous point payments shenzhen aggregates maturity capital


