# Quotes
Here, I collect all Trump quotes and check whether any of them relate to campaign promises. One option is to embed the quotes and compare them against the narrative embeddings I already have.

In [5]:
import os

In [None]:
# NOTE: leftover scratch cell. The original `pd.read_csv()` call had no file path
# (TypeError) and ran before `import pandas as pd` (NameError on a fresh kernel),
# so it broke Restart & Run All. The quote files are actually loaded by
# load_and_filter_quotes_minimal() further down.

In [6]:
import pandas as pd
from pathlib import Path

def load_and_filter_quotes_minimal(base_dir: str) -> pd.DataFrame:
    """
    Load every quote CSV under ``base_dir`` and keep only Trump-attributed rows.

    Files are discovered recursively with the pattern '*/*_quote.csv', i.e. any
    file whose name ends in '_quote.csv' and that sits at least one folder below
    ``base_dir``. Files are read in sorted path order so that the row order of
    the result is reproducible across runs and machines.

    A row is kept when its 'person_name' value contains the literal,
    case-sensitive substring 'Trump'. Rows with a missing name are dropped and
    files without a 'person_name' column are skipped.

    Args:
        base_dir: The path to the base directory containing year_month folders
                  (e.g., 'Data/Bias_Detector_Data/quote/').

    Returns:
        A pandas DataFrame containing all matching rows (with a fresh 0..n-1
        index), or an empty DataFrame if the directory does not exist or no
        matching data is found.
    """
    base_path = Path(base_dir)
    file_pattern = '*/*_quote.csv'

    # A wrong path would otherwise look exactly like "no Trump quotes found";
    # say what actually happened, but keep the soft-fail (empty frame) contract.
    if not base_path.is_dir():
        print(f"Warning: Directory '{base_path}' does not exist or is not a directory.")
        return pd.DataFrame()

    # rglob yields files in filesystem order, which differs between machines;
    # sorting makes the concatenated result deterministic.
    csv_files = sorted(base_path.rglob(file_pattern))
    print(f"Found {len(csv_files)} files matching the pattern '{file_pattern}'.")

    all_filtered_dfs = []
    for csv_file in csv_files:
        # low_memory=False avoids mixed-type warnings on large/complex files.
        df_temp = pd.read_csv(csv_file, low_memory=False)

        if 'person_name' not in df_temp.columns:
            continue

        # A column that is entirely empty is parsed as float64, and `.str` would
        # raise AttributeError on it; casting to the string dtype keeps missing
        # values as <NA> so `na=False` can exclude them.
        names = df_temp['person_name'].astype('string')
        # regex=False: 'Trump' is a plain substring, not a pattern.
        is_trump = names.str.contains('Trump', regex=False, na=False)
        df_filtered = df_temp[is_trump]

        if not df_filtered.empty:
            all_filtered_dfs.append(df_filtered)

    if not all_filtered_dfs:
        print("Warning: No data containing 'Trump' found in any files.")
        return pd.DataFrame()

    # Single concat at the end (instead of growing a frame inside the loop).
    return pd.concat(all_filtered_dfs, ignore_index=True)

# --- Example usage ---
# Root folder that holds the quote sub-directories; adjust if your layout differs.
data_directory = 'Data/Bias_Detector_Data/quote/'

# Gather every row whose speaker name contains 'Trump' across all quote files.
trump_quotes_df = load_and_filter_quotes_minimal(data_directory)

# Report the outcome; an empty frame means nothing matched or nothing could be read.
if trump_quotes_df.empty:
    print("\nNo data containing 'Trump' was found, or an issue occurred (e.g., directory not found).")
else:
    print("\nLoaded and filtered data.")
    print(f"Total quotes containing 'Trump': {len(trump_quotes_df)}")
    print("\nFirst 5 rows of the combined DataFrame:")
    print(trump_quotes_df.head())

# print("\nDataFrame Info:")
# if not trump_quotes_df.empty: # Only show info if DataFrame is not empty
#     trump_quotes_df.info()


Found 3660 files matching the pattern '*/*_quote.csv'.

Loaded and filtered data.
Total quotes containing 'Trump': 55221

First 5 rows of the combined DataFrame:
                           quote_id  \
0  cbca6782a7c7368b19630f916f1afd28   
1  ca91e27267906029d274d66e664b78d2   
2  16bcd7b9912da61ca4ed63def9fd9473   
3  5f6fdb0f2b9d3c56edd7e55259ad5958   
4  faef33114b15e8b7d234e094b6b055f9   

                                               quote   person_name  \
0  We're going to be working on that the first da...  Donald Trump   
1  I stand before you today not only as your past...  Donald Trump   
2  A little something I had fun with over the win...    Lara Trump   
3            illegal, un-American, unConstitutional.  Donald Trump   
4  wrongfully attempting to deprive me of my Firs...  Donald Trump   

                        person_occupation             person_affiliation  \
0   Former President of the United States                            NaN   
1   Former President of the Un

In [8]:
# Sanity check: print the (rows, columns) shape, then let the notebook render the first rows.
print(trump_quotes_df.shape)
trump_quotes_df.head()

(55221, 12)


Unnamed: 0,quote_id,quote,person_name,person_occupation,person_affiliation,person_domain,person_capacity,finish_reason,article_id,publisher,publisher_full,yearmo
0,cbca6782a7c7368b19630f916f1afd28,We're going to be working on that the first da...,Donald Trump,Former President of the United States,,Politics,subject,stop,846831af311a03a41448c12f7d09fbf2,washingtonpost,Washington Post,2024-03
1,ca91e27267906029d274d66e664b78d2,I stand before you today not only as your past...,Donald Trump,Former President of the United States,,Politics,subject,stop,846831af311a03a41448c12f7d09fbf2,washingtonpost,Washington Post,2024-03
2,16bcd7b9912da61ca4ed63def9fd9473,A little something I had fun with over the win...,Lara Trump,Republican National Committee co-chair,Republican National Committee,Politics,subject,stop,977b6352ab52d12311e8e4630913b899,huffpost,Huffington Post,2024-03
3,5f6fdb0f2b9d3c56edd7e55259ad5958,"illegal, un-American, unConstitutional.",Donald Trump,Former President,Republican Party,Politics,subject,stop,4311e770173b22efde49073a7e51f720,huffpost,Huffington Post,2024-03
4,faef33114b15e8b7d234e094b6b055f9,wrongfully attempting to deprive me of my Firs...,Donald Trump,Former President,Republican Party,Politics,subject,stop,4311e770173b22efde49073a7e51f720,huffpost,Huffington Post,2024-03


In [12]:
# List the distinct speaker labels that survived the 'Trump' substring filter.
trump_quotes_df['person_name'].unique()

array(['Donald Trump', 'Lara Trump', 'Donald J. Trump', 'Melania Trump',
       'Donald John Trump', 'Donald Trump Jr', 'Donald Trump Jr.',
       'Eric Trump', "Trump's lawyers", 'Mary Trump', 'Fred Trump Sr.',
       'Maryanne Trump Barry', 'Mary L. Trump', 'Ivanka Trump',
       'Trump legal team', 'Cherise Trump', 'Trump lawyers',
       'Don Trump, Jr.', 'Donald J Trump', 'Trump',
       "Donald Trump's lawyers", "Melania Trump's office",
       'Trump War Room', 'Don Trump Jr.', "Trump L's", 'Mr. Trump',
       'Barron Trump', 'Ivana Trump', "Trump's lawyer",
       'Real Americans despise Trump', 'Fred Trump III',
       'Trump campaign aide', 'Donald Trump 🇺🇸 News', 'AntiTrumpTexan',
       'Former President Donald J. Trump', 'Don Trump Jr',
       'BadTrumpQuips', "Former President Trump's campaign",
       "Donald Trump's campaign", 'Trump campaign', 'Carolina Trump',
       'Luke Trump', 'Maryanne Trump', 'Kai Madison Trump', 'Kai Trump',
       'former Trump Justice Departm

In [14]:
# Speaker labels treated as Donald Trump himself (exact-match whitelist).
donald_trump_references = [
    # Full-name variants
    "Donald Trump",
    "Donald J. Trump",
    "Donald John Trump",
    "Donald J Trump",
    # Surname-only forms
    "Trump",
    "Mr. Trump",
    # Forms carrying a title
    "Former President Donald J. Trump",
    "President-elect Trump",
    "President-elect Donald J. Trump",
    # Other labels
    "Donald Trump Sr.",
    # NOTE(review): the next two look like parody / synthetic attributions rather
    # than genuine statements -- confirm they belong in this list.
    "Felonius J. Trump",
    "AI Trump",
]

In [15]:
# Keep only rows whose speaker label exactly matches one of the accepted aliases.
is_donald_trump = trump_quotes_df['person_name'].isin(donald_trump_references)
trump_quotes_df = trump_quotes_df.loc[is_donald_trump]
print(trump_quotes_df.shape)
trump_quotes_df.head()

(51510, 12)


Unnamed: 0,quote_id,quote,person_name,person_occupation,person_affiliation,person_domain,person_capacity,finish_reason,article_id,publisher,publisher_full,yearmo
0,cbca6782a7c7368b19630f916f1afd28,We're going to be working on that the first da...,Donald Trump,Former President of the United States,,Politics,subject,stop,846831af311a03a41448c12f7d09fbf2,washingtonpost,Washington Post,2024-03
1,ca91e27267906029d274d66e664b78d2,I stand before you today not only as your past...,Donald Trump,Former President of the United States,,Politics,subject,stop,846831af311a03a41448c12f7d09fbf2,washingtonpost,Washington Post,2024-03
3,5f6fdb0f2b9d3c56edd7e55259ad5958,"illegal, un-American, unConstitutional.",Donald Trump,Former President,Republican Party,Politics,subject,stop,4311e770173b22efde49073a7e51f720,huffpost,Huffington Post,2024-03
4,faef33114b15e8b7d234e094b6b055f9,wrongfully attempting to deprive me of my Firs...,Donald Trump,Former President,Republican Party,Politics,subject,stop,4311e770173b22efde49073a7e51f720,huffpost,Huffington Post,2024-03
5,0f41e3f36520b52bfe9bba236d6323e6,"If I don't get elected, it's going to be a blo...",Donald Trump,Former President,,Politics,commentary,stop,420a5cc73281ca39c539fa9a6a01615f,huffpost,Huffington Post,2024-03


In [18]:
# Save the filtered quotes as Parquet (index dropped).
# to_parquet does not create missing parent folders, so make sure the target
# directory exists first; otherwise a fresh checkout fails at this cell.
output_path = Path('Data/Quotes/trump_quotes.parquet')
output_path.parent.mkdir(parents=True, exist_ok=True)
trump_quotes_df.to_parquet(output_path, index=False)