### Purpose:
The purpose of this notebook is to clean and re-integrate the reacquired content and then scan it with the readability API.

### Dependencies:

In [37]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import os
import csv

### Functions:

In [38]:
def clean_text(text: str) -> str:
    """
    Flatten whitespace in a string: tabs and newlines become spaces, and any run of two or more spaces is squeezed down to one.

    Leading and trailing spaces are not stripped, and carriage returns are left untouched.

    Parameters:
    text (str): The raw text to normalise.

    Returns:
    str: The text with tabs/newlines replaced and repeated spaces collapsed.
    """
    # Order matters: tabs and newlines are turned into spaces first so that
    # the final pattern can also collapse the runs of spaces they create.
    for pattern in ("\t", "\n", "  +"):
        text = re.sub(pattern, " ", text)
    return text

In [39]:
def load_csv(filepath: str) -> dict:
    """
    Read one CSV file into a mapping from link to cleaned text.

    The file is read as UTF-8 with its first row as the header, and must provide
    "Link" and "Text" columns. Each row's text is passed through clean_text before
    being stored. If the same link appears on several rows, the last row wins.

    Parameters:
    filepath (str): The path to the CSV file to read.

    Returns:
    dict: Maps each row's "Link" value to its cleaned "Text" value.
    """
    # Raise the csv module's per-field size limit to 2**31-1 so that very long
    # "Text" fields can be parsed. This setting applies to the whole csv module,
    # not just this call.
    csv.field_size_limit(2**31-1)

    with open(filepath, encoding="utf-8") as handle:
        return {
            record["Link"]: clean_text(record["Text"])
            for record in csv.DictReader(handle)
        }

In [40]:
def load_all_csvs(directory: str) -> dict:
    """
    Load all CSV files in a directory and return a dictionary with the link as the key and the cleaned text as the value.

    Only entries whose name ends with ".csv" are read (the directory is not searched
    recursively). Files are processed in sorted filename order so the result is
    reproducible: when the same link occurs in more than one file, the file that
    sorts last supplies the stored text.

    Parameters:
    directory (str): The path to the directory containing the CSV files to load.

    Returns:
    dict: A dictionary with the link as the key and the cleaned text as the value for all CSV files in the directory.
    """
    data = {}
    # os.listdir returns entries in arbitrary order; sorting makes both the
    # insertion order and the winner on duplicate links deterministic.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".csv"):
            data.update(load_csv(os.path.join(directory, filename)))
    return data

In [41]:
# Merge every CSV under data/responses into one link -> cleaned-text dict.
x = load_all_csvs('data/responses')

In [42]:
# Number of distinct links collected across all loaded files.
len(x)

2508

In [43]:
# Spot-check a single response file by loading it on its own.
filepath = "data/responses/00001.csv"
data = load_csv(filepath)
# NOTE(review): this prints the entire dict, including the full text of every
# entry; consider previewing only a slice to keep the notebook output short.
print(data)

{'https://www.familyhandyman.com/project/how-to-finish-concrete/': " How to Finish Concrete (DIY) | Family Handyman Share Save Saved Share on Facebook Save on Pinterest Tweet this Email Next Project Skip to main content Watch TV Pro New Homeowners Projects DIY University *NEW* Shopping Subscribe Outdoor Kitchen Kits Home Skills Concreting How to Finish Concrete Family HandymanUpdated: Sep. 02, 2021 Techniques for making a smooth, durable finish on a concrete surface. Family Handyman Next Project Time A full day Complexity Intermediate Cost $51–100 Introduction In this article, we'll show you the tools and techniques you need to get a smooth, durable finish on concrete. We'll also tell you how to know when the concrete is ready for each phase of the finishing process. Whether you're pouring a small slap of concrete, like a pad for your garbage can, or a large slab for a patio, these DIY steps will guide you through the process. Tools Required broomDarbyEdgerGrooverMagnesium floatRubber 