In [1]:
import pandas as pd
from pathlib import Path
import numpy as np
import glob, os

# Root folder that is scanned for lead CSV files.
# Raw string: in a normal literal the backslashes in a Windows path form escape
# sequences ('\V', '\L'), which Python flags as invalid escapes.
folder_path = r'D:\VSCode\Lead Analysis'

# Every *.csv under folder_path, at any depth ('**' with recursive=True also
# matches the folder itself, so top-level files are included).
csv_files = glob.glob(os.path.join(folder_path, '**', '*.csv'), recursive=True)


In [None]:
# Build `leads_breakdown`: one row per CSV file in `csv_files` with its file name,
# its number of data rows, and a comma-separated list of its column names.

# Encodings to try, in order. Note that 'latin1' and 'iso-8859-1' are the same
# codec and map every possible byte to a character, so they never raise
# UnicodeDecodeError; the entries after 'latin1' are therefore never reached.
encodings = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']


def _inspect_csv(file, encodings):
    """Return ``(columns, row_count, encoding)`` for a CSV file.

    Each encoding is tried in turn for BOTH the header read and the full row
    count. Reading only the header is not enough to validate an encoding: the
    header can be plain ASCII while later rows contain bytes that fail to decode.

    Returns ``None`` when every encoding raises ``UnicodeDecodeError``.
    Other pandas errors (e.g. ``pd.errors.EmptyDataError``) propagate to the caller.
    """
    for encoding in encodings:
        try:
            # Header only: nrows=0 parses the column names without loading data.
            columns = pd.read_csv(file, nrows=0, encoding=encoding).columns

            # Count rows in chunks, loading only the first column to keep memory
            # low. Malformed lines are skipped and therefore not counted.
            row_count = 0
            for chunk in pd.read_csv(file, chunksize=1000000, usecols=[0],
                                     on_bad_lines='skip', encoding=encoding):
                row_count += len(chunk)
        except UnicodeDecodeError:
            continue  # Try the next encoding
        return columns, row_count, encoding
    return None


# Collect one dict per file and build the DataFrame once at the end; growing a
# DataFrame with pd.concat inside the loop copies all previous rows every time.
records = []

print(len(csv_files))
for file in csv_files:
    # Get the filename
    name = Path(file).name

    print(name)

    try:
        inspected = _inspect_csv(file, encodings)
    except pd.errors.EmptyDataError:
        # A completely empty file has no header to parse; report and move on
        # instead of aborting the whole scan.
        print(f"Skipping {name}: file is empty.")
        continue

    if inspected is None:
        print(f"Failed to decode {name} with available encodings.")
        continue

    columns, row_count, _ = inspected

    # Record the information for this file
    column_list = ', '.join(map(str, columns))
    records.append({'Filename': name, '# of rows': row_count, 'Column List': column_list})
    print(f'\nDone: {row_count}\n')

# Explicit column order so the frame has the same layout even when no file was read.
leads_breakdown = pd.DataFrame(records, columns=['Filename', '# of rows', 'Column List'])


In [29]:
# Write the summary table to the Output folder (relative to the working directory).
# Paths are built with pathlib rather than 'Output\...' literals, whose '\l' is an
# invalid escape sequence, and the folder is created first so to_csv does not fail
# when it is missing.
output_dir = Path('Output')
output_dir.mkdir(parents=True, exist_ok=True)

# The same table is written under both names, as before.
leads_breakdown.to_csv(output_dir / 'lead_analysis.csv', index=False)
leads_breakdown.to_csv(output_dir / 'lead_plumb.csv', index=False)

In [None]:
# 2063 prev set -- run the cells above first ^