In [None]:
import pandas as pd
import logging
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

# Configure the root logger once for this script: emit INFO and above, with
# each record rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Load final dataset with features in chunks
def load_data_in_chunks(file_path, chunksize=10000):
    """Lazily yield successive DataFrame chunks read from a CSV file.

    This is a generator: nothing is logged or opened until the first chunk
    is requested.

    Args:
        file_path: Path (or any source accepted by ``pd.read_csv``) of the
            CSV file to read.
        chunksize: Maximum number of rows per yielded chunk.

    Yields:
        One ``pandas.DataFrame`` per chunk, in file order.
    """
    logging.info(f"Loading data in chunks from {file_path}")
    reader = pd.read_csv(file_path, chunksize=chunksize)
    try:
        yield from reader
    finally:
        # Release the underlying file handle even when the consumer stops
        # iterating early or an exception propagates through the generator.
        reader.close()

# Analyze data chunk
def analyze_data(chunk: pd.DataFrame) -> pd.DataFrame:
    """Compute, log, and return the descriptive statistics of one chunk.

    Args:
        chunk: The DataFrame to summarise.

    Returns:
        The result of ``chunk.describe()``.
    """
    logging.info("Analyzing data chunk")
    described = chunk.describe()
    logging.info(f"Summary statistics: \n{described}")
    return described

# Visualize data chunk
def visualize_data(chunk):
    """Plot histograms for ``chunk`` and display the resulting figure.

    Args:
        chunk: Object exposing a pandas-style ``hist`` method (a DataFrame
            in this script).
    """
    logging.info("Visualizing data chunk")
    # 50 bins per histogram on a 20x15 inch figure.
    hist_options = {"bins": 50, "figsize": (20, 15)}
    chunk.hist(**hist_options)
    plt.show()

# Analysis and visualization in batches
def analyze_and_visualize_in_batches(input_file, chunksize=10000):
    """Stream a CSV file chunk by chunk, summarising and plotting each chunk.

    For every chunk produced by ``load_data_in_chunks`` this logs its
    summary statistics via ``analyze_data`` and then plots the chunk via
    ``visualize_data``. Progress is reported with a tqdm bar.

    Args:
        input_file: Path of the CSV file to process.
        chunksize: Maximum number of rows per chunk.
    """
    chunk_iter = load_data_in_chunks(input_file, chunksize)
    for chunk in tqdm(chunk_iter, desc="Analyzing and visualizing data"):
        # analyze_data logs the statistics itself; its return value is the
        # describe() table, which is not what should be plotted.
        analyze_data(chunk)
        # Plot the chunk's actual rows rather than the summary table, whose
        # count/mean/std/quantile rows would give meaningless histograms.
        visualize_data(chunk)
# Driver: run the chunked analysis and visualization over the final dataset
# with features. These statements execute as soon as the cell/module runs.
# The CSV path is relative to the current working directory.
logging.info("Starting analysis and visualization on final dataset with features.")
input_file = "datasets/final_dataset_with_features.csv"
analyze_and_visualize_in_batches(input_file)
logging.info("Finished analysis and visualization on final dataset with features.")
