# Import Libraries

In [None]:
import sweetviz as sv
import pandas as pd
import numpy as np
from sweetviz.feature_config import FeatureConfig


# Load your primary dataset

In [None]:
# Read the primary dataset from an Excel workbook into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path — adjust it to your own copy of the file.
data = pd.read_excel(io='D:/Git-Hub Projects/DATA-ANALYSIS-PROJECTS/Automated-EDA/data/CovidDeaths.xlsx')

# Fix Numpy Issue

In [None]:
# Compatibility shim: restore the top-level ``np.VisibleDeprecationWarning``
# name only when the installed NumPy no longer provides it (NumPy 2.0 removed
# the alias). Presumably needed by sweetviz at report time — TODO confirm.
if not hasattr(np, 'VisibleDeprecationWarning'):
    # Alias to the real warning class in ``np.exceptions`` when it exists: it
    # derives from UserWarning, so warning filters that target it keep matching
    # the warnings NumPy actually emits. Fall back to DeprecationWarning (the
    # previous behaviour) only if that class cannot be found.
    np.VisibleDeprecationWarning = getattr(
        getattr(np, 'exceptions', None),
        'VisibleDeprecationWarning',
        DeprecationWarning,
    )

# Customizations in Sweetviz:
 - source : Source data frame
 - compare : Comparison data frame (used by `sv.compare`)
 - target_feat : Specify the target variable.
 - pairwise_analysis : Controls the pairwise (feature-association) analysis; pass a string such as "auto", "on" or "off".
 - skip_after_comparison : Whether to skip further analysis after comparing target vs remaining features.
 - feat_cfg : Supply custom configurations (for example, to tweak categorical detectors). 

# Example 1: Analyze a Single Dataset

In [None]:
# Build the EDA report for the loaded dataset.
# If the data has a target column, also pass target_feat="<column name>".
report = sv.analyze(source=data)

# Write the report to an HTML file and open it in the browser
report.show_html(filepath="single_dataset_report.html")

# Example 2: Compare Two Datasets

#### Split the data into two equal parts (You can use any 2 different datasets)

In [None]:
# Preserve the complete dataset before `data` is rebound below.
# NOTE: this cell overwrites `data` with its first half, so re-running it
# without reloading the dataset halves `data` again each time.
full_data = data.copy()
split_index = len(full_data) // 2

# Rows before the midpoint form the training set; the rest form the testing set.
data, compare_data = (
    full_data.iloc[:split_index].copy(),
    full_data.iloc[split_index:].copy(),
)

print("Training set shape:", data.shape)
print("Testing set shape:", compare_data.shape)

#### Generate The Comparison Report

In [None]:
# Feature configuration for the comparison:
#   skip      - features to leave out of the report (none here)
#   force_cat - features to treat as categorical (none here)
custom_feature_cfg = FeatureConfig(skip=[], force_cat=[])

# Compare the two halves using the FeatureConfig object built above.
# To analyze against a target, also pass target_feat="target_column".
report_compare = sv.compare(
    data,
    compare_data,
    feat_cfg=custom_feature_cfg,
    pairwise_analysis="auto",
)

#### Save and Show the Report in HTML

In [None]:
# Write the comparison report to an HTML file
report_compare.show_html(filepath="Comparison_dataset_report.html")

#### Show Report within Notebook

In [None]:
# Display the comparison report inline in the notebook instead of a separate HTML file
report_compare.show_notebook()