# DataBro Library Demo
This notebook demonstrates how to use the DataBro library with the Iris dataset.

In [None]:
# First, let's install our required dependencies
!pip install pandas numpy matplotlib seaborn plotly scikit-learn tabulate

In [None]:
# Import required libraries
from sklearn.datasets import load_iris
import pandas as pd

# Fetch the Iris data and build a frame: one column per entry in
# `feature_names`, plus the class label in a trailing 'target' column.
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Persist to disk (without the index) so the DataBro loader has a CSV to read
iris_df.to_csv('iris_data.csv', index=False)

In [None]:
# Import DataBro components
from databro import DataLoader, DataPreprocessor, DataVisualizer, DataSummarizer

# Create one instance of each DataBro component, each constructed with no
# arguments; the cells below reuse these four objects.
loader = DataLoader()
preprocessor = DataPreprocessor()
visualizer = DataVisualizer()
summarizer = DataSummarizer()

## 1. Loading and Inspecting Data

In [None]:
# Read the CSV written earlier in this notebook; the result is kept as `df`
# and is the input to every later cell.
df = loader.load_csv('iris_data.csv')

# Get information about the dataset
# NOTE(review): called on the loader with no arguments, so it presumably
# describes the most recently loaded file — confirm against DataLoader.
loader.get_info()

# Show a 3-row sample of the data
loader.show_sample(n=3)

## 2. Data Preprocessing

In [None]:
# Every column except the class label is treated as a feature to scale
feature_columns = [name for name in df.columns if name != 'target']

# Apply the 'standard' scaling method to the feature columns only
scaled_df = preprocessor.scale_data(df, method='standard', columns=feature_columns)

# Preview the scaled result
print("\nFirst few rows of scaled data:")
print(scaled_df.head())

## 3. Data Visualization

In [None]:
# The two sepal measurements are the columns explored in this cell
x_feature = 'sepal length (cm)'
y_feature = 'sepal width (cm)'

# 1. Interactive histogram of sepal length
visualizer.plot_histogram(df, x_feature, interactive=True)

# 2. Interactive scatter of sepal length against sepal width, coloured by class
visualizer.plot_scatter(
    df,
    x=x_feature,
    y=y_feature,
    hue='target',
    interactive=True,
)

# 3. Heatmap over the whole frame
visualizer.plot_heatmap(df)

## 4. Data Summarization

In [None]:
# Basic statistics for the frame, printed under a heading
stats = summarizer.get_basic_stats(df)
print("Basic Statistics:", stats, sep="\n")

# Correlation summary using a threshold of 0.5, printed under a heading
corr_summary = summarizer.get_correlation_summary(df, threshold=0.5)
print("\nStrong Correlations:", corr_summary, sep="\n")

# Write the full report to a text file and confirm where it went
summarizer.generate_report(df, 'iris_report.txt')
print("\nReport generated and saved to 'iris_report.txt'")

## 5. Advanced Visualizations

In [None]:
# One interactive box plot per Iris feature name, grouped by the class label.
# `iris` is the object returned by `load_iris()` earlier in the notebook.
for feature_name in iris.feature_names:
    visualizer.plot_box(
        df,
        column=feature_name,
        by='target',
        interactive=True,
    )