In [1]:
import pandas as pd
import numpy as np 
from sklearn.preprocessing import StandardScaler

In [2]:
def data_preprocessing_pipeline(data: pd.DataFrame) -> pd.DataFrame:
    """Impute, de-outlier and standardize a DataFrame; return the result as a new frame.

    Steps, in order:

    1. Numeric columns (``float`` / ``int`` dtypes): missing values are
       replaced with the column mean.
    2. Numeric columns: values outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`` are
       replaced with the column mean (computed after step 1, outliers included).
    3. Numeric columns: standardized with ``StandardScaler``.
    4. Categorical columns (``object`` dtype): missing values are replaced
       with the column's most frequent value.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to process. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A processed copy of ``data`` with the same columns and index.
    """
    # Work on a copy so the caller's frame is left intact and re-running the
    # cell on the same input always gives the same result.
    data = data.copy()

    # Identify numeric and categorical features
    numeric_features = data.select_dtypes(include=['float', 'int']).columns
    categorical_features = data.select_dtypes(include=['object']).columns

    # StandardScaler raises on input with zero columns or zero rows, so the
    # numeric stage is skipped entirely when there is nothing to process.
    if len(numeric_features) > 0 and len(data) > 0:
        # Handle missing values in numeric features
        data[numeric_features] = data[numeric_features].fillna(data[numeric_features].mean())

        # Detect and handle outliers in numeric features using IQR
        for feature in numeric_features:
            Q1 = data[feature].quantile(0.25)
            Q3 = data[feature].quantile(0.75)
            IQR = Q3 - Q1
            lower_bound = Q1 - (1.5 * IQR)
            upper_bound = Q3 + (1.5 * IQR)
            is_outlier = (data[feature] < lower_bound) | (data[feature] > upper_bound)
            data[feature] = np.where(is_outlier, data[feature].mean(), data[feature])

        # Normalize numeric features (a single fit_transform; fitting and then
        # transforming again with the same data was redundant)
        data[numeric_features] = StandardScaler().fit_transform(data[numeric_features])

    # Handle missing values in categorical features
    if len(categorical_features) > 0:
        modes = data[categorical_features].mode()
        # mode() has no rows when every categorical column is entirely
        # missing; there is then nothing to fill with, so leave the NaNs.
        if not modes.empty:
            data[categorical_features] = data[categorical_features].fillna(modes.iloc[0])

    return data

In [2]:
!pip install mplcursors

Collecting mplcursors
  Downloading mplcursors-0.5.3.tar.gz (88 kB)
     ---------------------------------------- 0.0/88.8 kB ? eta -:--:--
     ------------- -------------------------- 30.7/88.8 kB ? eta -:--:--
     ----------------- -------------------- 41.0/88.8 kB 495.5 kB/s eta 0:00:01
     -------------------------- ----------- 61.4/88.8 kB 469.7 kB/s eta 0:00:01
     -------------------------------------- 88.8/88.8 kB 456.8 kB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: mplcursors
  Building wheel for mplcursors (pyproject.toml): started
  Building wheel for mplcursors (pyproject.toml): finished with status 'done'
  Created wheel for mplcursors

In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import mplcursors

# Toy sales table for the plotting demo: one row per product.
data = dict(Product=['A', 'B', 'C'], Sales=[23, 17, 35])
df = pd.DataFrame.from_dict(data)

import plotly.express as px

In [None]:
# Bar chart with products on the x-axis and sales on the y-axis; the figure is
# the cell's last expression, so it is displayed as the cell output.
px.bar(
    x=df['Product'],
    y=df['Sales'],
)