# Import libraries and define global variable 'df'

In [1]:
import ipywidgets as widgets
import pandas as pd
import io
import matplotlib.pyplot as plt
import plotly.express as px

# Global dataframe shared by the callbacks below: display_data() replaces it
# with the parsed upload and display_graph() reads it when plotting.
# It starts empty so display_graph() draws nothing until a file is loaded.
df = pd.DataFrame()

# Define functions for data display and cleaning

In [2]:
# Function to handle file upload and data display
def display_data(button):
    """Load the uploaded CSV into the global ``df`` and show a summary of it.

    Click handler for ``display_button``. Reads the file held by
    ``upload_button``, decodes it as UTF-8 (falling back to ISO-8859-1),
    parses it with the ``Date`` column as a datetime index, prints the file
    name, shape and missing-value count into ``info_output``, prints the
    first five rows into ``data_output`` and refreshes the parameter dropdown.

    Args:
        button: The clicked widget, supplied by ``on_click``; not used.
    """
    global df
    with data_output:
        data_output.clear_output(wait=True)

        uploaded = upload_button.value
        # ipywidgets 7 exposes ``value`` as {filename: file_info}; ipywidgets 8
        # exposes a tuple of file_info entries that carry their own 'name'.
        if isinstance(uploaded, dict):
            files = list(uploaded.items())
        else:
            files = [(file_info['name'], file_info) for file_info in uploaded]

        if not files:
            print("No file uploaded. Please upload a CSV file first.")
            return

        for name, file_info in files:
            # ipywidgets 8 delivers the content as a memoryview, which has no
            # .decode(); bytes() normalises both formats.
            raw = bytes(file_info['content'])
            try:
                text = raw.decode('utf-8')
            except UnicodeDecodeError:
                # ISO-8859-1 assigns a character to every byte value, so this
                # fallback always succeeds.
                text = raw.decode('iso-8859-1')

            try:
                # Parse the Date column as datetime and use it as the index.
                loaded = pd.read_csv(io.StringIO(text), parse_dates=['Date'], index_col='Date')
            except ValueError as e:
                # Covers a missing 'Date' column as well as empty or malformed
                # CSV content (pandas' parser errors derive from ValueError).
                print(f"Could not read {name}: {e}")
                return

            # Only replace the shared dataframe once parsing has succeeded.
            df = loaded

            # Displaying basic information
            with info_output:
                info_output.clear_output(wait=True)
                print(f"File: {name}")
                print(f"Rows: {df.shape[0]}")
                print(f"Columns: {df.shape[1]}")
                print(f"Missing Values: {df.isnull().sum().sum()}")

            # Displaying the first 5 rows of the data
            print("Displaying first 5 rows of the data:")
            print(df.head())

            # Update parameter options in dropdown
            update_parameter_options(df)


# Function to perform automatic data cleaning
def automatic_data_cleaning(df):
    """Return a cleaned, scaled and feature-expanded copy of ``df``.

    The input frame is not modified. Steps, in order:

    1. Fill missing values in numeric columns with the column median.
    2. One-hot encode categorical columns (first level dropped).
    3. Min-max scale every column to the range [0, 1].
    4. Drop rows lying more than 3 standard deviations from a column mean.
    5. Append the product of every ordered pair of distinct columns as
       ``"<a>_x_<b>"``.

    Args:
        df: The dataframe to clean.

    Returns:
        A new dataframe holding the scaled columns followed by the
        interaction columns.
    """
    # 1. Handling Missing Values: impute with the median. numeric_only keeps
    #    text columns from raising inside median() on pandas 2+.
    df = df.fillna(df.median(numeric_only=True))

    # 2. Encoding Categorical Variables: one-hot encoding. Float dummies (and
    #    float casts of any pre-existing boolean columns) are required because
    #    boolean columns do not support the subtraction used for scaling.
    df = pd.get_dummies(df, drop_first=True, dtype=float)
    bool_columns = df.select_dtypes(include='bool').columns
    if len(bool_columns) > 0:
        df = df.astype({col: float for col in bool_columns})

    # 3. Normalizing Data: min-max scaling. A constant column has a zero
    #    range; dividing by 1 instead maps it to 0 rather than to NaN.
    lowest = df.min()
    span = df.max() - lowest
    span = span.mask(span == 0, 1)
    df = (df - lowest) / span

    # 4. Handling Outliers: remove data points > 3 std devs from the mean.
    #    The bounds are inclusive so a zero-variance column keeps its rows.
    for col in df.select_dtypes(include=['float64', 'int64']).columns:
        center = df[col].mean()
        spread = df[col].std()
        if pd.isna(spread):
            # Fewer than two rows: the deviation is undefined, nothing to drop.
            continue
        df = df[df[col].between(center - 3 * spread, center + 3 * spread)]

    # 5. Feature Engineering: interaction terms (product of every ordered pair
    #    of distinct columns). Depending on the number of columns this can
    #    significantly increase dataset size, so the new columns are collected
    #    first and attached in a single concat rather than inserted one by one.
    original_columns = list(df.columns)
    interactions = {}
    for col_a in original_columns:
        for col_b in original_columns:
            if col_a != col_b:
                interactions[f"{col_a}_x_{col_b}"] = df[col_a] * df[col_b]
    if interactions:
        df = pd.concat([df, pd.DataFrame(interactions, index=df.index)], axis=1)

    return df

# Define and display widgets

In [3]:
# Input controls: CSV upload, load trigger and column selector
upload_button = widgets.FileUpload(
    accept='.csv',
    multiple=False,
)
display_button = widgets.Button(description="Display Data")
parameter_dropdown = widgets.Dropdown(
    options=[],
    value=None,
    description='Parameter:',
)

# Output areas: file summary, data preview and graph
info_output = widgets.Output()
data_output = widgets.Output()
graph_output = widgets.Output()

# Function to update parameter options in dropdown
def update_parameter_options(df):
    """Fill the parameter dropdown with the columns of ``df``.

    The first column becomes the selected value; when ``df`` is empty the
    selection is cleared instead.
    """
    parameter_dropdown.options = df.columns
    if df.empty:
        parameter_dropdown.value = None
    else:
        parameter_dropdown.value = df.columns[0]

# Function to display graph based on selected parameter
def display_graph(change):
    """Redraw the graph for the column newly selected in the dropdown.

    Observer for the dropdown's ``value``. Clears ``graph_output`` and, when
    a column is selected and the global ``df`` holds data, shows an
    interactive Plotly line plot of that column against the dataframe index.

    Args:
        change: The change notification; ``change.new`` is the selected value.
    """
    with graph_output:
        graph_output.clear_output(wait=True)
        column = change.new

        # Nothing to plot without a selection or before any data is loaded.
        if not column or df.empty:
            return

        plot_title = f'Graph for {column}'
        figure = px.line(df, x=df.index, y=column, title=plot_title)
        # Label both axes explicitly: the index on x, the chosen column on y.
        figure.update_xaxes(title_text='Date')
        figure.update_yaxes(title_text=column)
        figure.show()

# Clicking the button loads and summarises the uploaded file
display_button.on_click(display_data)

# Changing the dropdown selection redraws the graph
parameter_dropdown.observe(display_graph, names='value')

# Stack the widgets vertically; this is the cell's final expression
widgets.VBox(children=[
    upload_button,
    display_button,
    info_output,
    data_output,
    parameter_dropdown,
    graph_output,
])


VBox(children=(FileUpload(value={}, accept='.csv', description='Upload'), Button(description='Display Data', s…