Sure, here is an outline of the function flow with suggested DataFrame names for better clarity:

1. **Main Function: main**
   - **Reads:** Uploaded file (file)
   - **Returns:** None
   - **Writes:** None

2. **File Reading Function: read_file**
   - **Reads:** Uploaded file (file)
   - **Returns:** Initial DataFrame (`initial_df`)
   - **Writes:** None

3. **Column Cleaning Function: clean_columns**
   - **Reads:** Initial DataFrame (`initial_df`)
   - **Returns:** Cleaned DataFrame (`cleaned_df`)
   - **Writes:** None

4. **Initial Data Inspection Function: display_initial_inspection**
   - **Reads:** Cleaned DataFrame (`cleaned_df`)
   - **Returns:** None
   - **Writes:** None

5. **Column Headers Display Function: display_column_headers**
   - **Reads:** Cleaned DataFrame (`cleaned_df`)
   - **Returns:** None
   - **Writes:** None

6. **Datatype Options Creation Function: create_datatype_options**
   - **Reads:** Current column datatype (current_type)
   - **Returns:** Datatype options (options)
   - **Writes:** None

7. **Datatype Conversion Function: convert_datatypes**
   - **Reads:** Cleaned DataFrame (`cleaned_df`), Datatype map (datatype_map)
   - **Returns:** Converted DataFrame (`converted_df`)
   - **Writes:** None

8. **Data Preview Function: preview_data**
   - **Reads:** Converted DataFrame (`converted_df`), Datatype map (datatype_map)
   - **Returns:** Sorted DataFrame (sorted_df)
   - **Writes:** None

9. **Sorted Dataset Saving Function: save_sorted_dataset**
   - **Reads:** Sorted DataFrame (sorted_df), Filename (filename)
   - **Returns:** None
   - **Writes:** Sorted DataFrame to disk

10. **Saved Dataset Loading Function: load_saved_dataset**
    - **Reads:** None
    - **Returns:** Loaded DataFrame (`loaded_df`)
    - **Writes:** None

11. **Trade Analysis Function: `analyze_trades`**
    - **Reads:** Loaded DataFrame (`loaded_df`)
    - **Returns:** None
    - **Writes:** None

Based on this outline, here are the suggested DataFrame names to be used throughout the program:

- `initial_df` for the DataFrame read from the uploaded file.
- `cleaned_df` for the DataFrame after cleaning column names.
- `converted_df` for the DataFrame after datatype conversion.
- `sorted_df` for the DataFrame after sorting.
- `loaded_df` for the DataFrame loaded from a saved file.

Let's update the code with these DataFrame names:



In [None]:


import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np
from itables import show, init_notebook_mode
import nest_asyncio
import pyarrow as pa
from datetime import datetime
import logging
from pathlib import Path
import os
import io
import zipfile

# Initialize iTables
init_notebook_mode()

# Set Streamlit to wide mode
st.set_page_config(layout="wide")

nest_asyncio.apply()

# Set up logging
log_file = Path(f"trade_data_preparation_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Function to clean column names
def clean_column_names(df):
    return df.rename(columns=lambda x: x.replace(' ', '_').replace('/', '_').replace('(', '').replace(')', ''))

# Function to display initial data inspection
def display_initial_inspection(cleaned_df):
    st.subheader("Initial Data Inspection")
    st.write("First few rows of the uploaded file:")
    st.write(cleaned_df.head())

# Function to display column headers and datatypes
def display_column_headers(cleaned_df):
    st.subheader("Extracted Column Headers and Datatypes")
    headers_df = pd.DataFrame({
        'Column_Name': cleaned_df.columns,
        'Column_Datatype': cleaned_df.dtypes
    })
    st.dataframe(headers_df)

# Function to create datatype selection options
def create_datatype_options(current_type):
    options = []
    if current_type == 'object':
        options = ['int64', 'float64', 'datetime64', 'object']
    elif current_type in ['int64', 'float64']:
        options = ['int64', 'float64', 'object']
    elif 'datetime64' in current_type:
        options = ['datetime64', 'object']
    else:
        options = ['object']
    return options

# Function to convert datatypes
def convert_datatypes(cleaned_df, datatype_map):
    for col, new_type in datatype_map.items():
        try:
            if new_type == 'datetime64':
                cleaned_df[col] = pd.to_datetime(cleaned_df[col], errors='coerce')
            else:
                cleaned_df[col] = cleaned_df[col].astype(new_type)
            logging.info(f"Converted column {col} to {new_type}")
        except Exception as e:
            logging.error(f"Error converting column {col} to {new_type}: {str(e)}")
            st.error(f"Error converting column {col} to {new_type}: {str(e)}")
    return cleaned_df

# Function to preview data after datatype conversion
def preview_data(converted_df, datatype_map):
    st.subheader("Preview of Data After Datatype Conversion")
    st.write("You can sort the DataFrame by clicking on the column headers.")
    sorted_df = st.dataframe(converted_df)
    st.write(converted_df.dtypes)
    return sorted_df

# Function to save sorted dataset
def save_sorted_dataset(sorted_df, filename):
    try:
        sorted_df.to_csv(filename, index=False)
        logging.info(f"Successfully saved sorted dataset to {filename}")
        st.success(f"Successfully saved sorted dataset to {filename}")
    except Exception as e:
        logging.error(f"Error saving sorted dataset to {filename}: {str(e)}")
        st.error(f"Error saving sorted dataset to {filename}: {str(e)}")

# Function to load saved dataset
def load_saved_dataset():
    saved_files = [f for f in os.listdir() if f.startswith('trade_performance_dataset_cleaned_') and f.endswith('.xlsx')]
    if saved_files:
        selected_file = st.selectbox("Select a saved dataset", saved_files)
        try:
            loaded_df = pd.read_excel(selected_file)
            logging.info(f"Loaded saved dataset from {selected_file}")
            st.success(f"Loaded saved dataset from {selected_file}")
            return loaded_df
        except Exception as e:
            logging.error(f"Error loading dataset from {selected_file}: {str(e)}")
            st.error(f"Error loading dataset from {selected_file}: {str(e)}")
    else:
        st.info("No saved datasets found.")
        return None

# Main function
def main():
    st.title("Trade Transaction Performance Analyzer")

    # File upload
    file = st.file_uploader("Upload your XLSX or CSV file", type=["xlsx", "csv"])
    if not file:
        st.stop()

    # Read the file
    @st.cache_data
    def read_file(file):
        try:
            if file.name.endswith('.xlsx'):
                return pd.read_excel(file)
            else:
                return pd.read_csv(file)
        except Exception as e:
            st.error(f"Error reading file: {str(e)}")
            return None

    initial_df = read_file(file)
    if initial_df is None:
        return

    # Clean column names
    @st.cache_data
    def clean_columns(initial_df):
        return clean_column_names(initial_df)

    cleaned_df = clean_columns(initial_df)

    # Display initial data inspection
    display_initial_inspection(cleaned_df)

    # Display column headers and datatypes
    display_column_headers(cleaned_df)

    # Create datatype selection options
    datatype_options = {col: create_datatype_options(str(dtype)) for col, dtype in cleaned_df.dtypes.items()}

    # Create interactive grid for datatype selection
    with st.expander("Select Datatypes"):
        datatype_map = {}
        for col, options in datatype_options.items():
            current_type = str(cleaned_df[col].dtype)
            if 'datetime64' in current_type:
                current_type = 'datetime64'
            datatype_map[col] = st.selectbox(f"Select datatype for {col}", options, index=options.index(current_type))

    # Preview data after datatype conversion
    if st.button("Preview Data After Datatype Conversion"):
        @st.cache_data
        def convert_and_preview(cleaned_df, datatype_map):
            converted_df = convert_datatypes(cleaned_df.copy(), datatype_map)
            return converted_df

        converted_df = convert_and_preview(cleaned_df, datatype_map)
        sorted_df = preview_data(converted_df, datatype_map)

        # Save sorted dataset
        if st.button("Save Sorted Dataset"):
            current_datetime = datetime.now().strftime('%Y%m%d_%H%M%S')
            sorted_csv_filename = f"trade_performance_dataset_sorted_{current_datetime}.csv"
            save_sorted_dataset(sorted_df, sorted_csv_filename)

    # Save dataset
    if st.button("Save Cleaned Dataset"):
        current_datetime = datetime.now().strftime('%Y%m%d_%H%M%S')
        csv_filename = f"trade_performance_dataset_cleaned_{current_datetime}.csv"
        xlsx_filename = f"trade_performance_dataset_cleaned_{current_datetime}.xlsx"
        
        df_cleaned = convert_datatypes(cleaned_df, datatype_map)
        save_sorted_dataset(df_cleaned, csv_filename)
        df_cleaned.to_excel(xlsx_filename, index=False)
        logging.info(f"Successfully saved cleaned dataset to {csv_filename} and {xlsx_filename}")
        st.success(f"Successfully saved cleaned dataset to {csv_filename} and {xlsx_filename}")

    # Load saved dataset
    if st.button("Load Saved Dataset"):
        loaded_df = load_saved_dataset()
        if loaded_df is not None:
            display_initial_inspection(loaded_df)
            display_column_headers(loaded_df)

    # Analyze trades
    if st.button("Analyze Trades"):
        if 'loaded_df' not in locals():
            st.error("Please load or upload a dataset first.")
            return

        try:
            loaded_df['Cumulative_Profit_Loss'] = loaded_df['Profit_Loss'].cumsum()
        except Exception as e:
            st.error(f"Error calculating Cumulative Profit/Loss: {str(e)}")
            return

        # Display data
        st.subheader("Trade data:")
        st.dataframe(loaded_df)

        # Create and display plot
        st.subheader("Cumulative Profit/Loss Chart:")
        try:
            fig = px.line(loaded_df, x='Opened', y='Cumulative_Profit_Loss', title='Cumulative Profit/Loss Over Time')
            fig.update_xaxes(title='Date')
            fig.update_yaxes(title='Cumulative Profit/Loss')
            st.plotly_chart(fig, use_container_width=True)
        except Exception as e:
            st.error(f"Error creating plot: {str(e)}")

        # Allow user to select date range for chart
        st.subheader("Select date range for chart:")
        try:
            start_date = st.date_input("Start date", value=loaded_df['Opened'].min())
            end_date = st.date_input("End date", value=loaded_df['Opened'].max())
        except Exception as e:
            st.error(f"Error selecting date range: {str(e)}")
            return

        # Filter data and create new plot
        try:
            filtered_df = loaded_df[(loaded_df['Opened'] >= start_date) & (loaded_df['Opened'] <= end_date)]
            fig = px.line(filtered_df, x='Opened', y='Cumulative_Profit_Loss', title='Filtered Cumulative Profit/Loss Over Time')
            fig.update_xaxes(title='Date')
            fig.update_yaxes(title='Cumulative Profit/Loss')
            st.subheader("Filtered Cumulative Profit/Loss Chart:")
            st.plotly_chart(fig, use_container_width=True)
        except Exception as e:
            st.error(f"Error filtering data or creating filtered plot: {str(e)}")

if __name__ == "__main__":
    main()



In this updated code:
- The DataFrame names are updated to `initial_df`, `cleaned_df`, `converted_df`, 

sorted_df

, and `loaded_df` for better clarity.
- The function signatures and calls are updated to use these new DataFrame names.