<a href="https://colab.research.google.com/github/Hoteeman/projects/blob/main/software%20sample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
!pip install ipywidgets plotly pandas


Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Using cached jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)
Using cached jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
Installing collected packages: jedi
Successfully installed jedi-0.19.1


In [16]:
import pandas as pd
import plotly.express as px
from google.colab import files
import ipywidgets as widgets
from IPython.display import display

# Step 1: File upload widget
upload_button = widgets.FileUpload(accept='.csv', multiple=False)
display(upload_button)

# Step 2: Create an empty DataFrame
df = None

def handle_upload(change):
    global df
    if upload_button.value:
        # Load the file into a pandas DataFrame
        uploaded_file = next(iter(upload_button.value.values()))
        file_content = uploaded_file['content']

        # Decode the uploaded file content to a string
        df = pd.read_csv(pd.io.common.BytesIO(file_content))

        # Drop missing values for clean data
        df.dropna(inplace=True)

        # Display success message and first 5 rows
        print(f"File loaded successfully with {df.shape[0]} rows and {df.shape[1]} columns.")
        display(df.head())

upload_button.observe(handle_upload, names='value')

# Step 3: Visualization options
x_axis_dropdown = widgets.Dropdown(options=[], description='X-Axis:')
y_axis_dropdown = widgets.Dropdown(options=[], description='Y-Axis:')
visualization_dropdown = widgets.Dropdown(
    options=["Scatter Plot", "Bar Chart", "Line Plot", "Box Plot"],
    description='Chart Type:',
)

# Step 4: Update dropdowns after file upload
def update_dropdowns():
    if df is not None:
        columns = df.columns.tolist()
        x_axis_dropdown.options = columns
        y_axis_dropdown.options = columns

visualization_dropdown.observe(lambda change: update_dropdowns(), names='value')

# Step 5: Visualization function
def visualize_data(b):
    if df is None:
        print("Please upload a CSV file first.")
        return

    x_col = x_axis_dropdown.value
    y_col = y_axis_dropdown.value
    chart_type = visualization_dropdown.value

    if chart_type == "Scatter Plot":
        fig = px.scatter(df, x=x_col, y=y_col)
    elif chart_type == "Bar Chart":
        fig = px.bar(df, x=x_col, y=y_col)
    elif chart_type == "Line Plot":
        fig = px.line(df, x=x_col, y=y_col)
    elif chart_type == "Box Plot":
        fig = px.box(df, x=x_col, y=y_col)

    fig.show()

# Step 6: Create the "Visualize" button
visualize_button = widgets.Button(description="Visualize Data")
visualize_button.on_click(visualize_data)

# Display the widgets
display(x_axis_dropdown, y_axis_dropdown, visualization_dropdown, visualize_button)


FileUpload(value={}, accept='.csv', description='Upload')

Dropdown(description='X-Axis:', options=(), value=None)

Dropdown(description='Y-Axis:', options=(), value=None)

Dropdown(description='Chart Type:', options=('Scatter Plot', 'Bar Chart', 'Line Plot', 'Box Plot'), value='Sca…

Button(description='Visualize Data', style=ButtonStyle())

File loaded successfully with 1131 rows and 20 columns.


Unnamed: 0,User ID,Vehicle Model,Battery Capacity (kWh),Charging Station ID,Charging Station Location,Charging Start Time,Charging End Time,Energy Consumed (kWh),Charging Duration (hours),Charging Rate (kW),Charging Cost (USD),Time of Day,Day of Week,State of Charge (Start %),State of Charge (End %),Distance Driven (since last charge) (km),Temperature (°C),Vehicle Age (years),Charger Type,User Type
0,User_1,BMW i3,108.463007,Station_391,Houston,2024-01-01 00:00:00,2024-01-01 00:39:00,60.712346,0.591363,36.389181,13.087717,Evening,Tuesday,29.371576,86.119962,293.602111,27.947953,2.0,DC Fast Charger,Commuter
1,User_2,Hyundai Kona,100.0,Station_428,San Francisco,2024-01-01 01:00:00,2024-01-01 03:01:00,12.339275,3.133652,30.677735,21.128448,Morning,Monday,10.115778,84.664344,112.112804,14.311026,3.0,Level 1,Casual Driver
2,User_3,Chevy Bolt,75.0,Station_181,San Francisco,2024-01-01 02:00:00,2024-01-01 04:48:00,19.128876,2.452653,27.513593,35.66727,Morning,Thursday,6.854604,69.917615,71.799253,21.002002,2.0,Level 2,Commuter
3,User_4,Hyundai Kona,50.0,Station_327,Houston,2024-01-01 03:00:00,2024-01-01 06:42:00,79.457824,1.266431,32.88287,13.036239,Evening,Saturday,83.120003,99.624328,199.577785,38.316313,1.0,Level 1,Long-Distance Traveler
4,User_5,Hyundai Kona,50.0,Station_108,Los Angeles,2024-01-01 04:00:00,2024-01-01 05:46:00,19.629104,2.019765,10.215712,10.161471,Morning,Saturday,54.25895,63.743786,203.661847,-7.834199,1.0,Level 1,Long-Distance Traveler


In [14]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from google.colab import files
import os

class AdvancedDataVisualizer:
    def __init__(self):
        self.data = None
        self.file_path = None
        self.numeric_cols = []
        self.categorical_cols = []
        self.datetime_cols = []

    def request_file_upload(self) -> bool:
        print("Please upload a data file (CSV, Excel, JSON, Parquet, or Text):")
        uploaded = files.upload()

        if uploaded:
            self.file_path = list(uploaded.keys())[0]
            file_extension = os.path.splitext(self.file_path)[1].lower()

            with open(self.file_path, 'wb') as f:
                f.write(uploaded[self.file_path])

            return self._load_data(file_extension)
        else:
            print("No file selected.")
            return False

    def _load_data(self, file_extension) -> bool:
        try:
            loaders = {
                '.csv': pd.read_csv,
                '.xlsx': pd.read_excel,
                '.xls': pd.read_excel,
                '.json': pd.read_json,
                '.parquet': pd.read_parquet,
                '.txt': self._load_text
            }

            if file_extension in loaders:
                self.data = loaders[file_extension](self.file_path)
            else:
                print(f"Unsupported file type: {file_extension}")
                return False

            # Clean the data (remove rows with missing values)
            self.data = self.data.dropna()
            self._post_load_processing()
            return True
        except Exception as e:
            print(f"Error loading file: {e}")
            return False

    def _load_text(self, file_path):
        with open(file_path, 'r') as f:
            first_line = f.readline()

        potential_delimiters = [',', '\t', '|', ';']
        delimiter = max(potential_delimiters, key=lambda d: first_line.count(d))

        return pd.read_csv(file_path, delimiter=delimiter)

    def _post_load_processing(self):
        # Convert datetime columns
        self.datetime_cols = []
        for col in self.data.columns:
            if self.data[col].dtype == 'object':
                try:
                    date_series = pd.to_datetime(self.data[col], errors='coerce')
                    if pd.notnull(date_series).any():
                        self.data[col] = date_series
                        self.datetime_cols.append(col)
                except (ValueError, TypeError):
                    pass

        # Categorize numeric and categorical columns
        self.numeric_cols = self.data.select_dtypes(include=[np.number]).columns.tolist()
        self.categorical_cols = self.data.select_dtypes(include=['object', 'category']).columns.tolist()

        # Print dataset summary
        print(f"\nDataset shape after cleaning: {self.data.shape}")
        print(f"Numeric columns: {', '.join(self.numeric_cols)}")
        print(f"Categorical columns: {', '.join(self.categorical_cols)}")
        print(f"Datetime columns: {', '.join(self.datetime_cols)}")

    def display_menu(self):
        menu = """
Available visualizations:
1. Scatter Plot
2. Line Plot
3. Bar Chart
4. Histogram
5. Box Plot
6. Heatmap
7. Pie Chart
8. 3D Scatter Plot
9. Parallel Coordinates Plot
10. Exit
Please enter the number of the visualization you'd like to generate:
"""
        print(menu)

    def process_user_choice(self, choice):
        visualization_methods = {
            "1": self._show_scatter_plot,
            "2": self._show_line_plot,
            "3": self._show_bar_chart,
            "4": self._show_histogram,
            "5": self._show_box_plot,
            "6": self._show_heatmap,
            "7": self._show_pie_chart,
            "8": self._show_3d_scatter_plot,
            "9": self._show_parallel_coordinates,
            "10": self._exit_program
        }

        if choice in visualization_methods:
            return visualization_methods[choice]()
        else:
            print("Invalid choice. Please try again.")

    def _select_columns(self, prompt, multiple=False):
        print(f"\nAvailable columns: {', '.join(self.data.columns)}")
        if multiple:
            cols = input(f"{prompt} (comma-separated): ").strip().split(',')
            selected_cols = [col.strip() for col in cols if col.strip() in self.data.columns]
            invalid_cols = [col.strip() for col in cols if col.strip() not in self.data.columns]
            if invalid_cols:
                print(f"Ignored invalid columns: {', '.join(invalid_cols)}")
            return selected_cols
        else:
            col = input(prompt).strip()
            if col in self.data.columns:
                return col
            else:
                print(f"Column '{col}' does not exist.")
                return None

    def _get_axis_label(self, axis):
        label = input(f"Enter label for {axis}-axis (press Enter to use the column name): ").strip()
        return label if label else None

    def _show_scatter_plot(self):
        try:
            x_col = self._select_columns("Select x-axis column")
            y_col = self._select_columns("Select y-axis column")
            color_col = self._select_columns("Select color column (optional)")
            size_col = self._select_columns("Select size column (optional)")

            if not x_col or not y_col:
                print("Invalid column selection.")
                return

            x_label = self._get_axis_label('x') or x_col
            y_label = self._get_axis_label('y') or y_col

            fig = px.scatter(self.data, x=x_col, y=y_col, color=color_col, size=size_col,
                             labels={x_col: x_label, y_col: y_label},
                             title=f'Scatter Plot: {x_col} vs {y_col}')
            fig.show()
        except Exception as e:
            print(f"Error generating scatter plot: {e}")

    def _show_line_plot(self):
        try:
            x_col = self._select_columns("Select x-axis column")
            y_cols = self._select_columns("Select y-axis column(s)", multiple=True)

            if not x_col or not y_cols:
                print("Invalid column selection.")
                return

            x_label = self._get_axis_label('x') or x_col
            y_label = self._get_axis_label('y') or ", ".join(y_cols)

            fig = go.Figure()
            for y_col in y_cols:
                fig.add_trace(go.Scatter(x=self.data[x_col], y=self.data[y_col], mode='lines', name=y_col))

            fig.update_layout(title=f'Line Plot', xaxis_title=x_label, yaxis_title=y_label)
            fig.show()
        except Exception as e:
            print(f"Error generating line plot: {e}")

    def _show_bar_chart(self):
        try:
            x_col = self._select_columns("Select x-axis column")
            y_col = self._select_columns("Select y-axis column")

            if not x_col or not y_col:
                print("Invalid column selection.")
                return

            x_label = self._get_axis_label('x') or x_col
            y_label = self._get_axis_label('y') or y_col

            fig = px.bar(self.data, x=x_col, y=y_col, title=f'Bar Chart: {x_col} vs {y_col}')
            fig.update_layout(xaxis_title=x_label, yaxis_title=y_label)
            fig.show()
        except Exception as e:
            print(f"Error generating bar chart: {e}")

    def _show_histogram(self):
        try:
            col = self._select_columns("Select column for histogram")

            if not col:
                print("Invalid column selection.")
                return

            fig = px.histogram(self.data, x=col, title=f'Histogram of {col}')
            fig.update_layout(xaxis_title=col, yaxis_title="Count")
            fig.show()
        except Exception as e:
            print(f"Error generating histogram: {e}")

    def _show_box_plot(self):
        try:
            y_col = self._select_columns("Select column for box plot")
            color_col = self._select_columns("Select color column (optional)")

            if not y_col:
                print("Invalid column selection.")
                return

            fig = px.box(self.data, y=y_col, color=color_col, title=f'Box Plot of {y_col}')
            fig.show()
        except Exception as e:
            print(f"Error generating box plot: {e}")

    def _show_heatmap(self):
        try:
            cols = self._select_columns("Select columns for heatmap (comma-separated)", multiple=True)

            if not cols or len(cols) < 2:
                print("At least two columns are required for a heatmap.")
                return

            corr_matrix = self.data[cols].corr()

            fig = go.Figure(data=go.Heatmap(
                z=corr_matrix.values,
                x=corr_matrix.columns,
                y=corr_matrix.columns,
                colorscale='Viridis'
            ))

            fig.update_layout(title='Correlation Heatmap', xaxis_title='Columns', yaxis_title='Columns')
            fig.show()
        except Exception as e:
            print(f"Error generating heatmap: {e}")

    def _show_pie_chart(self):
        try:
            names_col = self._select_columns("Select column for pie chart categories")
            values_col = self._select_columns("Select column for pie chart values")

            if not names_col or not values_col:
                print("Invalid column selection.")
                return

            fig = px.pie(self.data, names=names_col, values=values_col, title=f'Pie Chart of {names_col}')
            fig.show()
        except Exception as e:
            print(f"Error generating pie chart: {e}")

    def _show_3d_scatter_plot(self):
        try:
            x_col = self._select_columns("Select x-axis column")
            y_col = self._select_columns("Select y-axis column")
            z_col = self._select_columns("Select z-axis column")

            if not x_col or not y_col or not z_col:
                print("Invalid column selection.")
                return

            fig = px.scatter_3d(self.data, x=x_col, y=y_col, z=z_col, title=f'3D Scatter Plot: {x_col} vs {y_col} vs {z_col}')
            fig.show()
        except Exception as e:
            print(f"Error generating 3D scatter plot: {e}")

    def _show_parallel_coordinates(self):
        try:
            cols = self._select_columns("Select columns for parallel coordinates (comma-separated)", multiple=True)

            if not cols or len(cols) < 2:
                print("At least two columns are required for parallel coordinates plot.")
                return

            fig = px.parallel_coordinates(self.data, dimensions=cols, title='Parallel Coordinates Plot')
            fig.show()
        except Exception as e:
            print(f"Error generating parallel coordinates plot: {e}")

    def _exit_program(self):
        print("Exiting the program. Goodbye!")
        return "exit"

def main():
    visualizer = AdvancedDataVisualizer()

    print("Welcome to the Advanced Data Visualizer!")

    if not visualizer.request_file_upload():
        print("Failed to load data. Exiting program.")
        return

    while True:
        visualizer.display_menu()
        choice = input("Enter your choice: ")

        result = visualizer.process_user_choice(choice)
        if result == "exit":
            break

if __name__ == "__main__":
    main()


Welcome to the Advanced Data Visualizer!
Please upload a data file (CSV, Excel, JSON, Parquet, or Text):



Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and a

Saving ev_charging_patterns.csv to ev_charging_patterns (6).csv

Dataset shape after cleaning: (1131, 20)
Numeric columns: Battery Capacity (kWh), Energy Consumed (kWh), Charging Duration (hours), Charging Rate (kW), Charging Cost (USD), State of Charge (Start %), State of Charge (End %), Distance Driven (since last charge) (km), Temperature (°C), Vehicle Age (years)
Categorical columns: User ID, Vehicle Model, Charging Station ID, Charging Station Location, Time of Day, Day of Week, Charger Type, User Type
Datetime columns: Charging Start Time, Charging End Time

Available visualizations:
1. Scatter Plot
2. Line Plot
3. Bar Chart
4. Histogram
5. Box Plot
6. Heatmap
7. Pie Chart
8. 3D Scatter Plot
9. Parallel Coordinates Plot
10. Exit
Please enter the number of the visualization you'd like to generate:

Enter your choice: 3

Available columns: User ID, Vehicle Model, Battery Capacity (kWh), Charging Station ID, Charging Station Location, Charging Start Time, Charging End Time, Energy C


Available visualizations:
1. Scatter Plot
2. Line Plot
3. Bar Chart
4. Histogram
5. Box Plot
6. Heatmap
7. Pie Chart
8. 3D Scatter Plot
9. Parallel Coordinates Plot
10. Exit
Please enter the number of the visualization you'd like to generate:

Enter your choice: 2

Available columns: User ID, Vehicle Model, Battery Capacity (kWh), Charging Station ID, Charging Station Location, Charging Start Time, Charging End Time, Energy Consumed (kWh), Charging Duration (hours), Charging Rate (kW), Charging Cost (USD), Time of Day, Day of Week, State of Charge (Start %), State of Charge (End %), Distance Driven (since last charge) (km), Temperature (°C), Vehicle Age (years), Charger Type, User Type
Select x-axis columnVehicle Model, Battery Capacity (kWh)
Column 'Vehicle Model, Battery Capacity (kWh)' does not exist.

Available columns: User ID, Vehicle Model, Battery Capacity (kWh), Charging Station ID, Charging Station Location, Charging Start Time, Charging End Time, Energy Consumed (kWh), Char

KeyboardInterrupt: Interrupted by user

In [3]:
!pip install openai


Collecting openai
  Downloading openai-1.51.0-py3-none-any.whl.metadata (24 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.51.0-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.5/383.5 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.6-py3-none-any.whl (78 kB)
[2K   [90m━