In [1]:
import os
import csv
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import time
import dask.dataframe as dd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
import numpy as np
import dash
from dash import dcc, html
import calendar
from dash.dependencies import Input, Output
from datetime import datetime
import folium

In [3]:
def csv_to_df(filepath):
    """Read a CSV file into a DataFrame, accepting ',' or ';' as delimiter.

    The file is first parsed as comma-separated. It is re-read as
    semicolon-separated when either

    * the comma parse raises ``pandas.errors.ParserError``, or
    * the comma parse "succeeds" but yields a single column whose header
      contains ';' -- this is what a semicolon-delimited file without any
      commas looks like when read with ``sep=','``.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents.

    Raises
    ------
    Any exception raised by ``pandas.read_csv`` other than the
    ``ParserError`` of the first (comma) attempt is propagated unchanged.
    """
    try:
        frame = pd.read_csv(filepath, sep=',')
    except pd.errors.ParserError:
        return pd.read_csv(filepath, sep=';')

    # No ParserError does not prove the delimiter was right: rows without
    # commas collapse into one column named like "a;b;c".
    if frame.shape[1] == 1 and ';' in str(frame.columns[0]):
        return pd.read_csv(filepath, sep=';')
    return frame

# Load the reference tables from the raw-data directory.
passagierfrequenz_df = csv_to_df('../../raw_data/passagierfrequenz.csv')
haltestelle_df = csv_to_df('../../raw_data/fahrzeiten_2022/Haltestelle.csv')
haltepunkt_df = csv_to_df('../../raw_data/fahrzeiten_2022/Haltepunkt.csv')

# Registry (display name -> DataFrame) read by show_other_csvs(). Defining it
# here, next to the loads, keeps the notebook runnable from a fresh kernel;
# without it show_other_csvs() fails with NameError on `dataframes`.
# NOTE(review): the key names are chosen here -- adjust if other labels are expected.
dataframes = {
    'passagierfrequenz': passagierfrequenz_df,
    'haltestelle': haltestelle_df,
    'haltepunkt': haltepunkt_df,
}

In [21]:
def show_other_csvs(frames=None, page_size=10):
    """Display an interactive ipywidgets browser for a set of named DataFrames.

    The UI offers:

    * a dropdown plus 'Show' button to select one of the DataFrames,
    * 'Head', 'Previous', 'Next' and 'Tail' buttons that page through the
      selected DataFrame ``page_size`` rows at a time,
    * a column dropdown, text field and 'Search' button that keep only the
      rows whose value in that column (converted to ``str``) matches the
      entered pattern -- or does not match it when 'NOT' is ticked. The
      pattern is interpreted as a regular expression by
      ``Series.str.contains``. Successive searches refine the current
      result; pressing 'Show' again restores the full DataFrame,
    * a 'Describe' button that shows ``DataFrame.describe()`` of the
      current (possibly filtered) DataFrame.

    Parameters
    ----------
    frames : mapping of str -> pandas.DataFrame, optional
        DataFrames to browse, keyed by the name shown in the dropdown.
        When omitted, the notebook-level ``dataframes`` mapping is used;
        a ``NameError`` is raised if that name is not defined.
    page_size : int, optional
        Number of rows shown per page (default 10). Expected to be positive.

    Returns
    -------
    None
        The widgets are rendered with ``display`` as a side effect.
    """
    # Imported locally to keep this cell self-contained; a plain `import html`
    # is avoided because the notebook already binds `html` to dash's module.
    import re
    from html import escape as html_escape

    if frames is None:
        frames = dataframes  # notebook-level registry; NameError if undefined

    start = 0  # index of the first row of the page currently displayed
    df = None  # currently selected (and possibly filtered) DataFrame

    # Create UI elements
    head_button = widgets.Button(description='Head')
    prev_button = widgets.Button(description='Previous')
    next_button = widgets.Button(description='Next')
    tail_button = widgets.Button(description='Tail')
    output = widgets.Output()
    # Starts empty; filled with the column names once a DataFrame is shown.
    column_dropdown = widgets.Dropdown(options=[])
    search_input = widgets.Text(value='', placeholder='Type something', description='Search:', disabled=False)
    search_button = widgets.Button(description='Search')
    dimensions_label = widgets.HTML()
    not_checkbox = widgets.Checkbox(value=False, description='NOT', layout=widgets.Layout(width='auto'))
    describe_button = widgets.Button(description='Describe')

    # Create dropdown and show button
    dropdown = widgets.Dropdown(options=list(frames.keys()))
    show_button = widgets.Button(description='Show')

    def show_message(message_html):
        """Replace the output area with a red message (already HTML-safe)."""
        with output:
            output.clear_output()
            display(HTML(f'<p style="color: red;">{message_html}</p>'))

    def require_df():
        """Return True if a DataFrame is loaded; otherwise show a hint."""
        if df is None:
            show_message('Please first select "Show".')
            return False
        return True

    def last_page_start():
        """First row index of the last page, never negative for short frames."""
        return max(0, len(df) - page_size)

    def format_stat(value):
        """Format one describe() cell: integers without decimals, else 4 places."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            # Non-numeric statistics (e.g. 'top' of an object column).
            return str(value)
        if not np.isfinite(number):
            # int(nan) / int(inf) would raise; show them as text instead.
            return str(number)
        if number == int(number):
            return '{:.0f}'.format(number)
        return '{:.4f}'.format(number)

    # Function to display the current page of the DataFrame in the output widget
    def show_output():
        if not require_df():
            return
        with output:
            output.clear_output()
            display(HTML('<div style="overflow-x: auto; white-space: nowrap;">'
                         + df.iloc[start:start + page_size].to_html() + '</div>'))
        # Update the dimensions label
        dimensions_label.value = f'<h4>Dimensions: {df.shape}</h4>'

    # Function to load the DataFrame selected in the dropdown
    def show_df(button_instance=None):
        nonlocal start, df
        if dropdown.value is None:
            # Happens when `frames` is empty: nothing can be selected.
            show_message('No DataFrames available.')
            return
        start = 0
        df = frames[dropdown.value]
        column_dropdown.options = list(df.columns)
        show_output()

    # Event handlers for the paging buttons
    def on_head_button_clicked(b):
        nonlocal start
        start = 0
        show_output()

    def on_prev_button_clicked(b):
        nonlocal start
        start = max(0, start - page_size)
        show_output()

    def on_next_button_clicked(b):
        nonlocal start
        if df is not None:
            start = min(last_page_start(), start + page_size)
        show_output()

    def on_tail_button_clicked(b):
        nonlocal start
        if df is not None:
            start = last_page_start()
        show_output()

    # Function to filter the current DataFrame and show the result
    def show_search(button_instance=None):
        nonlocal start, df
        if not require_df():
            return
        pattern = search_input.value
        if pattern and column_dropdown.value is not None:
            try:
                matches = df[column_dropdown.value].astype(str).str.contains(pattern)
            except re.error as err:
                # The pattern is a regular expression; report bad syntax
                # instead of failing silently inside the widget callback.
                show_message('Invalid search pattern: ' + html_escape(str(err)))
                return
            df = df[~matches] if not_checkbox.value else df[matches]
            # The filtered frame may be shorter than the old offset.
            start = 0
        show_output()

    # Function to show summary statistics
    def show_description(button_instance=None):
        if not require_df():
            return
        try:
            stats = df.describe()
        except ValueError as err:
            # describe() refuses e.g. a DataFrame without columns.
            show_message(html_escape(str(err)))
            return
        # Column-wise Series.map avoids the deprecated DataFrame.applymap.
        desc_df = stats.apply(lambda column: column.map(format_stat))
        with output:
            output.clear_output()
            display(HTML('<div style="overflow-x: auto; white-space: nowrap;">'
                         + desc_df.to_html() + '</div>'))

    # Display UI elements
    title = widgets.HTML('<h2 style="text-align: center;">Other CSVs</h2>')
    box_layout = widgets.Layout(display='flex', justify_content='center')
    display(
        widgets.VBox(
            [
                title, widgets.HBox([dropdown, show_button, describe_button], layout=box_layout),
                widgets.HBox([not_checkbox, column_dropdown, search_input, search_button], layout=box_layout),
                output,
                widgets.HBox([dimensions_label], layout=widgets.Layout(justify_content='flex-start')),
                widgets.HBox([head_button, prev_button, next_button, tail_button], layout=box_layout)
            ],
            layout=box_layout
        )
    )

    # Attach event handlers to buttons
    show_button.on_click(show_df)
    describe_button.on_click(show_description)
    head_button.on_click(on_head_button_clicked)
    prev_button.on_click(on_prev_button_clicked)
    next_button.on_click(on_next_button_clicked)
    tail_button.on_click(on_tail_button_clicked)
    search_button.on_click(show_search)

In [22]:
# Render the DataFrame browser UI. show_other_csvs() reads the notebook-level
# `dataframes` mapping, so that name must be defined before this cell runs.
show_other_csvs()

VBox(children=(HTML(value='<h2 style="text-align: center;">Other CSVs</h2>'), HBox(children=(Dropdown(options=…

In [None]:
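# RADIAL BAR CHART -- disabled: the entire implementation below is commented out.
# NOTE(review): it calls calculate_empty_cells_percentages(), which is not defined
# in the visible part of this notebook, and indexes `data[i]` by position on a
# label-indexed Series (likely needs `data.iloc[i]`) -- confirm before re-enabling.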

# from math import log10
# import matplotlib.cm as cm
# # RADIAL BAR CHART
# def create_radial_bar_chart_empty_cells_percentage(df):
#     empty_cells_percentages = calculate_empty_cells_percentages(df)
        
#     # Convert the dictionary to a DataFrame for easier plotting
#     df_empty_cells = pd.DataFrame(empty_cells_percentages, index=['Empty Cells', 'Percentage']).T
#     df_empty_cells = df_empty_cells.sort_values('Percentage', ascending=False)  # Sort by percentage for better visualization

#     labels = df_empty_cells.index
#     data = df_empty_cells['Percentage'] * 100

#     # Number of data points
#     n = len(data)
#     # Find max value for full ring
#     k = 10 ** int(log10(max(data)))
#     m = k * (1 + max(data) // k)

#     # Radius of donut chart
#     r = 1.5
#     # Calculate width of each ring
#     w = r / n 

#     # Create colors along a chosen colormap
#     colors = [cm.terrain(i / n) for i in range(n)]

#     # Create figure, axis
#     fig, ax = plt.subplots()
#     ax.axis("equal")

#     # Create rings of donut chart
#     for i in range(n):
#         # Hide labels in segments with textprops: alpha = 0 - transparent, alpha = 1 - visible
#         innerring, _ = ax.pie([m - data[i], data[i]], radius = r - i * w, startangle = 90, labels = ["", f"{labels[i]}: {data[i]:.1f}%"], labeldistance = 1 - 1 / (1.5 * (n - i)), textprops = {"alpha": 0}, colors = ["white", colors[i]])
#         plt.setp(innerring, width = w, edgecolor = "white")

#     plt.legend()
#     plt.show()