<a href="https://colab.research.google.com/github/Miron-Hanukaiev/Introduction_to_Cloud_Computing/blob/main/ex4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==========================================
# Import required libraries
# ==========================================
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import ipywidgets as widgets
from IPython.display import display

# ==========================================
# Load CSV file
# ==========================================
from google.colab import files

# Open the Colab upload dialog. The keys of the returned mapping are used
# below as the uploaded file names.
uploaded = files.upload()

# A cancelled dialog leaves the mapping empty; fail with a clear message
# instead of an opaque IndexError on the lookup below.
if not uploaded:
    raise ValueError(
        "No file was uploaded. Re-run this cell and choose a CSV file."
    )

# Only the first uploaded file is used; any additional files are ignored.
file_name = next(iter(uploaded))
df = pd.read_csv(file_name)

print("Columns detected in the dataset:\n")
print(df.columns.tolist())


# ==========================================
# Auto-detect matching column names
# ==========================================
def detect_column(possible_names, frame=None):
    """Return the first dataset column whose name matches a candidate.

    Columns are scanned in dataset order, so the result is the left-most
    column that matches *any* candidate; the order of ``possible_names``
    does not express a priority.

    Matching ignores letter case and surrounding whitespace on both the
    column labels and the candidates. Labels are converted with ``str()``
    first, so non-string labels (e.g. integers) never raise.

    Args:
        possible_names: Iterable of acceptable column names.
        frame: Object exposing a ``columns`` iterable (typically a
            DataFrame). Defaults to the module-level ``df``.

    Returns:
        The matching column label exactly as it appears in the dataset,
        or ``None`` when no column matches.
    """
    table = df if frame is None else frame
    # Normalise the candidates once so each column needs a single set lookup.
    wanted = {str(name).strip().lower() for name in possible_names}
    for col in table.columns:
        if str(col).strip().lower() in wanted:
            return col
    return None

# Resolve each logical field to the column name actually present in the
# dataset; a field stays None when none of its candidate names is found.
col_make = detect_column(['tozeret_nm', 'make', 'manufacturer', 'brand'])
col_model = detect_column(['kinuy_mishari', 'model', 'trim', 'submodel'])
col_year = detect_column(['shnat_yitzur', 'year', 'production_year'])
col_trim = detect_column(['ramat_gimur', 'trim_level', 'version'])

# Report the outcome of the matching, one field per line.
print("\nAuto-matched column names:")
for _label, _matched in (
    ("Make", col_make),
    ("Model", col_model),
    ("Year", col_year),
    ("Trim", col_trim),
):
    print(f"{_label}:", _matched)


# ==========================================
# TAB 1 – Data Overview
# ==========================================
# Each section is a heading plus a callable that builds the table shown
# beneath it; the callables run inside the widget context, in order.
_overview_sections = (
    ("==== FIRST 10 ROWS ====\n", lambda: df.head(10)),
    ("\n==== DESCRIPTIVE STATISTICS ====\n", lambda: df.describe(include='all')),
)

tab1 = widgets.Output()
with tab1:
    for _heading, _build_table in _overview_sections:
        print(_heading)
        display(_build_table())


# ==========================================
# TAB 2 – General Information
# ==========================================
import io

tab2 = widgets.Output()
with tab2:
    print("==== DATAFRAME INFO ====\n")

    # DataFrame.info() writes its report to a file-like object and returns
    # None, so capture it in an in-memory text buffer. A bare callable such
    # as list.append has no .write method and cannot be used as the buffer.
    buffer = io.StringIO()
    df.info(buf=buffer)
    print(buffer.getvalue())

    print("\n==== MISSING VALUES ====\n")
    # Number of missing (NaN) entries per column.
    display(df.isna().sum())


# ==========================================
# TAB 3 – Year Distribution Plot
# ==========================================
# Histogram (with KDE overlay) of the detected year column, rendered into
# the third tab; falls back to a message when no year column was found.
tab3 = widgets.Output()
with tab3:
    if col_year is not None:
        _years = df[col_year].dropna()
        _fig, _ax = plt.subplots(figsize=(10, 5))
        sns.histplot(_years, kde=True, bins=30, color='blue', ax=_ax)
        _ax.set_title("Distribution of Vehicle Production Years")
        _ax.set_xlabel("Year")
        _ax.set_ylabel("Count")
        _ax.grid(False)
        plt.show()
    else:
        print("No year column detected. Unable to display year distribution plot.")


# ==========================================
# Create the tabs widget
# ==========================================
# Titles are listed in the same order as the tab children they label.
_tab_titles = ('Overview', 'General Info', 'Year Distribution')

tabs = widgets.Tab(children=[tab1, tab2, tab3])
for _position, _title in enumerate(_tab_titles):
    tabs.set_title(_position, _title)

display(tabs)

