### Library Imports

In [1]:
import pandas as pd
import plotly.express as px
import panel as pn
import re
from IPython.display import IFrame
import time
import random

### Import Dataset

In [2]:
# Source CSV (raw file hosted on GitHub); reading it requires network access.
url = "https://raw.githubusercontent.com/Lughaidh-w/Laptop-Prices/main/laptop_price1.csv"
df = pd.read_csv(url)
# Keep an independent copy of the raw data. Plain assignment would only create
# a second name for the same DataFrame, so any in-place edit made to `df`
# later on would silently change `df_original` as well.
df_original = df.copy()

### Initial Information

In [3]:
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called directly: wrapping it in print() appends a stray "None" line.
df.info()
# Rich (HTML) preview of the first rows.
display(df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   laptop_ID         1303 non-null   int64  
 1   Company           1303 non-null   object 
 2   Product           1303 non-null   object 
 3   TypeName          1303 non-null   object 
 4   Inches            1303 non-null   float64
 5   ScreenResolution  1303 non-null   object 
 6   Cpu               1303 non-null   object 
 7   Ram               1303 non-null   object 
 8   Memory            1303 non-null   object 
 9   Gpu               1303 non-null   object 
 10  OpSys             1303 non-null   object 
 11  Weight            1303 non-null   object 
 12  Price_euros       1303 non-null   float64
dtypes: float64(2), int64(1), object(10)
memory usage: 132.5+ KB
None


Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6


### Unique and null values

In [4]:
def unique_and_null(df):
    """Print a per-column summary of entries, distinct values and nulls.

    For every column of ``df`` one row is produced with:

    * ``Column``      - the column name
    * ``Entries``     - number of rows in the column
    * ``Nunique``     - number of distinct values (``Series.nunique()``)
    * ``Null values`` - number of null entries (``isnull().sum()``)

    The table is printed as plain text without the index; nothing is returned.
    """
    summary_columns = ['Column', 'Entries', 'Nunique', 'Null values']
    # Collect all rows first and build the frame once, instead of growing a
    # DataFrame with pd.concat on every iteration (which re-copies the table
    # each time and starts from an empty frame).
    records = [
        {
            'Column': col,
            'Entries': len(df[col]),
            'Nunique': df[col].nunique(),
            'Null values': df[col].isnull().sum(),
        }
        for col in df.columns
    ]
    # Passing `columns` keeps the header intact even when `df` has no columns.
    table = pd.DataFrame(records, columns=summary_columns)
    print(table.to_string(index=False))

# Print the entries / distinct-value / null-count summary for the loaded dataset.
unique_and_null(df)   

          Column Entries Nunique Null values
       laptop_ID    1303    1303           0
         Company    1303      19           0
         Product    1303     618           0
        TypeName    1303       6           0
          Inches    1303      18           0
ScreenResolution    1303      40           0
             Cpu    1303     118           0
             Ram    1303       9           0
          Memory    1303      39           0
             Gpu    1303     110           0
           OpSys    1303       9           0
          Weight    1303     179           0
     Price_euros    1303     791           0


### Statistical Information on continuous variables

In [5]:
# describe() summarises the numeric columns (count, mean, std, min, quartiles, max).
display(df.describe())

Unnamed: 0,laptop_ID,Inches,Price_euros
count,1303.0,1303.0,1303.0
mean,660.155794,15.017191,1123.686992
std,381.172104,1.426304,699.009043
min,1.0,10.1,174.0
25%,331.5,14.0,599.0
50%,659.0,15.6,977.0
75%,990.5,15.6,1487.88
max,1320.0,18.4,6099.0


### Plot tuning
#### These can be placed inside the functions

In [6]:
# Module-level settings read by the graphing and dashboard functions below.
# They are kept outside the functions so they can be tuned in one place.

# Size passed to each individual Plotly figure (layout height / width).
plot_height = 400
plot_width = 600

# Size of the 2x2 dashboard grid; also used for the IFrame that embeds it.
grid_height = 1000
grid_width = 1300

# Column every continuous feature is plotted against and correlated with.
target = "Price_euros"

# Colours
# Dashboard background / figure paper colour, and the text colour.
colour_background = "#1f1e2c"
colour_text = "#9192a4"

# Plot-area background, scatter markers, OLS trendline and histogram bars.
plot_background = "#27283c"
colour_scatter = "#296bb0"
colour_trendline = "#2cb9a3"
colour_histo = "#ac47a1"

### Graphing Functions

In [7]:
# histogram
def initial_histogram(df, var, height, width):
    """Return a themed Plotly count histogram of column ``var``.

    df     -- DataFrame holding the data.
    var    -- column name; used for the x axis, the colour grouping and the
              (centred) figure title.
    height -- figure height passed to the layout.
    width  -- figure width passed to the layout.

    Colours come from the module-level colour settings. The colour sequence
    has a single entry, and the legend is hidden.
    """
    figure = px.histogram(
        df,
        x=var,
        color=var,
        color_discrete_sequence=[colour_histo],
    )
    layout_options = dict(
        height=height,
        width=width,
        title=dict(text=var, x=0.5),  # x=0.5 centres the title
        yaxis_title="Count",
        plot_bgcolor=plot_background,
        paper_bgcolor=colour_background,
        font={"color": colour_text},
        showlegend=False,
    )
    figure.update_layout(**layout_options)
    return figure

# scatter
def initial_scatter(df, xvar, yvar, height, width):
    """Return a themed Plotly scatter of ``xvar`` against ``yvar`` with an OLS trendline.

    df     -- DataFrame holding the data.
    xvar   -- column plotted on the x axis (also the x-axis title).
    yvar   -- column plotted on the y axis (also the y-axis title).
    height -- figure height passed to the layout.
    width  -- figure width passed to the layout.

    Marker, trendline, background and text colours come from the module-level
    colour settings.
    """
    figure = px.scatter(
        df,
        x=xvar,
        y=yvar,
        color_discrete_sequence=[colour_scatter],
        trendline="ols",
        trendline_color_override=colour_trendline,
    )
    layout_options = dict(
        height=height,
        width=width,
        title=dict(text=f"Scatterplot of {xvar} vs {yvar}", x=0.5),  # centred
        xaxis_title=xvar,
        yaxis_title=yvar,
        plot_bgcolor=plot_background,
        paper_bgcolor=colour_background,
        font={"color": colour_text},
    )
    figure.update_layout(**layout_options)
    return figure

# mode
def mode_calc(df, var):
    """Return the most frequent value(s) of column ``var`` as display text.

    All values tied for the highest count are included. The result is the
    list's text form with the enclosing square brackets stripped, so string
    values keep their quotes (e.g. ``'x', 'y'``) and numbers appear bare.
    """
    frequencies = df[var].value_counts()
    highest = frequencies.max()
    # Keep every value whose count equals the maximum, not just the first one.
    winners = frequencies.loc[frequencies == highest].index.tolist()
    return repr(winners).lstrip("[").rstrip("]")

### Initial Dashboard | Analysing Continuous and Categorical Features

In [8]:
def _titled_tabs(heading, tab_items):
    """Return a Column with a Markdown heading stacked above a Tabs layout.

    heading   -- Markdown source for the section title.
    tab_items -- iterable of ``(tab label, content)`` pairs, one per tab.
    """
    title = pn.pane.Markdown(heading, style={"color": colour_text})
    return pn.Column(title, pn.Tabs(*tab_items))


def initial_dashboard(df, port):
    """Build the 2x2 exploration dashboard for ``df`` and serve it with Panel.

    Layout of the grid:

    * top-left     - scatter of each continuous column against ``target``
    * top-right    - histogram of each categorical column
    * bottom-left  - summary statistics of each continuous column
    * bottom-right - mode(s) and distinct-value count of each categorical column

    df   -- DataFrame to explore; must contain the ``target`` column.
    port -- port handed to ``pn.serve``.

    Sizes, colours and ``target`` are read from the module-level settings.
    The dashboard is served without opening a browser tab (``show=False``);
    nothing is returned.
    """
    # Continuous and categorical features.
    # Only float64 and int32 columns count as continuous here, so int64
    # columns are not selected.
    categorical_columns = df.select_dtypes(include=['category', 'object']).columns.tolist()
    continuous_columns = df.select_dtypes(include=['float64', 'int32']).columns.tolist()
    # Load the Plotly extension
    pn.extension('plotly')

    # Plot 1: one scatter tab per continuous column, plotted against the target.
    # The heading is an f-string so the target name is actually substituted.
    plot1 = _titled_tabs(
        f"#### Continuous variables vs. {target} Scatter",
        [(c, initial_scatter(df, c, target, plot_height, plot_width))
         for c in continuous_columns],
    )

    # Plot 2: one histogram tab per categorical column.
    plot2 = _titled_tabs(
        "#### Categorical Variables Histogram",
        [(c, initial_histogram(df, c, plot_height, plot_width))
         for c in categorical_columns],
    )

    # Plot 3: Information Continuous Features
    # One Markdown pane of summary statistics per continuous column.
    continuous_info = {c: pn.pane.Markdown(f"### <span style='color:{colour_text}'>**Feature**: {c}<br>"
                                           f"**Min**: {df[c].min()}<br>"
                                           f"**Max**: {df[c].max()}<br>"
                                           f"**Range**: {(df[c].max()-df[c].min())}<br>"
                                           f"**Median**: {df[c].median()}<br>"
                                           f"**Mean**: {df[c].mean()}<br>"
                                           f"**Mode(s)**: {mode_calc(df,c)}<br>"
                                           f"**Standard Deviation**: {df[c].std()}<br>"
                                           f"**Correlation with {target}**: {df[c].corr(df[target])}"
                                           )
                       for c in continuous_columns}
    plot3 = _titled_tabs(
        "#### Continuous variables Information",
        [(f"{c} Info", continuous_info[c]) for c in continuous_columns],
    )

    # Plot 4: Information Categorical Features
    # One Markdown pane per categorical column; "Frequency" shows the number
    # of distinct values (nunique) of that column.
    categorical_info = {c: pn.pane.Markdown(f"### <span style='color:{colour_text}'>**Feature**: {c}<br>"
                                            f"**Mode(s)**: {mode_calc(df,c)}<br>"
                                            f"**Frequency**: {df[c].nunique()}") for c in categorical_columns}
    plot4 = _titled_tabs(
        "#### Categorical variables Information",
        [(f"{c} Info", categorical_info[c]) for c in categorical_columns],
    )

    # Grid
    title = pn.pane.Markdown('Initial Exploration of Dataset Features', style={'font-size': '32px', 'font-weight': 'bold', "color": colour_text})
    # Create a grid of the four plots
    grid = pn.GridSpec(ncols=2, nrows=2, height=grid_height, width=grid_width, margin=(20, 20, 20, 20))
    grid[0, 0] = plot1
    grid[0, 1] = plot2
    grid[1, 0] = plot3
    grid[1, 1] = plot4
    dash = pn.Column(title,
                     grid,
                     background=colour_background,
                     )
    pn.serve(dash, port=port, show=False)

def display_initial_dash(df):
    """Serve the initial dashboard for ``df`` and return an IFrame showing it.

    A free local port is requested from the operating system, the dashboard
    is served on it via ``initial_dashboard``, and an ``IFrame`` pointing at
    ``http://localhost:<port>/`` (sized to the dashboard grid) is returned so
    the notebook can render it.
    """
    import socket

    # Binding to port 0 lets the OS pick a port that is currently unused.
    # A purely random port number may already be taken by another process,
    # in which case the server could not start on it.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.bind(("", 0))
        port = probe.getsockname()[1]
    initial_dashboard(df, port)
    return IFrame(f'http://localhost:{port}/', width=grid_width, height=grid_height)

### Display the Dashboard

In [9]:
# Serve the dashboard and embed it below; the returned IFrame is the cell's output.
display_initial_dash(df)

Launching server at http://localhost:54537
