### Read the dataframe by brute force and check the reformatted column names.

In [210]:
import polars as pl
import re
import numpy as np


def reformat_column_names(col_name: str) -> str:
    """Separate the words of a CamelCase column name with underscores.

    An underscore is inserted before every uppercase ASCII letter that is
    not the first character of the name and is not directly preceded by
    another uppercase letter or by an underscore, e.g.
    ``"TypeName" -> "Type_Name"`` and ``"OrderID" -> "Order_ID"``.

    Args:
        col_name: The column name to reformat.

    Returns:
        The reformatted name. A name that is already underscore-separated
        is returned unchanged, so applying the function twice is harmless.
    """
    # ``_`` is part of the look-behind so that "Type_Name" is left alone
    # instead of becoming "Type__Name"; ``(?<!^)`` skips the leading letter.
    return re.sub(r"(?<![A-Z_])(?<!^)([A-Z])", r"_\1", col_name)

# Read the raw CSV. ignore_errors=True asks polars to keep reading when a
# value cannot be parsed instead of aborting the whole load.
laptop_data: pl.DataFrame = pl.read_csv(source="laptopData.csv", ignore_errors=True)

# Snapshot of the column names as they appear in the file
# (presumably kept for later comparison - confirm against later cells).
stare = laptop_data.columns

# Give the unnamed index column a real name before reformatting.
data_cols: list[str] = laptop_data.columns
data_cols[data_cols.index("Unnamed: 0")] = "OrderID"

# A plain list comprehension keeps the names as ``str`` objects; there is no
# need to round-trip through a NumPy array for a handful of strings.
laptop_data.columns = [reformat_column_names(name) for name in data_cols]

# Refresh the working list so it reflects the reformatted names.
data_cols = laptop_data.columns

### Dealing with missing values.

In [None]:
def null_values_count(df: pl.DataFrame) -> list[tuple[str, int]]:
    """Count the missing values in each column of the dataframe.

    Args:
        df: The dataframe to inspect.

    Returns:
        One ``(column_name, null_count)`` pair for every column that holds
        at least one null, in the dataframe's column order. The list is
        empty when no column has missing values.
    """
    return [
        (col, n_nulls)
        for col in df.columns
        if (n_nulls := df[col].null_count()) > 0
    ]


def null_value_displayer(df: pl.DataFrame, when: str = "before") -> None:
    """Print the columns of ``df`` that contain missing values.

    Nothing is printed when no column has missing values.

    Args:
        df: The dataframe to inspect.
        when: Word placed in the header line to say at which stage the
            report was produced (the default is ``"before"``).
    """
    columns_with_nulls = null_values_count(df)

    # Stay silent when there is nothing to report.
    if not columns_with_nulls:
        return

    print(f"Columns with missing values {when} dropping null values:")
    print(columns_with_nulls)
    print('\n')



def drop_missing_values(df: pl.DataFrame) -> pl.DataFrame:
    """Return ``df`` without the rows removed by ``drop_nulls()``.

    The per-column null counts are reported through
    ``null_value_displayer`` once before and once after the drop.

    NOTE(review): ``drop_nulls()`` with no arguments removes every row that
    contains a null in any column, not only rows that are entirely empty.

    Args:
        df: The dataframe to clean.

    Returns:
        The dataframe produced by ``df.drop_nulls()``.
    """
    null_value_displayer(df, when="before")

    without_nulls = df.drop_nulls()
    null_value_displayer(without_nulls, when="after")

    return without_nulls

  
    

# Rebind laptop_data to the frame returned by drop_missing_values, which
# calls drop_nulls() and prints the null counts before and after the drop.
laptop_data = drop_missing_values(laptop_data)


Columns with missing values before dropping null values:
[('Order_ID', 30), ('Company', 30), ('Type_Name', 30), ('Inches', 31), ('Screen_Resolution', 30), ('Cpu', 30), ('Ram', 30), ('Memory', 30), ('Gpu', 30), ('Op_Sys', 30), ('Weight', 30), ('Price', 30)]


