JupyterLab Notebook Examples:  
Verbose REFERENTIAL material to be utilized in a LINUX environment.  

GOTCHA - Be mindful of the Default Working Directory setting

Nonexistent directory './data' will be created in
the Default Working Directory.

Nonexistent file 'MixedData.csv' will be written in './data'.  
File 'TransformedData.csv' will be written in './data'.

In [1]:
'''
'CSV_2_OneHotEncoded_2_CSV.ipynb'      

if CSV file './data/MixedData.csv' exists:
    jump to 'HouseKeepingEnded'

Create dictionary 'dic_mixed'

Create DataFrame 'df_mixed' from dictionary 'dic_mixed'.

Write file './data/MixedData.csv'.

            HouseKeepingEnded:

Create DataFrame 'df_data' by reading file
    './data/MixedData.csv'.
    
Extract columns (numeric vs object) from DataFrame 'df_data'.

Initialize the encoder, replacing deprecated
  'sparse=False' with 'sparse_output=False'.

Fit and transform yielding 'enc'.

Get feature names.

Convert transformed array to a DataFrame.

Concatenate encoded DataFrame to numerical DataFrame.

Write file './data/TransformedData.csv'.

Delete all created DataFrames from memory.
'''
#
from IPython.display import display
from pathlib import Path
from sklearn.preprocessing import OneHotEncoder
import os
import pandas as pd
import time
#

# Pre-bind 'df_mixed' to an empty DataFrame so that the final
#   'del df_mixed' succeeds even when './data/MixedData.csv' already
#   exists and the sample data is therefore never built.
df_mixed = pd.DataFrame()
f_created_csv = False
name_dir = './data'
name_filepath = './data/MixedData.csv'
name_transformed_filepath = './data/TransformedData.csv'

# Ensure that the target directory exists ('exist_ok=True' makes this
#   a no-op when it is already there), then set its permissions.
os.makedirs(name_dir, exist_ok=True)
os.chmod(name_dir, 0o755)

if os.path.exists(name_filepath):
    # Reuse the CSV file that is already present; keep it read-only.
    os.chmod(name_filepath, 0o444)
else:
    # CSV file './data/MixedData.csv' was not found, so create it.
    f_created_csv = True

    # Create dictionary 'dic_mixed'.
    dic_mixed = {
        'Count': [3, 4, 5, 6, 7,],
        'Item': ['coat', 'shirt', 'coat', 'beanie', 'shirt',],
        'Size': ['large', 'medium', 'medium', 'small', 'large',],
        'Color': ['red', 'yellow', 'blue', 'yellow','red',],
    }
    # Create DataFrame 'df_mixed' from dictionary 'dic_mixed'.
    df_mixed = pd.DataFrame(dic_mixed)

    # Save the DataFrame as a CSV file.  Passing the path lets pandas
    #   open, write and close the file, so the data is fully on disk
    #   before the permissions and modified time are touched below.
    df_mixed.to_csv(name_filepath)
    os.chmod(name_filepath, 0o444)

    # Get then display modified time info (read only after the file
    #   has been closed, so it reflects the final write).
    ti_m = os.path.getmtime(name_filepath)
    m_ti = time.ctime(ti_m)
    # Update './data' datetime stamp.
    Path(name_dir).touch()
    print(f"\nFile '{name_filepath}' was CREATED: {m_ti}")

# HouseKeepingEnded:

# Create DataFrame 'df_data' by reading './data/MixedData.csv'.
#   The first CSV column is used as the index.
df_data = pd.read_csv(name_filepath, index_col=0)
if f_created_csv:
    print('As a dynamically generated CSV file\n')
print('df_data.info():')
# 'DataFrame.info()' prints its report itself and returns None, so it
#   must not be wrapped in 'display()' (that would show a stray 'None').
df_data.info()
display(df_data)

# Extract columns (numeric vs object) from DataFrame 'df_data'.
X_digits = df_data.select_dtypes(exclude='object')
X_non_digits = df_data.select_dtypes(include='object')
# Uncomment to inspect the two column subsets:
# display(X_digits)
# display(X_non_digits)

# Initialize the encoder, replacing deprecated
#   'sparse=False' with 'sparse_output=False'.
# Fit and transform yielding a dense array 'X_encoded'.
enc = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
X_encoded = enc.fit_transform(X_non_digits)

# Get feature names.
# Convert transformed array to a DataFrame.  Reuse the source index so
#   that the row labels match 'X_digits'; without it the new frame
#   would get a fresh 0..n-1 index.
feature_names = enc.get_feature_names_out(X_non_digits.columns)
df_X_encoded = pd.DataFrame(
    X_encoded, columns=feature_names, index=X_non_digits.index
)

# Concatenate encoded DataFrame to numerical DataFrame.
#   'pd.concat(axis=1)' aligns rows on index labels, which is why the
#   two frames must share the same index.
df_transformed = pd.concat([X_digits, df_X_encoded], axis=1)
display(df_transformed)

# Ensure that file './data/TransformedData.csv' is writeable if
#   it exists (an earlier run leaves it read-only).
if os.path.exists(name_transformed_filepath):
    os.chmod(name_transformed_filepath, 0o644)

# Write file './data/TransformedData.csv'.  Passing the path lets
#   pandas open, write and close the file, so the data is fully on
#   disk before the permissions and modified time are touched below.
df_transformed.to_csv(name_transformed_filepath)
os.chmod(name_transformed_filepath, 0o444)

# Get then display modified time info (read only after the file has
#   been closed, so it reflects the final write).
ti_m = os.path.getmtime(name_transformed_filepath)
m_ti = time.ctime(ti_m)
# Update './data' datetime stamp.
Path(name_dir).touch()
print(f"\nFile '{name_transformed_filepath}' was CREATED: {m_ti}")

# Drop the names of every DataFrame built by this cell:
#   'df_X_encoded', 'df_data', 'df_mixed' and 'df_transformed'.
del df_X_encoded, df_data, df_mixed, df_transformed

# Report the local time at which the cell finished, including the
#   time zone name and its UTC offset.
time_local = time.localtime()
time_string = time.strftime(
    '%Y-%m-%d %H:%M:%S %Z %z',
    time_local,
)
print('\n' + time_string)
# EOF


File './data/MixedData.csv' was CREATED: Tue Oct  8 19:32:15 2024
As a dynamically generated CSV file

df_data.info():
<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Count   5 non-null      int64 
 1   Item    5 non-null      object
 2   Size    5 non-null      object
 3   Color   5 non-null      object
dtypes: int64(1), object(3)
memory usage: 200.0+ bytes


None

Unnamed: 0,Count,Item,Size,Color
0,3,coat,large,red
1,4,shirt,medium,yellow
2,5,coat,medium,blue
3,6,beanie,small,yellow
4,7,shirt,large,red


Unnamed: 0,Count,Item_beanie,Item_coat,Item_shirt,Size_large,Size_medium,Size_small,Color_blue,Color_red,Color_yellow
0,3,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1,4,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
2,5,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,6,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
4,7,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0



File './data/TransformedData.csv' was CREATED: Tue Oct  8 19:32:15 2024

2024-10-08 19:32:15 PDT -0700
