In [16]:
# merge material quantity
# make quantitative factors

import pandas as pd

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer


# Load the cleaned table and list the columns that are encoded as integers below.
df = pd.read_csv('cleaned.csv')
categorical_cols = [
    'User Type',
    'Material 1',
    'Material 2',
    'Select Printer',
    'Printer Requested',
    'Printed On',
    'Material_Category1',
    'Sliced by',
]

# Per-column bookkeeping: the fitted encoder, and a {original label: integer code} lookup.
label_encoders = {}
value_mappings = {}

for col in categorical_cols:
    encoder = LabelEncoder()
    label_encoders[col] = encoder

    # NaNs are replaced by the literal label 'missing' so they receive a code of their own.
    df[col] = df[col].fillna('missing')

    # Overwrite the column with its integer codes.
    df[col] = encoder.fit_transform(df[col])

    # Pair every learned class with the code the encoder assigns to it.
    value_mappings[col] = dict(zip(encoder.classes_, encoder.transform(encoder.classes_)))

# Print each column's label -> code table.
for col, mapping in value_mappings.items():
    print(f"\nMapping for {col}:")
    for original, encoded in mapping.items():
        print(f"{original}: {encoded}")


Mapping for User Type:
Faculty / Staff: 0
Graduate: 1
Undergraduate: 2

Mapping for Material 1:
Bambu Labs ABS: 0
Bambu Labs ABS-GF: 1
Bambu Labs ASA: 2
Bambu Labs PC: 3
Bambu Labs PETG Basic: 4
Bambu Labs PETG HF: 5
Bambu Labs PETG Translucent: 6
Bambu Labs PLA Basic: 7
Bambu Labs PLA Matte: 8
Bambu Labs PLA-CF: 9
Bambu Labs TPU 95A HF: 10
Formlabs BioMed Clear (v1): 11
Formlabs Black (v4): 12
Formlabs Black (v5): 13
Formlabs Clear (v3): 14
Formlabs Durable (v2): 15
Formlabs Elastic 50A (v1): 16
Formlabs Flexible 80A (v2): 17
Formlabs High Temp (v2): 18
Formlabs Rigid 4000 (v1): 19
Formlabs Tough 1500 (v1): 20
Formlabs Tough 1500 (v1.1): 21
Formlabs White (v4): 22
Formlabs White (v5): 23
Fromlabs Clear (v5): 24
Markforged Onyx: 25
Nylon 12: 26
Stratasys ABS M30: 27
Stratasys PC-ABS: 28
Ultimaker Tough PLA: 29
missing: 30

Mapping for Material 2:
Bambu Labs ABS: 0
Bambu Labs ASA: 1
Bambu Labs PC: 2
Bambu Labs PETG Basic: 3
Bambu Labs PETG Translucent: 4
Bambu Labs PLA Basic: 5
Bambu L

In [17]:
# Preview the first rows after the categorical columns were replaced by integer codes.
df.head()

Unnamed: 0,Row,Name,Select Printer,Slicer Project File,Printer Requested,User Email,Printer Note,Material 1 Qty,User Type,Material 1,...,Material 2,Material 2 Cost/Unit.1,Printed On.1,Print Cost,Sliced by,Print Time (Hours),Print ID,Creation Log,Material_Category1,Material_Category2
0,7,FMEP Cubes 1,20,https://uw-makerspace-team.monday.com/protecte...,0,bmdavidson2@wisc.edu,X1E - ABS White/ASA White,420.82,1,0,...,1,0.05,Bambu Labs,21.04,1,14,8171785086,Design Innovation Lab Undergraduate Staff Jan ...,0,ABS
1,8,AED_Drone_Parts_1,35,https://uw-makerspace-team.monday.com/protecte...,0,lprodgers@wisc.edu,X1C - Black PLA/White PLA,55.0,1,8,...,6,0.05,Bambu Labs,2.75,0,2,8214378089,Design Innovation Lab Undergraduate Staff Jan ...,3,PLA
2,9,AED_Drone_Parts_2,24,https://uw-makerspace-team.monday.com/protecte...,0,lprodgers@wisc.edu,X1C - Blue PLA/Gray PLA,97.0,1,8,...,6,0.05,Bambu Labs,4.85,1,3,8214392486,Design Innovation Lab Undergraduate Staff Jan ...,3,PLA
3,10,Top-Down Mount v1,23,https://uw-makerspace-team.monday.com/protecte...,0,sifferman@wisc.edu,X1C - White PLA/Purple PLA,274.64,1,8,...,6,0.05,Bambu Labs,13.73,1,9,8238119583,Design Innovation Lab Undergraduate Staff Jan ...,3,PLA
4,11,BES Base,43,https://uw-makerspace-team.monday.com/protecte...,0,baschmall@wisc.edu,X1C - White ASA/Teal ASA,36.5,1,8,...,6,0.05,Bambu Labs,1.83,1,1,8269757333,Design Innovation Lab Undergraduate Staff Jan ...,3,PLA


In [18]:
# Remove, in place, the columns that are not kept in the table saved later.
# Note: 'Material 2' and 'Sliced by' were label-encoded above but are discarded here.
df.drop(
    [
        'Row',
        'Name',
        'Slicer Project File',
        'Printer Note',
        'Material 2 Cost/Unit.1',
        'Material 2 Qty',
        'Material 2',
        'Material 2 Cost/Unit',
        'Printed On.1',
        'Print Cost',
        'Sliced by',
        'Print ID',
    ],
    axis='columns',
    inplace=True,
)

In [19]:
# Preview the table again to check which columns remain after the drop.
df.head()

Unnamed: 0,Select Printer,Printer Requested,User Email,Material 1 Qty,User Type,Material 1,Printed On,Print Time (Hours),Creation Log,Material_Category1,Material_Category2
0,20,0,bmdavidson2@wisc.edu,420.82,1,0,0,14,Design Innovation Lab Undergraduate Staff Jan ...,0,ABS
1,35,0,lprodgers@wisc.edu,55.0,1,8,0,2,Design Innovation Lab Undergraduate Staff Jan ...,3,PLA
2,24,0,lprodgers@wisc.edu,97.0,1,8,0,3,Design Innovation Lab Undergraduate Staff Jan ...,3,PLA
3,23,0,sifferman@wisc.edu,274.64,1,8,0,9,Design Innovation Lab Undergraduate Staff Jan ...,3,PLA
4,43,0,baschmall@wisc.edu,36.5,1,8,0,1,Design Innovation Lab Undergraduate Staff Jan ...,3,PLA


In [20]:
# Remove, in place, the three columns that still hold text in the preview above
# (user email, creation log, second material category).
df.drop(
    ['User Email', 'Creation Log', 'Material_Category2'],
    axis='columns',
    inplace=True,
)

In [21]:
# Preview the remaining columns after the second drop.
df.head()

Unnamed: 0,Select Printer,Printer Requested,Material 1 Qty,User Type,Material 1,Printed On,Print Time (Hours),Material_Category1
0,20,0,420.82,1,0,0,14,0
1,35,0,55.0,1,8,0,2,3
2,24,0,97.0,1,8,0,3,3
3,23,0,274.64,1,8,0,9,3
4,43,0,36.5,1,8,0,1,3


In [22]:
# Summary statistics (count, mean, std, quartiles, min/max) for the remaining columns.
df.describe()

Unnamed: 0,Select Printer,Printer Requested,Material 1 Qty,User Type,Material 1,Printed On,Print Time (Hours),Material_Category1
count,1928.0,1928.0,1928.0,1928.0,1928.0,1928.0,1928.0,1928.0
mean,22.877593,0.247407,76.978828,1.68361,9.592324,0.254149,3.958506,2.630187
std,10.477324,0.808937,113.151431,0.604001,5.732652,0.829986,5.590909,1.103216
min,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
25%,18.0,0.0,7.9725,2.0,8.0,0.0,1.0,3.0
50%,24.0,0.0,34.505,2.0,8.0,0.0,2.0,3.0
75%,29.0,0.0,97.84,2.0,8.0,0.0,4.0,3.0
max,43.0,5.0,973.7,2.0,30.0,6.0,60.0,4.0


In [28]:
# Save the encoded, trimmed table to 'PCA_ready.csv' without writing the row index.
# NOTE(review): this cell's execution count (28) is higher than the following cell's (25),
# so the cells were run out of order — re-run top to bottom to confirm the saved file.
df.to_csv('PCA_ready.csv', index=False)

In [25]:

# Turn the nested {column: {original label: code}} dict into a table:
# one column per encoded feature, one row per original label. A cell is NaN
# wherever a label does not occur in that feature, so codes may be stored as floats.
df_map = pd.DataFrame.from_dict(value_mappings)

# Keep the index (the original labels) in the file; to_csv writes it by default.
df_map.to_csv('value_mappings.csv')