In [None]:
# Combined Bore Dataset
# Loads the raw combined bore export, keeps the columns used downstream,
# normalizes their dtypes, drops rows lacking a usable measurement or limit,
# and writes the result to Cleaned_BoreData.csv.
import pandas as pd

# Define file path
bore_data_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Riley_O'Rorke_CombinedBoreData.csv"

# Load dataset. ResultDataValue2 holds part numbers, so it is read as a string
# to keep pandas from inferring a numeric dtype for it.
bore_df = pd.read_csv(bore_data_path, dtype={'ResultDataValue2': str})

# Columns that must be numeric; rows missing any of them are dropped below.
bore_numeric_columns = ['ResultDataValue3', 'MinValue', 'MaxValue']

# Select only the relevant columns. The explicit .copy() makes this an
# independent frame, so the column assignments below do not raise
# SettingWithCopyWarning and never depend on view-vs-copy behaviour.
bore_filtered_df = bore_df[[
    'AssemblyName', 'NamePostfix', 'DepartmentName', 'StationName',
    'StationDescription', 'ParameterName3', 'ResultDataValue2',
    'ResultEntryTimestamp3', 'ResultDataValue3', 'MinValue', 'MaxValue'
]].copy()

# Convert timestamp column to datetime format (unparseable values become NaT)
bore_filtered_df['ResultEntryTimestamp3'] = pd.to_datetime(
    bore_filtered_df['ResultEntryTimestamp3'], errors='coerce'
)

# ResultDataValue2 is already a string column thanks to the dtype passed to
# read_csv. It is deliberately not re-cast with .astype(str): on an object
# column that would turn missing part numbers into the literal text "nan".

# Convert numeric columns to float (unparseable values become NaN)
bore_filtered_df[bore_numeric_columns] = bore_filtered_df[bore_numeric_columns].apply(
    pd.to_numeric, errors='coerce'
)

# Drop rows with missing values in key measurement columns
# (reassignment instead of inplace=True keeps the step explicit)
bore_filtered_df = bore_filtered_df.dropna(subset=bore_numeric_columns)

# Save the cleaned dataset (optional)
cleaned_bore_data_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Cleaned_BoreData.csv"
bore_filtered_df.to_csv(cleaned_bore_data_path, index=False)

# Display first few rows for verification
print(bore_filtered_df.head(10))


  AssemblyName    NamePostfix  ... MinValue MaxValue
0     D0815479  Front Chassis  ...   94.960   95.040
1     D0815479  Front Chassis  ...   94.960   95.040
2     D0815479  Front Chassis  ...   94.960   95.040
3     D0815479  Front Chassis  ...   94.960   95.040
4     D0815479  Front Chassis  ...   94.960   95.040
5     D0815479  Front Chassis  ...   90.079   90.155
6     D0815479  Front Chassis  ...   90.079   90.155
7     D0815479  Front Chassis  ...   90.079   90.155
8     D0815479  Front Chassis  ...  101.572  101.652
9     D0815479  Front Chassis  ...  101.572  101.652

[10 rows x 11 columns]


In [None]:
# Tools Dataset
# Reads the raw tools export, keeps the tool / model / station / manufacturer
# columns, and writes the trimmed table to Cleaned_Tools.csv.
import pandas as pd

# Location of the raw tools export
tools_data_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Riley_O'Rorke_Tools.csv"

# Read the raw table
tools_df = pd.read_csv(tools_data_path)

# Keep only the columns of interest and expose the station key as 'StationId'
# (the spelling the coolant data uses for the same column).
tools_filtered_df = tools_df.loc[
    :,
    [
        'ToolID',
        'ModelID',
        'StationID',
        'Name',
        'ManufacturerID',
        'ModelNumber',
        'ManufacturerName',
    ],
].rename(columns={'StationID': 'StationId'})

# Persist the trimmed table (optional)
cleaned_tools_data_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Cleaned_Tools.csv"
tools_filtered_df.to_csv(cleaned_tools_data_path, index=False)

# Quick visual check of the first rows
print(tools_filtered_df.head())


   ToolID  ModelID  StationId  ... ManufacturerID  ModelNumber ManufacturerName
0       5        4         82  ...              3          377          Diatest
1      14        8        474  ...              6  DS-B5-10103           Dorsey
2      15        8        474  ...              6  DS-B5-10103           Dorsey
3      16        9        474  ...              6  DS-B5-10104           Dorsey
4      17        9        474  ...              6  DS-B5-10104           Dorsey

[5 rows x 7 columns]


In [None]:
# Weld Reclaim Dataset
# Extracts the assemblies that were flagged "Yes" on the weld-reclaim question
# and writes them, keyed by a "D0"-prefixed AssemblyID, to Cleaned_WeldReclaim.csv.
import pandas as pd

# Define file path
weld_reclaim_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Riley_O'Rorke_WeldReclaim.csv"

# Load dataset
weld_df = pd.read_csv(weld_reclaim_path)

# Rows of interest: the weld-reclaim question answered with "Yes"
weld_reclaim_mask = (
    (weld_df['Description'] == "Select whether or not the weldment needs to go to weld reclaim for rework.") &
    (weld_df['DataValue'] == "Yes")
)

# Filter rows and columns in one .loc call and take an explicit copy, so the
# rename and column assignment below operate on an independent frame and do
# not raise SettingWithCopyWarning.
weld_filtered_df = weld_df.loc[weld_reclaim_mask, ['AssemblyID', 'DataValue']].copy()

# Rename 'DataValue' to 'WeldReclaim'
weld_filtered_df = weld_filtered_df.rename(columns={'DataValue': 'WeldReclaim'})

# Prepend "D0" to AssemblyID values so the key has the same "D0…" form as the
# assembly identifiers in the bore data.
# NOTE(review): if AssemblyID is ever read as float (e.g. the column contains
# blanks), astype(str) would produce a ".0" suffix — confirm it loads as int.
weld_filtered_df['AssemblyID'] = "D0" + weld_filtered_df['AssemblyID'].astype(str)

# Save the cleaned dataset (optional)
cleaned_weld_data_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Cleaned_WeldReclaim.csv"
weld_filtered_df.to_csv(cleaned_weld_data_path, index=False)

# Display first few rows for verification
print(weld_filtered_df.head())


    AssemblyID WeldReclaim
20    D0677972         Yes
158   D0682051         Yes
254   D0707175         Yes
469   D0734371         Yes
546   D0747091         Yes


In [None]:
# Coolant Dataset
# Extracts the "Record Coolant Concentration" readings per assembly and station
# and writes them, keyed by a "D0"-prefixed AssemblyID, to Cleaned_Coolant.csv.
import pandas as pd

# Define file path
coolant_data_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Riley_O'Rorke_Coolant.csv"

# Load dataset
coolant_df = pd.read_csv(coolant_data_path)

# Keep only rows where 'Name' is "Record Coolant Concentration" and only the
# relevant columns. Doing both in one .loc call and taking an explicit copy
# gives an independent frame, so the rename and column assignment below do
# not raise SettingWithCopyWarning.
coolant_filtered_df = coolant_df.loc[
    coolant_df['Name'] == "Record Coolant Concentration",
    ['AssemblyID', 'DataValue', 'StationId'],
].copy()

# Rename 'DataValue' to 'CoolantConcentration'
coolant_filtered_df = coolant_filtered_df.rename(columns={'DataValue': 'CoolantConcentration'})

# Prepend "D0" to AssemblyID values so the key has the same "D0…" form as the
# assembly identifiers in the bore data.
# NOTE(review): if AssemblyID is ever read as float (e.g. the column contains
# blanks), astype(str) would produce a ".0" suffix — confirm it loads as int.
coolant_filtered_df['AssemblyID'] = "D0" + coolant_filtered_df['AssemblyID'].astype(str)

# Save the cleaned dataset (optional)
cleaned_coolant_data_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Cleaned_Coolant.csv"
coolant_filtered_df.to_csv(cleaned_coolant_data_path, index=False)

# Display first few rows for verification
print(coolant_filtered_df.head())


    AssemblyID CoolantConcentration  StationId
1     D0113175                  3.5        204
41    D0117941                  5.6        204
66    D0126382                  5.6        204
74    D0127643                  5.2        204
104   D0155466                  5.5        204


In [None]:
# Final merge: attach tool, coolant and weld-reclaim attributes to every
# cleaned bore measurement and write the result to Final_Merged_Dataset.csv.
import pandas as pd

# Define file paths
bore_data_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Cleaned_BoreData.csv"
# NOTE(review): no cell visible in this notebook section writes
# Merged_Tools_Coolant_Weld.csv — confirm where it is produced so that
# Restart Kernel -> Run All can regenerate it.
merged_tools_coolant_weld_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Merged_Tools_Coolant_Weld.csv"

# Load datasets. A CSV carries no dtype information, so the cleaned bore file
# is re-read with the same typing the cleaning step established:
# ResultDataValue2 (part numbers) stays a string instead of being re-inferred
# as a number, and ResultEntryTimestamp3 is parsed back into datetimes.
bore_df = pd.read_csv(
    bore_data_path,
    dtype={'ResultDataValue2': str},
    parse_dates=['ResultEntryTimestamp3'],
)
merged_tools_coolant_weld_df = pd.read_csv(merged_tools_coolant_weld_path)

# Rename 'AssemblyName' in Bore Data to match 'AssemblyID'
bore_df = bore_df.rename(columns={'AssemblyName': 'AssemblyID'})

# Merge Bore Data with the Tools + Coolant + Weld dataset on AssemblyID.
# how='left' keeps every bore row; assemblies without a match get NaN in the
# columns coming from the right-hand table.
# NOTE(review): if AssemblyID is not unique in the right-hand table, each bore
# row is repeated once per matching row — verify the key's cardinality.
final_complete_df = bore_df.merge(merged_tools_coolant_weld_df, on='AssemblyID', how='left')

# Save the fully merged dataset (optional)
final_complete_path = r"G:\College\University of Montana\Semester 4\Capstone\MSBA-Capstone-Riley-ORorke\data\Final_Merged_Dataset.csv"
final_complete_df.to_csv(final_complete_path, index=False)

# Display first few rows for verification
print(final_complete_df.head())




  AssemblyID    NamePostfix  ... CoolantConcentration WeldReclaim
0   D0815479  Front Chassis  ...                  NaN         NaN
1   D0815479  Front Chassis  ...                  NaN         NaN
2   D0815479  Front Chassis  ...                  NaN         NaN
3   D0815479  Front Chassis  ...                  NaN         NaN
4   D0815479  Front Chassis  ...                  NaN         NaN

[5 rows x 20 columns]
