In [2]:
# Necessary libraries
import pandas as pd
import sqlite3
import datetime

In [3]:
# Show every column when a dataframe is displayed (no column truncation)
pd.options.display.max_columns = None

In [4]:
# Open a connection to the Ultimaker_API.db SQLite database file
# NOTE(review): this is an absolute, machine-specific path - adjust it when running elsewhere
DB_PATH = r'C:\Users\klaud\Desktop\Code\Data\Ultimaker_API.db'
conn = sqlite3.connect(DB_PATH)
# Function that prints the names of all tables in the SQLite database
def sql_fetch(conn):
    """Print the names of all tables in the database behind ``conn``.

    Parameters
    ----------
    conn : sqlite3.Connection
        Open connection to the database to inspect.

    Returns
    -------
    None
        The list of ``(name,)`` tuples read from ``sqlite_master`` is printed,
        not returned.
    """
    c = conn.cursor()
    try:
        # 'table' is single-quoted on purpose: SQLite treats double-quoted text as an
        # identifier and only falls back to a string literal through a legacy quirk
        # that can be switched off at build/run time.
        c.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
        print(c.fetchall())
    finally:
        # Release the cursor even if the query fails
        c.close()

# Print the tables available in the connected database
sql_fetch(conn)

[('U_1',), ('U_delays_1',), ('U_2',), ('U_delays_2',), ('U_3',), ('U_delays_3',), ('U_4',), ('U_delays_4',)]


In [5]:
# U_2, U_3 and U_4 represent the succeeded prints; U_2 / U_delays_2 are used as part 1
part1_print = pd.read_sql_query("SELECT * FROM U_2", conn)
df1_delays = pd.read_sql_query("SELECT * FROM U_delays_2", conn)
# Put the delay measurements next to the print data (aligned on the row index).
# Date is not selected from the delays table because it would be a duplicate.
delay_columns = df1_delays[['timetime', 'timeperf_counter']]
df1 = pd.concat([part1_print, delay_columns], axis=1)

In [6]:
# Display the merged part 1 dataframe (print data plus delay columns) for inspection
df1

Unnamed: 0,Build_platform_temperature,Build_platform_target_temperature,Hot_end1_temperature,Hot_end1_target_temperature,Hot_end1_fan,Head_acceleration,Feeder_acceleration,Feeder_jerk,Feeder_max_speed,Z_offset,X_coordinate,Y_coordinate,Z_coordinate,Hot_end1_material_extruded,Hot_end1_max_temperature_exposed,Hot_end1_time_spent_hot,Status,Progress,Object,Date,timetime,timeperf_counter
0,54.68852212310416,60.0,112.2,0.0,0.0,3000.0,3000.0,5.0,45.0,1.4726361721589676,208.0,194.0,310.0,25300,237,115920,printing,0.0,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 09:48:38,1.690904,1.690936
1,55.17920844531264,60.0,110.7,0.0,0.0,3000.0,3000.0,5.0,45.0,1.4726361721589676,208.0,194.0,310.0,25300,237,115920,printing,0.0,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 09:48:40,1.330819,1.331020
2,55.615270599048834,60.0,109.2,0.0,0.0,3000.0,3000.0,5.0,45.0,1.4726361721589676,110.0,20.0,310.0,25300,237,115920,printing,0.0,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 09:48:42,0.904637,0.904793
3,56.10791188919511,60.0,108.2,0.0,0.0,3000.0,3000.0,5.0,45.0,1.4726361721589676,110.0,20.0,310.0,25300,237,115920,printing,0.0,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 09:48:45,1.289626,1.289712
4,56.76343674501875,60.0,107.0,0.0,0.0,3000.0,3000.0,5.0,45.0,1.4726361721589676,110.0,20.0,310.0,25300,237,115920,printing,0.0,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 09:48:47,0.928618,0.929469
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4181,59.827534538942686,60.0,200.0,200.0,100.0,800.0,3000.0,5.0,45.0,1.4726361721589676,135.12,87.675,21.0,29140,237,125100,printing,0.9989389946775484,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 12:21:44,0.803636,0.812851
4182,59.61172206090377,60.0,200.0,200.0,100.0,5000.0,3000.0,5.0,45.0,1.4726361721589676,134.42,88.079,21.0,29140,237,125100,printing,0.9992233502221175,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 12:21:47,1.578487,1.588303
4183,59.77240066315363,60.0,200.0,200.0,100.0,5000.0,3000.0,5.0,45.0,1.4726361721589676,133.875,88.663,21.0,29140,237,125100,printing,0.9994412328004627,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 12:21:49,0.937036,0.943997
4184,59.83056326366206,60.0,200.0,200.0,100.0,5000.0,3000.0,5.0,45.0,1.4726361721589676,110.713,87.833,21.0,29140,237,125100,printing,0.9996807499216221,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 12:21:51,1.157489,1.162981


# Identifying start and end of the process 

In [14]:
# Inspecting the dataframe shows that, at the beginning of the log:
# - "Hot_end1_target_temperature" has not been set to the targeted temperature of 200°C
# - "Progress" is 0
# - "Z_coordinate" is still at 310mm
# All of these variables indicate that the process has not started

# Converting the values of interest into number format
df1 = df1.astype({"Z_coordinate": float, "Progress": float, 'Hot_end1_temperature': float})

# Keeping only the rows where progress has been reported
rows_in_progress1 = df1[df1['Progress'] > 0]

# Even then the Z coordinate is not yet at the expected starting height of 0.2mm.
# The process start is the first occurrence of a height below 0.21mm,
# which can only be the 0.2mm first layer.
first_layer_mask1 = rows_in_progress1['Z_coordinate'] < 0.21
# idxmax() on an all-False mask silently returns the first label, which would keep
# the warm-up rows without any warning; fail loudly instead.
if not first_layer_mask1.any():
    raise ValueError("Part 1: no row with Progress > 0 and Z_coordinate < 0.21 was found")
idx = first_layer_mask1.idxmax()
# Everything from the process start to the end of the log is kept
df_filtered1 = df1.loc[idx:]

# The end of the process looks proper and therefore needs no cleaning


### Selecting data from the second and third prints and performing the same steps. Each dataset was nevertheless inspected separately, to confirm that the start and end of the process can be identified in the same way for all datasets

In [15]:
# Selecting the dataset of part 2
df2 = pd.read_sql_query("SELECT * FROM U_3", conn)
df2_delays = pd.read_sql_query("SELECT * FROM U_delays_3", conn)
# Appending the delay columns to the print data (aligned on the row index)
df2 = pd.concat([df2, df2_delays[['timetime','timeperf_counter']]], axis=1)

# Converting the values of interest into number format
df2 = df2.astype({"Z_coordinate": float, "Progress": float, 'Hot_end1_temperature': float})

# Process start: first row with reported progress whose height is below 0.21mm
rows_in_progress2 = df2[df2['Progress'] > 0]
first_layer_mask2 = rows_in_progress2['Z_coordinate'] < 0.21
# idxmax() on an all-False mask silently returns the first label, which would keep
# the warm-up rows without any warning; fail loudly instead.
if not first_layer_mask2.any():
    raise ValueError("Part 2: no row with Progress > 0 and Z_coordinate < 0.21 was found")
idx = first_layer_mask2.idxmax()
df_filtered2 = df2.loc[idx:]

# Here, however, the last row of "Z_coordinate" is 23mm, which means the process
# is probably over, as the part height is only 21mm
df_filtered2[-5:]

Unnamed: 0,Build_platform_temperature,Build_platform_target_temperature,Hot_end1_temperature,Hot_end1_target_temperature,Hot_end1_fan,Head_acceleration,Feeder_acceleration,Feeder_jerk,Feeder_max_speed,Z_offset,X_coordinate,Y_coordinate,Z_coordinate,Hot_end1_material_extruded,Hot_end1_max_temperature_exposed,Hot_end1_time_spent_hot,Status,Progress,Object,Date,timetime,timeperf_counter
4538,59.939063781689,60.0,200.0,200.0,100.0,400.0,3000.0,5.0,45.0,1.4726361721589676,108.177,87.461,21.0,32960,237,134520,printing,0.999,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 14:59:02,0.815343,0.822878
4539,59.85656228960962,60.0,200.0,200.0,100.0,1000.0,3000.0,5.0,45.0,1.4726361721589676,134.42,90.387,21.0,32960,237,134520,printing,0.999216,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 14:59:04,0.947228,0.955331
4540,59.94461567666482,60.0,200.0,200.0,100.0,5000.0,3000.0,5.0,45.0,1.4726361721589676,133.454,88.663,21.0,32960,237,134520,printing,0.999418,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 14:59:06,0.865942,0.873396
4541,59.792532162569664,60.0,200.0,200.0,100.0,5000.0,3000.0,5.0,45.0,1.4726361721589676,110.355,88.377,21.0,32960,237,134520,printing,0.99966,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 14:59:08,1.163949,1.164573
4542,59.84534797556932,60.0,200.0,200.0,100.0,1000.0,3000.0,5.0,45.0,1.4726361721589676,134.256,100.656,23.0,32960,237,134520,printing,1.0,UM3E_Leirmo_Exp1_Main_Artifact,2021-05-10 14:59:10,1.51914,1.530903


In [16]:
# Dropping the last row when its Z_coordinate is above the 21mm part height (23mm here).
# The check makes the cell safe to re-run: an unconditional iloc[:-1] would remove
# one more valid row on every execution.
PART_HEIGHT_MM = 21.0
if not df_filtered2.empty and df_filtered2['Z_coordinate'].iloc[-1] > PART_HEIGHT_MM:
    df_filtered2 = df_filtered2.iloc[:-1, :]

In [17]:
# Selecting the dataset of part 3
df3 = pd.read_sql_query("SELECT * FROM U_4", conn)
df3_delays = pd.read_sql_query("SELECT * FROM U_delays_4", conn)
# Merging delays and print data, without selecting date as it would be a duplicate
df3 = pd.concat([df3, df3_delays[['timetime','timeperf_counter']]], axis=1)

# Converting the values of interest into number format
df3 = df3.astype({"Z_coordinate": float, "Progress": float, 'Hot_end1_temperature': float})

# Process start: first row with reported progress whose height is below 0.21mm
rows_in_progress3 = df3[df3['Progress'] > 0]
first_layer_mask3 = rows_in_progress3['Z_coordinate'] < 0.21
# idxmax() on an all-False mask silently returns the first label, which would keep
# the warm-up rows without any warning; fail loudly instead.
if not first_layer_mask3.any():
    raise ValueError("Part 3: no row with Progress > 0 and Z_coordinate < 0.21 was found")
idx = first_layer_mask3.idxmax()
df_filtered3 = df3.loc[idx:]


# Merging dataframes

In [18]:
# Tag every cleaned print with its part number in a new "Part" column
df_filtered1, df_filtered2, df_filtered3 = (
    frame.assign(Part=part_number)
    for part_number, frame in enumerate((df_filtered1, df_filtered2, df_filtered3), start=1)
)

# Stack the three prints into one dataframe and generate a new index
data_frames = [df_filtered1, df_filtered2, df_filtered3]
df = pd.concat(data_frames, ignore_index=True)


In [19]:
# Flag, per column, whether it contains any missing value ("NaN")
df.isnull().any(axis=0)

Build_platform_temperature           False
Build_platform_target_temperature    False
Hot_end1_temperature                 False
Hot_end1_target_temperature          False
Hot_end1_fan                         False
Head_acceleration                    False
Feeder_acceleration                  False
Feeder_jerk                          False
Feeder_max_speed                     False
Z_offset                             False
X_coordinate                         False
Y_coordinate                         False
Z_coordinate                          True
Hot_end1_material_extruded           False
Hot_end1_max_temperature_exposed     False
Hot_end1_time_spent_hot              False
Status                               False
Progress                             False
Object                               False
Date                                 False
timetime                             False
timeperf_counter                     False
Part                                 False
dtype: bool

In [20]:
# Z_coordinate contains missing values; list the rows where they occur together with
# the columns that give them context
missing_z = df['Z_coordinate'].isna()
df.loc[missing_z, ['Date', 'Progress', 'Z_coordinate', 'X_coordinate', 'Y_coordinate', 'Part']]

Unnamed: 0,Date,Progress,Z_coordinate,X_coordinate,Y_coordinate,Part
1126,2021-05-10 10:32:31,0.270116,,65.603,89.26,1
1603,2021-05-10 10:50:16,0.389526,,138.545,138.053,1
2408,2021-05-10 11:20:07,0.585276,,130.683,86.821,1
3409,2021-05-10 11:56:50,0.832806,,104.426,108.44,1
3886,2021-05-10 12:14:20,0.949331,,72.836,104.315,1
4499,2021-05-10 12:42:30,0.094451,,134.882,83.008,2
6544,2021-05-10 13:49:27,0.535727,,131.865,123.479,2
6926,2021-05-10 14:01:41,0.615686,,91.738,114.775,2
7097,2021-05-10 14:07:07,0.651439,,75.477,99.676,2
8157,2021-05-10 14:45:46,0.911056,,75.083,115.832,2


In [21]:
# Sorting data based on date time
# Converting Date to datetime format. Plain column assignment replaces the column and
# its dtype; df.loc[:, 'Date'] = ... writes into the existing object column in place
# on pandas >= 2.0, which leaves the dtype as object instead of datetime64.
df['Date'] = pd.to_datetime(df['Date'])
# Ensuring that the data is sorted in ascending order according to date time.
# mergesort is stable, so rows sharing a timestamp keep their original order.
df = df.sort_values(by='Date', ascending=True, kind='mergesort')

In [22]:
# Harmonising column names so that all datasets use the same names for the same data
temperature_names = {'Hot_end1_temperature': 'Hot_end_temperature',
                     'Hot_end1_target_temperature': 'Hot_end_target_temperature'}
df = df.rename(columns={**temperature_names, 'Hot_end1_fan': 'Fan_speed'})

# The per-part frames (part 1-3) get the same renaming, for the temperature columns only
df_filtered1 = df_filtered1.rename(columns=temperature_names)
df_filtered2 = df_filtered2.rename(columns=temperature_names)
df_filtered3 = df_filtered3.rename(columns=temperature_names)

In [23]:
# Static settings should hold exactly one unique value; print the number of unique
# values per column to check whether that is the case here
static_columns = ['Build_platform_target_temperature',
                  'Hot_end_target_temperature',
                  'Z_offset',
                  'Status',
                  'Object']
for column in static_columns:
    print('{} = {}'.format(column, df[column].nunique()))

Build_platform_target_temperature = 1
Hot_end_target_temperature = 1
Z_offset = 1
Status = 1
Object = 1


In [24]:
# Print head and feeder data seem to be static as well.
# A static column holds exactly one unique value, so print the number of unique
# values per column to check whether that is the case here
head_feeder_columns = ['Head_acceleration',
                       'Feeder_acceleration',
                       'Feeder_jerk',
                       'Feeder_max_speed']
for column in head_feeder_columns:
    print('{} unique values: {}'.format(column, df[column].nunique()))

Head_acceleration unique values: 7
Feeder_acceleration unique values: 1
Feeder_jerk unique values: 1
Feeder_max_speed unique values: 1


In [25]:
# Collecting the distinct values of head acceleration and fan speed
dict_ = {column: list(df[column].unique()) for column in ('Head_acceleration', 'Fan_speed')}
dict_

{'Head_acceleration': ['1000.0',
  '5000.0',
  '900.0',
  '700.0',
  '800.0',
  '400.0',
  '3000.0'],
 'Fan_speed': ['0.0', '33.333333333333336', '66.66666666666667', '100.0']}

In [26]:
# Counting how many times each observed head acceleration value appears,
# most frequent value first
df['Head_acceleration'].value_counts(sort=True, ascending=False)

1000.0    5379
800.0     3270
400.0     2180
5000.0    1779
900.0       24
700.0       14
3000.0       1
Name: Head_acceleration, dtype: int64

In [27]:
# Splitting the combined data back into one frame per part:
# df1-df3 represent print 1-print 3 and df represents all print data.
# NOTE(review): df1-df3 are rebound here and no longer refer to the raw tables loaded earlier
df1, df2, df3 = (df[df['Part'] == part_number] for part_number in (1, 2, 3))

# Storing every frame as its own csv file, without the index column
for frame, csv_name in ((df, "df.csv"), (df1, "df1.csv"), (df2, "df2.csv"), (df3, "df3.csv")):
    frame.to_csv(csv_name, index=False)
