In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/3d-printer-material-requirement/3D_printer.csv


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline,make_pipeline
from sklearn.feature_selection import SelectKBest,chi2
from sklearn.tree import DecisionTreeClassifier

In [3]:
df = pd.read_csv('/kaggle/input/3d-printer-material-requirement/3D_printer.csv')
df.head()

Unnamed: 0,layer_height,wall_thickness,infill_density,infill_pattern,nozzle_temperature,Bed_temperature,Print_speed,Material,Fan_speed,Roughness,Tension_strenght,Elongation
0,0.02,8.0,90,grid,220,60,40,abs,0,25,18,1.2
1,0.02,7.0,90,honeycomb,225,65,40,abs,25,32,16,1.4
2,0.02,1.0,80,grid,230,70,40,abs,50,40,8,0.8
3,0.02,4.0,70,honeycomb,240,75,40,abs,75,68,10,0.5
4,0.02,6.0,90,grid,250,80,40,abs,100,92,5,0.7


In [4]:
Y = df.iloc[:,7]
Y.head()

0    abs
1    abs
2    abs
3    abs
4    abs
Name: Material, dtype: object

In [5]:
X = df.iloc[:,list(range(7))+list(range(8,12))]
X.head()

Unnamed: 0,layer_height,wall_thickness,infill_density,infill_pattern,nozzle_temperature,Bed_temperature,Print_speed,Fan_speed,Roughness,Tension_strenght,Elongation
0,0.02,8.0,90,grid,220,60,40,0,25,18,1.2
1,0.02,7.0,90,honeycomb,225,65,40,25,32,16,1.4
2,0.02,1.0,80,grid,230,70,40,50,40,8,0.8
3,0.02,4.0,70,honeycomb,240,75,40,75,68,10,0.5
4,0.02,6.0,90,grid,250,80,40,100,92,5,0.7


In [6]:
X_train,X_test,y_train,y_test = train_test_split(X,
                                                 Y,
                                                 test_size=0.2,
                                                random_state=42)

In [7]:
# one hot encoding
trf1 = ColumnTransformer([
    ('ohe_sex_infill_pattern',OneHotEncoder(sparse=False,handle_unknown='ignore'),[3])
],remainder='passthrough')

In [8]:
trf2 = ColumnTransformer([
    ('scale',MinMaxScaler(),slice(0,10))
])

In [9]:
trf3 = SelectKBest(score_func=chi2,k=8)

In [10]:
trf4 = DecisionTreeClassifier()

In [11]:
pipe = Pipeline([
    ('trf1',trf1),
    ('trf2',trf2),
    ('trf3',trf3),
    ('trf4',trf4)
])

In [12]:
pipe.fit(X_train,y_train)



In [13]:
pipe.named_steps

{'trf1': ColumnTransformer(remainder='passthrough',
                   transformers=[('ohe_sex_infill_pattern',
                                  OneHotEncoder(handle_unknown='ignore',
                                                sparse=False),
                                  [3])]),
 'trf2': ColumnTransformer(transformers=[('scale', MinMaxScaler(), slice(0, 10, None))]),
 'trf3': SelectKBest(k=8, score_func=<function chi2 at 0x7c39ead05510>),
 'trf4': DecisionTreeClassifier()}

In [14]:
from sklearn import set_config
set_config(display='diagram')

In [15]:
y_pred = pipe.predict(X_test)
y_pred

array(['pla', 'abs', 'abs', 'pla', 'pla', 'abs', 'pla', 'abs', 'abs',
       'pla', 'abs', 'abs', 'abs', 'abs'], dtype=object)

In [16]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_pred)

0.8571428571428571

In [17]:
params = {
    'trf4__max_depth':[1,2,3,4,5,None]
}

In [18]:
from sklearn.model_selection import GridSearchCV
grid = GridSearchCV(pipe, params, cv=5, scoring='accuracy')
grid.fit(X_train, y_train)



In [19]:
grid.best_score_

0.8636363636363636

In [20]:
grid.best_params_

{'trf4__max_depth': 4}

In [21]:
# export 
import pickle
pickle.dump(pipe,open('pipe.pkl','wb'))