## LogisticRegression on Kyphosis

In [12]:
import warnings
warnings.filterwarnings('ignore')

In [13]:
import pandas as pd
ky = pd.read_csv('../Cases/Kyphosis/Kyphosis.csv')
ky.head()

Unnamed: 0,Kyphosis,Age,Number,Start
0,absent,71,3,5
1,absent,158,3,14
2,present,128,4,5
3,absent,2,5,1
4,absent,1,4,15


In [14]:
X, y = ky.drop('Kyphosis', axis=1), ky['Kyphosis']

In [16]:
from sklearn.linear_model import LogisticRegression
import numpy as np
from sklearn.model_selection import GridSearchCV, KFold

kfold = KFold(n_splits=5, shuffle=True, random_state=24)
params = {'penalty': ['l1', 'l2', 'elasticnet', None],
         'l1_ratio': [0.01, 0.1, 1, 10],
         }  # 'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']
lr = LogisticRegression()
grid_lr = GridSearchCV(lr, param_grid=params, cv=kfold, scoring='accuracy')
grid_lr.fit(X, y)
print(grid_lr.best_params_, grid_lr.best_score_)

{'l1_ratio': 0.01, 'penalty': None} 0.7654411764705882


## Random Forest for Concrete Strength

In [18]:
df = pd.read_csv('../Cases/Concrete Strength/Concrete_Data.csv')
df.head(1)

Unnamed: 0,Cement,Blast,Fly,Water,Superplasticizer,Coarse,Fine,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99


In [24]:
X, y = df.drop(columns=['Strength']), df['Strength']

#### Decision Tree

In [26]:
from sklearn.tree import DecisionTreeRegressor
dct = DecisionTreeRegressor()
dct.get_params()
params = {'max_depth': [2,5,None],
         'min_samples_split': [2,5,10,20],
         'min_samples_leaf': [1,5,10,20],
         'max_features': [3,4,5,6]}

from sklearn.model_selection import GridSearchCV
grid_dct = GridSearchCV(dct, param_grid=params, cv=kfold, scoring='r2')
grid_dct.fit(X, y)
print(grid_dct.best_params_, grid_dct.best_score_)

{'max_depth': None, 'max_features': 6, 'min_samples_leaf': 1, 'min_samples_split': 2} 0.8621180930193498


In [35]:
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor()

params = {'max_depth': [2,5,7],
         'min_samples_split': [2,5,10,20],
         'min_samples_leaf': [1,5,10,20],
         'max_features': [3,4,5,6]}
kfold = KFold(n_splits=5, shuffle=True, random_state=24)

from sklearn.model_selection import GridSearchCV
grid_rf = GridSearchCV(rf, param_grid=params, cv=kfold, scoring='r2')
grid_rf.fit(X, y)
print(grid_rf.best_params_, grid_rf.best_score_)

{'max_depth': 7, 'max_features': 6, 'min_samples_leaf': 1, 'min_samples_split': 2} 0.886363889939282


In [31]:
df_cv = pd.DataFrame(grid_dct.cv_results_)
df_cv.shape

(192, 17)

In [36]:
df_cv.sort_values('rank_test_score', ascending=True).iloc[0]

mean_fit_time                                                       0.255244
std_fit_time                                                         0.00049
mean_score_time                                                     0.008995
std_score_time                                                           0.0
param_max_depth                                                         None
param_max_features                                                         4
param_min_samples_leaf                                                     1
param_min_samples_split                                                    2
params                     {'max_depth': None, 'max_features': 4, 'min_sa...
split0_test_score                                                    0.92781
split1_test_score                                                   0.886638
split2_test_score                                                   0.929036
split3_test_score                                                   0.909479

In [39]:
test = pd.read_csv('../Cases/Concrete Strength/testConcrete.csv')
y_pred = grid_rf.predict(test)
test['Strength'] = y_pred
test

Unnamed: 0,Cement,Blast,Fly,Water,Superplasticizer,Coarse,Fine,Age,Strength
0,495,120,0,155,5,866,884,75,61.050879
1,262,129,0,271,2,808,787,174,40.19143
2,201,48,1,215,5,807,839,113,37.321164
3,329,141,0,286,1,881,823,229,41.008417
4,354,14,0,129,2,839,847,210,55.514148
5,150,23,23,114,4,883,638,36,24.861505
6,480,64,0,292,3,896,776,180,51.202178
7,393,49,82,132,1,887,830,271,57.492614
8,284,63,1,138,1,804,725,44,53.747049
9,206,38,0,103,2,818,719,191,48.515338
