In [12]:
# Importing relevant modules
import itertools

import IPython.display
import matplotlib
import matplotlib.pyplot as plt
import numpy
import pandas as pd
import sklearn
import sklearn.model_selection
import sklearn.svm
from tqdm import tqdm_notebook as tqdm
%matplotlib inline

# Ignoring warnings
import warnings
warnings.filterwarnings("ignore")

In [7]:
# Fetch the concrete-strength classification CSV straight from GitHub.
data_url = 'https://raw.githubusercontent.com/Moataz-AbdElKhalek/Concrete_Compressive_Strength_Prediction/main/dataset/Concrete_Dataset_Classification.csv'
dataset = pd.read_csv(data_url)

# Preview the first rows, then report the overall table size.
print(dataset.head(4))
print("\nDataset has {} rows and {} columns".format(*dataset.shape))

# Target vector: the 'y' column.
y = dataset['y']
print()
print(y.head(4))
print(y.shape)
print()

# Feature matrix: every column except 'y'.
X = dataset.drop(columns=['y'])
print(X.head(4))
print(X.shape)

      X1     X2   X3     X4   X5      X6     X7     X8    y
0  540.0    0.0  0.0  162.0  2.5  1040.0  676.0   28.0  1.0
1  540.0    0.0  0.0  162.0  2.5  1055.0  676.0   28.0  1.0
2  332.5  142.5  0.0  228.0  0.0   932.0  594.0  270.0  1.0
3  332.5  142.5  0.0  228.0  0.0   932.0  594.0  365.0  1.0

Dataset has 1030 rows and 9 columns

0    1.0
1    1.0
2    1.0
3    1.0
Name: y, dtype: float64
(1030,)

      X1     X2   X3     X4   X5      X6     X7     X8
0  540.0    0.0  0.0  162.0  2.5  1040.0  676.0   28.0
1  540.0    0.0  0.0  162.0  2.5  1055.0  676.0   28.0
2  332.5  142.5  0.0  228.0  0.0   932.0  594.0  270.0
3  332.5  142.5  0.0  228.0  0.0   932.0  594.0  365.0
(1030, 8)


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column,
# including the target column y.
dataset.describe()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,y
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,-0.048544
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,0.999306
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,-1.0
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,-1.0
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,-1.0
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,1.0
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,1.0


In [9]:
# Dividing samples dataset into training and test datasets:
def dataset_divide(X, y, train_size=0.90, random_state=1):
    """Split features and labels into training and test subsets.

    Thin wrapper around ``sklearn.model_selection.train_test_split``.

    Parameters
    ----------
    X : array-like
        Feature matrix, forwarded unchanged to ``train_test_split``.
    y : array-like
        Labels aligned with the rows of ``X``.
    train_size : float or int, default 0.90
        Forwarded to ``train_test_split`` as ``train_size``.
    random_state : int, default 1
        Seed forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X, y, train_size=train_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

# Split once, then report the shape of each partition (one per line).
X_train, X_test, y_train, y_test = dataset_divide(X, y)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

(927, 8)
(927,)
(103, 8)
(103,)


In [20]:
def study_SVM(C_range,kernel,gamma):
    """Score an SVC over a (gamma, C) grid and report cross-validated best parameters.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Two independent evaluations are performed:

    1. Every (gamma, C) pair is fitted on the training split and scored with
       ``clf.score`` on the test split; these values fill the returned array.
    2. A ``GridSearchCV`` with its default cross-validation is fitted on the
       training split only; its ``best_params_`` are printed as Best_C and
       Best_gamma.

    Because the printed maximum score comes from (1) and the printed best
    parameters come from (2), they do not necessarily refer to the same
    (gamma, C) pair.

    Parameters
    ----------
    C_range : sequence
        Candidate values for the SVC ``C`` parameter.
    kernel : str
        Kernel name passed to ``sklearn.svm.SVC``.
    gamma : sequence
        Candidate values for the SVC ``gamma`` parameter.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(gamma), len(C_range))`` where entry ``[i, j]`` is
        the test-split score for ``gamma[i]`` and ``C_range[j]``.
    """
    scores = numpy.zeros((len(gamma), len(C_range)))

    # Rows follow gamma, columns follow C.
    for i, gamma_value in enumerate(gamma):
        for j, C_value in enumerate(C_range):
            clf = sklearn.svm.SVC(C=C_value, kernel=kernel, gamma=gamma_value, random_state=1)
            clf.fit(X_train, y_train)
            scores[i, j] = clf.score(X_test, y_test)

    # Finding best parameters via cross-validation on the training split only.
    search = sklearn.model_selection.GridSearchCV(
        sklearn.svm.SVC(kernel=kernel, random_state=1),
        {'C': C_range, 'gamma': gamma},
    )
    search.fit(X_train, y_train)
    best_C = search.best_params_['C']
    best_gamma = search.best_params_['gamma']

    print('---------Max.score---------')
    print(numpy.amax(scores))
    print('---------Best_C---------')
    print(best_C)
    print('---------Best_gamma---------')
    print(best_gamma)
    print('---------Full.scores---------')
    return scores

In [21]:
# Sweep C and gamma over the ten powers of ten 10**-5 ... 10**4 with an RBF kernel;
# the returned score grid is shown as the cell output.
study_SVM([10**p for p in range(-5, 5, 1)],'rbf',[10**p for p in range(-5, 5, 1)])

---------Max.score---------
0.8932038834951457
---------Best_C---------
10000
---------Best_gamma---------
1e-05
---------Full.scores---------


array([[0.52427184, 0.52427184, 0.52427184, 0.55339806, 0.77669903,
        0.74757282, 0.78640777, 0.84466019, 0.88349515, 0.88349515],
       [0.52427184, 0.52427184, 0.52427184, 0.52427184, 0.74757282,
        0.73786408, 0.80582524, 0.89320388, 0.88349515, 0.87378641],
       [0.52427184, 0.52427184, 0.52427184, 0.52427184, 0.52427184,
        0.72815534, 0.74757282, 0.76699029, 0.75728155, 0.75728155],
       [0.52427184, 0.52427184, 0.52427184, 0.52427184, 0.52427184,
        0.73786408, 0.76699029, 0.75728155, 0.75728155, 0.75728155],
       [0.52427184, 0.52427184, 0.52427184, 0.52427184, 0.52427184,
        0.65048544, 0.65048544, 0.65048544, 0.65048544, 0.65048544],
       [0.52427184, 0.52427184, 0.52427184, 0.52427184, 0.52427184,
        0.63106796, 0.63106796, 0.63106796, 0.63106796, 0.63106796],
       [0.52427184, 0.52427184, 0.52427184, 0.52427184, 0.52427184,
        0.5631068 , 0.5631068 , 0.5631068 , 0.5631068 , 0.5631068 ],
       [0.52427184, 0.52427184, 0.5242718

In [24]:
def SVM_CV(kernels,C_range,gamma_range, cv=10, verbose=10):
    """Grid-search SVC hyperparameters with k-fold cross-validation.

    Fits on the module-level ``X`` and ``y`` (the full dataset as loaded, not
    the training split), so any later evaluation on a subset of ``X`` is not
    independent of this search.

    Parameters
    ----------
    kernels : sequence of str
        Candidate values for the SVC ``kernel`` parameter.
    C_range : sequence
        Candidate values for the SVC ``C`` parameter.
    gamma_range : sequence
        Candidate values for the SVC ``gamma`` parameter.
    cv : default 10
        Forwarded to ``GridSearchCV`` as ``cv``.
    verbose : int, default 10
        Forwarded to ``GridSearchCV``; pass 0 to suppress the per-fit log.

    Returns
    -------
    tuple
        ``(best_C, best_gamma, best_kernel, best_score, results)`` where the
        first three come from ``best_params_``, ``best_score`` is
        ``best_score_`` and ``results`` is ``cv_results_``.
    """
    # Preparing the Model:
    model = sklearn.svm.SVC(random_state=1)

    # Hyperparameter grid to be tested and optimized:
    paras = {'kernel': kernels, 'C': C_range, 'gamma': gamma_range}

    # Cross-validated search scored by accuracy:
    gridCV = sklearn.model_selection.GridSearchCV(
        model, paras, cv=cv, scoring='accuracy', verbose=verbose
    )
    gridCV.fit(X, y)

    best_params = gridCV.best_params_
    return (
        best_params['C'],
        best_params['gamma'],
        best_params['kernel'],
        gridCV.best_score_,
        gridCV.cv_results_,
    )

In [25]:
# Grid search over an 11 x 11 grid: C and gamma each take the eleven values
# 1e-5 ... 1e5 from numpy.logspace(-5, 5, 11); only the RBF kernel is tried.
best_C, best_gamma, best_kernel, best_score, results = SVM_CV(['rbf'],numpy.logspace(-5, 5, 11),numpy.logspace(-5, 5, 11))

Fitting 10 folds for each of 121 candidates, totalling 1210 fits
[CV] C=1e-05, gamma=1e-05, kernel=rbf ................................
[CV] .... C=1e-05, gamma=1e-05, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=1e-05, kernel=rbf ................................
[CV] .... C=1e-05, gamma=1e-05, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=1e-05, kernel=rbf ................................
[CV] .... C=1e-05, gamma=1e-05, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=1e-05, kernel=rbf ................................
[CV] .... C=1e-05, gamma=1e-05, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=1e-05, kernel=rbf ................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.2s remaining:    0.0s


[CV] .... C=1e-05, gamma=1e-05, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=1e-05, kernel=rbf ................................
[CV] .... C=1e-05, gamma=1e-05, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=1e-05, kernel=rbf ................................
[CV] .... C=1e-05, gamma=1e-05, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=1e-05, kernel=rbf ................................
[CV] .... C=1e-05, gamma=1e-05, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=1e-05, kernel=rbf ................................
[CV] .... C=1e-05, gamma=1e-05, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=1e-05, kernel=rbf ................................


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.4s remaining:    0.0s


[CV] .... C=1e-05, gamma=1e-05, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=0.0001, kernel=rbf ...............................
[CV] ... C=1e-05, gamma=0.0001, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=0.0001, kernel=rbf ...............................
[CV] ... C=1e-05, gamma=0.0001, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=0.0001, kernel=rbf ...............................
[CV] ... C=1e-05, gamma=0.0001, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=0.0001, kernel=rbf ...............................
[CV] ... C=1e-05, gamma=0.0001, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=0.0001, kernel=rbf ...............................
[CV] ... C=1e-05, gamma=0.0001, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=0.0001, kernel=rbf ...............................
[CV] ... C=1e-05, gamma=0.0001, kernel=rbf, score=0.524, total=   0.0s
[CV] C=1e-05, gamma=0.0001, kernel=rbf ...............................
[CV] .

[Parallel(n_jobs=1)]: Done 1210 out of 1210 | elapsed:  1.0min finished


In [26]:
# Selected hyperparameters and the mean cross-validated score of the best candidate.
print('best_C =', best_C)
print('best_gamma =', best_gamma)
print('best_kernel =', best_kernel)
print('Cross-Validation Mean Best Score for the Model =', best_score)
print('\nCross-Validation Mean Test Scores\n', results['mean_test_score'])

# Derive the number of folds from the cv_results_ keys instead of hard-coding 10,
# so this cell stays correct if the search is run with a different cv value.
n_folds = sum(1 for key in results if key.startswith('split') and key.endswith('_test_score'))

# Per-fold scores for every candidate, plus the best candidate score within that fold.
for fold in range(n_folds):
    fold_scores = results['split{}_test_score'.format(fold)]
    print('\nSplit_{} Scores\n'.format(fold + 1), fold_scores)
    print('best_score (Split_{}) ='.format(fold + 1), max(fold_scores))

best_C = 10000.0
best_gamma = 1e-05
best_kernel = rbf
Cross-Validation Mean Best Score for the Model = 0.8310679611650486

Cross-Validation Mean Test Scores
 [0.52427184 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184
 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184
 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184
 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184
 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184
 0.52427184 0.52427184 0.52427184 0.52330097 0.52427184 0.52427184
 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184
 0.52427184 0.52427184 0.74271845 0.72038835 0.51747573 0.52427184
 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184 0.52427184
 0.52427184 0.80485437 0.72718447 0.70291262 0.61456311 0.57475728
 0.59708738 0.53300971 0.53203883 0.53009709 0.53009709 0.53009709
 0.78834951 0.76893204 0.72135922 0.61553398 0.57475728 0.59708738
 0.5368932  0.53203883 0.53009709 0.53