### Model Training Using Multiple CPU Cores

In [None]:
import pandas as pd

In [None]:
# Dataset: https://archive.ics.uci.edu/ml/datasets/wine+quality
# The file is read with ';' as the field separator; preview the first rows afterwards.
df=pd.read_csv("winequality-white.csv",sep=";")
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [None]:
# Distinct values of the `quality` column, i.e. the class labels to predict.
df['quality'].unique()

array([6, 5, 7, 8, 4, 3, 9], dtype=int64)

In [None]:
# (rows, columns) of the loaded frame.
df.shape

(4898, 12)

In [None]:
# Preview the first rows again before splitting into features and target.
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [None]:
### Get the independent features (every column except the last one)
X=df.iloc[:,:-1]
X.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9


In [None]:
### Get the dependent feature (target): the last column of the frame
y=df.iloc[:,-1]
y.head()

0    6
1    6
2    6
3    6
4    6
Name: quality, dtype: int64

### 1 CPU Core
- A machine learning algorithm such as Random Forest
- `RepeatedStratifiedKFold` to define the evaluation splits
- `cross_val_score` to run the cross-validation

In [None]:
from time import perf_counter
from time import time
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score

In [None]:
## n_jobs controls how many CPU cores cross_val_score may use
# Fixed seed so re-running this cell trains the same forests, keeping the timing comparable.
model=RandomForestClassifier(n_estimators=100, random_state=4)
# define the evaluation procedure: 5 folds x 3 repeats = 15 fits
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter is a monotonic, high-resolution clock intended for measuring durations
start_time=perf_counter()
# Reuse the X / y split built earlier instead of re-slicing df.
n_scores =cross_val_score(model,X,y,scoring='accuracy', cv=cv, n_jobs=1) # n_jobs=1 means we are assigning 1 CPU Core
end_time=perf_counter()
# Elapsed seconds for the whole cross-validation run.
print(end_time-start_time)

9.635201454162598


### 2 CPU Cores

In [None]:
# Fixed seed so re-running this cell trains the same forests, keeping the timing comparable.
model=RandomForestClassifier(n_estimators=100, random_state=4)
# define the evaluation procedure: 5 folds x 3 repeats = 15 fits
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter is a monotonic, high-resolution clock intended for measuring durations
start_time=perf_counter()
# Reuse the X / y split built earlier; n_jobs=2 lets the folds run on 2 CPU cores.
n_scores =cross_val_score(model,X,y,scoring='accuracy', cv=cv, n_jobs=2)
end_time=perf_counter()
# Elapsed seconds for the whole cross-validation run.
print(end_time-start_time)

6.195779323577881


### 4 CPU Cores

In [None]:

# Fixed seed so re-running this cell trains the same forests, keeping the timing comparable.
model=RandomForestClassifier(n_estimators=100, random_state=4)
# define the evaluation procedure: 5 folds x 3 repeats = 15 fits
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter is a monotonic, high-resolution clock intended for measuring durations
start_time=perf_counter()
# Reuse the X / y split built earlier; n_jobs=4 lets the folds run on 4 CPU cores.
n_scores =cross_val_score(model,X,y,scoring='accuracy', cv=cv, n_jobs=4)
end_time=perf_counter()
# Elapsed seconds for the whole cross-validation run.
print(end_time-start_time)

3.8061413764953613


### Let's utilize all the cores

In [None]:

# Fixed seed so re-running this cell trains the same forests, keeping the timing comparable.
model=RandomForestClassifier(n_estimators=100, random_state=4)
# define the evaluation procedure: 5 folds x 3 repeats = 15 fits
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter is a monotonic, high-resolution clock intended for measuring durations
start_time=perf_counter()
# Reuse the X / y split built earlier; n_jobs=-1 asks for every available CPU core.
n_scores =cross_val_score(model,X,y,scoring='accuracy', cv=cv, n_jobs=-1)
end_time=perf_counter()
# Elapsed seconds for the whole cross-validation run.
print(end_time-start_time)

2.6145870685577393


In [None]:
# Number of worker processes to try, one timing run per entry.
cores=[1,2,3,4,5,6,7,8,9]
# The estimator and the splitter are the same for every run, so build them once.
# cross_val_score fits clones of the estimator, and the fixed random_state makes
# the splitter produce the same folds on each call, so sharing them is safe.
model=RandomForestClassifier(n_estimators=100, random_state=4)
#stratified K Fold
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
for core in cores:
    # perf_counter is a monotonic, high-resolution clock intended for measuring durations
    start_time=perf_counter()
    # Reuse the X / y split built earlier instead of re-slicing df on every iteration.
    n_scores =cross_val_score(model,X,y,scoring='accuracy', cv=cv, n_jobs=core)
    end_time=perf_counter()
    total_time=end_time-start_time
    print("For Cores {} Training Time was {}secs".format(core,total_time))

For Cores 1 Training Time was 9.482826232910156secs
For Cores 2 Training Time was 6.402432918548584secs
For Cores 3 Training Time was 4.4365153312683105secs
For Cores 4 Training Time was 3.8379054069519043secs
For Cores 5 Training Time was 3.2981393337249756secs
For Cores 6 Training Time was 3.3377671241760254secs
For Cores 7 Training Time was 3.3541812896728516secs
For Cores 8 Training Time was 1.7274081707000732secs
For Cores 9 Training Time was 2.9806690216064453secs


In [None]:
# fastapi is not imported in any earlier cell shown, so import it here before
# reading its version; otherwise this cell raises NameError on a fresh kernel.
import fastapi
fastapi.__version__

In [None]:
# Show the installed uvicorn version.
import uvicorn
uvicorn.__version__