Preprocessing


In [1]:
import sklearn.datasets as skd
import sklearn.preprocessing as skp
import numpy as np
from sklearn.impute import SimpleImputer

# Fetch the iris dataset bundle (features in iris.data, labels in iris.target).
iris = skd.load_iris()

# L2 normalisation: fit and apply in one step, then show the per-column means.
normalizer = skp.Normalizer(norm='l2')
iris_normalized = normalizer.fit_transform(iris.data)
print(np.mean(iris_normalized, axis=0))

# One-hot encode the labels. The encoder is given the labels as a single
# 2-D column, hence the reshape.
target_column = iris.target.reshape(-1, 1)
enc = skp.OneHotEncoder().fit(target_column)
iris_target_onehot = enc.transform(target_column)
sample_rows = [0, 50, 100]
print(iris_target_onehot.toarray()[sample_rows])

# Blank out the first 50 samples (all features) so there is something to impute.
# This overwrites iris.data in place.
iris.data[:50] = np.nan

# Replace every NaN with the mean of its column, then show the per-column means.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
iris_imputed = imputer.fit_transform(iris.data)
print(np.mean(iris_imputed, axis=0))

[0.75140029 0.40517418 0.45478362 0.14107142]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[6.262 2.872 4.906 1.676]


Hands-On KNN

In [None]:
import sklearn.datasets as skd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset (features in iris.data, class labels in iris.target).
iris = skd.load_iris()

# Split into train and test sets, stratified on the class labels.
X_train, X_test, Y_train, Y_test = train_test_split(
    iris.data, iris.target, stratify=iris.target, random_state=30
)
print(X_train.shape)
print(X_test.shape)

# Baseline model with the default hyper-parameters.
knn_classifier = KNeighborsClassifier()
knn_classifier = knn_classifier.fit(X_train, Y_train)

print(knn_classifier.score(X_train, Y_train))
print(knn_classifier.score(X_test, Y_test))

# Sweep n_neighbors over 3..10. Each candidate is scored on the held-out test
# split: training accuracy cannot be used for selection because it rewards
# models that simply memorise the training samples.
neighbor_values = list(range(3, 11))
l = []
for i in neighbor_values:
    knn_classifier = KNeighborsClassifier(n_neighbors=i)
    knn_classifier = knn_classifier.fit(X_train, Y_train)
    l.append(knn_classifier.score(X_test, Y_test))

# Report the winning n_neighbors value itself, not its position in the list.
# On ties the smallest n_neighbors wins, because list.index returns the first match.
print(neighbor_values[l.index(max(l))])

Decision Tree - Hands-On

In [6]:
import sklearn.datasets as datasets
import sklearn.model_selection as model_selection
import numpy as np
from sklearn.tree import DecisionTreeRegressor

# Seed NumPy's global random generator before any model is built.
np.random.seed(100)

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably targets an older release — confirm the pinned version.
boston = datasets.load_boston()

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    boston.data, boston.target, random_state=30
)
print(X_train.shape)
print(X_test.shape)

# Baseline: a decision tree regressor with default settings.
dt_Regressor = DecisionTreeRegressor()
dt_reg = dt_Regressor.fit(X_train, Y_train)

# Compare the fit on the training split against the held-out split.
print(dt_reg.score(X_train, Y_train))
print(dt_reg.score(X_test, Y_test))

# Predictions for the first two test samples.
predicted = dt_reg.predict(X_test[:2])
print(predicted)

# Search max_depth in 2..5 and keep the depth with the best test score.
# The running best starts at -inf rather than 0: a regressor's score can be
# negative, and a 0 sentinel would then never be beaten, silently reporting
# depth 2 whatever the actual scores were.
max_depth = 2
maxscore = float("-inf")
for x in range(2, 6):
    dt_Regressor = DecisionTreeRegressor(max_depth=x)
    dt_reg = dt_Regressor.fit(X_train, Y_train)
    score = dt_reg.score(X_test, Y_test)
    if maxscore < score:
        max_depth = x
        maxscore = score
print(max_depth)

(379, 13)
(127, 13)
1.0
0.8098834820264638
[18.2 13.9]
4


Hands-On - Ensemble

In [4]:
from sklearn.ensemble import RandomForestRegressor
import sklearn.datasets as datasets
import sklearn.model_selection as model_selection
import numpy as np

# Seed NumPy's global random generator before any model is built.
np.random.seed(100)

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably targets an older release — confirm the pinned version.
boston = datasets.load_boston()

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    boston.data, boston.target, random_state=30
)
print(X_train.shape)
print(X_test.shape)

# Baseline: a random forest regressor with default settings.
rf_Regressor = RandomForestRegressor()
rf_reg = rf_Regressor.fit(X_train, Y_train)

# Compare the fit on the training split against the held-out split.
print(rf_reg.score(X_train, Y_train))
print(rf_reg.score(X_test, Y_test))

# Predictions for the first two test samples.
predicted = rf_reg.predict(X_test[:2])
print(predicted)

# Try max_depth 2..5 with a fixed forest size and record each test score.
depths = []
scores = []
c_estimators = 100
for x in range(2, 6):
    rf_Regressor = RandomForestRegressor(n_estimators=c_estimators, max_depth=x)
    rf_reg = rf_Regressor.fit(X_train, Y_train)
    # Score once and reuse the value; evaluating the same fitted forest a
    # second time only repeats the prediction work.
    score = rf_reg.score(X_test, Y_test)
    depths.append(x)
    scores.append(score)

# (best max_depth, n_estimators); np.argmax picks the first maximum on ties.
print((depths[np.argmax(scores)], c_estimators))

(379, 13)
(127, 13)
0.9805545439239387
0.88608530301534
[19.17   9.887]
(5, 100)


Hands-On - SVM

In [14]:
import sklearn.datasets as sd
import sklearn.preprocessing as skp
import sklearn.model_selection as sms
import numpy as np
from sklearn.model_selection import train_test_split
import sklearn.svm as ssvm
# Seed NumPy's global random generator.
np.random.seed(100)

# Load the digits dataset and make a stratified train/test split.
digits = sd.load_digits()
X_train, X_test, Y_train, Y_test = train_test_split(
    digits.data, digits.target, random_state=30, stratify=digits.target
)
print(X_train.shape)
print(X_test.shape)

# Baseline: SVC with default settings on the raw features.
svm = ssvm.SVC()
svm_clf = svm.fit(X_train, Y_train)
print(svm_clf.score(X_test, Y_test))

# Standardise the features and train a second SVC.
# The scaler is fitted on the training split only. Fitting it on all of
# digits.data would let the test rows influence the mean/variance used for
# scaling (data leakage) and bias the test score. Because the scaler no longer
# sees the test rows, the score can differ slightly from a run that fitted it
# on the full dataset.
standardizer = skp.StandardScaler()
standardizer = standardizer.fit(X_train)
# Whole dataset scaled with the training-split statistics; kept under its
# original name.
digits_standardized = standardizer.transform(digits.data)
# Same rows as the split above, now standardised; the labels are unchanged.
X_train = standardizer.transform(X_train)
X_test = standardizer.transform(X_test)
svm_clf2 = ssvm.SVC().fit(X_train, Y_train)
print(svm_clf2.score(X_test, Y_test))

(1347, 64)
(450, 64)
0.9822222222222222
0.9733333333333334


Hands-On - Clustering

In [None]:
from sklearn import metrics
import sklearn.datasets as skd
from sklearn.cluster import KMeans, AgglomerativeClustering, AffinityPropagation

# Load the iris dataset; iris.target holds the reference class labels.
iris = skd.load_iris()

# homogeneity_score takes (labels_true, labels_pred) and is not symmetric:
# swapping the arguments yields the completeness score instead. The reference
# labels therefore go first in every call below.

# K-Means with three clusters; random_state pins the centroid initialisation
# so the printed score is reproducible across runs.
km = KMeans(n_clusters=3, random_state=100)
km_cls = km.fit(iris.data)
print(metrics.homogeneity_score(iris.target, km_cls.predict(iris.data)))

# Agglomerative clustering with three clusters. The fitted model already
# exposes its cluster assignments as labels_, so there is no need to refit
# with fit_predict.
am = AgglomerativeClustering(n_clusters=3)
agg_cls = am.fit(iris.data)
print(metrics.homogeneity_score(iris.target, agg_cls.labels_))

# Affinity propagation with default settings (no cluster count is passed).
af = AffinityPropagation()
af_cls = af.fit(iris.data)
print(metrics.homogeneity_score(iris.target, af_cls.predict(iris.data)))

In [2]:
# Display the whole `boston` dataset object. This cell does not define `boston`;
# it must already exist in the kernel (the regression cells above assign it via
# datasets.load_boston()), so the cell fails on a fresh kernel if run on its own.
boston

{'data': array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
         4.9800e+00],
        [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
         9.1400e+00],
        [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
         4.0300e+00],
        ...,
        [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         5.6400e+00],
        [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
         6.4800e+00],
        [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         7.8800e+00]]),
 'target': array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,
        18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,
        15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,
        13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,
        21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,
        35.4, 24.7, 3