In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the ads dataset from the working directory and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='Social_Network_Ads.csv')
df.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [3]:
# Feature matrix: the two numeric predictors, selected by name so the cell
# cannot silently pick the wrong columns if the CSV column order changes.
X = df[['Age', 'EstimatedSalary']]
# Target vector: the Purchased column.
y = df['Purchased']
X.head()

Unnamed: 0,Age,EstimatedSalary
0,19,19000
1,35,20000
2,26,43000
3,27,57000
4,19,76000


In [4]:
from sklearn.model_selection import train_test_split

# Split the rows with a fixed seed so the split is repeatable.
# test_size=0.25 is scikit-learn's default, written out here for clarity.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

In [5]:
from sklearn.preprocessing import MinMaxScaler

# Learn each feature's min/max from the training rows only, then apply that
# same mapping to both splits (the test rows never influence the fit).
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [6]:
from sklearn.tree import DecisionTreeClassifier

# Tree depth is capped at 4. random_state is pinned because scikit-learn
# permutes the features at every split, so ties between equally good splits
# could otherwise yield a different tree (and score) on each run.
dtree = DecisionTreeClassifier(max_depth=4, random_state=0).fit(X_train, y_train)
print(dtree.score(X_test, y_test))   # score on the held-out split
print(dtree.feature_importances_)    # one importance value per input column

0.93
[0.51743843 0.48256157]


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Score the decision tree's test-set predictions with three metrics,
# printing each one under its own caption.
tree_predict = dtree.predict(X_test)
for caption, metric_fn in (
    ('Precision:', precision_score),
    ('Recall:', recall_score),
    ('F1 score:', f1_score),
):
    print(caption, metric_fn(y_test, tree_predict))

In [None]:
from sklearn.metrics import confusion_matrix

# Tabulate the decision tree's test-set predictions against the true labels.
confusion_matrix(y_true=y_test, y_pred=tree_predict)

In [None]:
from sklearn.svm import SVC

# Support vector classifier with a linear kernel, fitted on the training split.
svc = SVC(C=1, kernel='linear')
svc.fit(X_train, y_train)

# Predict the test split, report the test score, then show the confusion matrix.
svc_predict = svc.predict(X_test)
print(svc.score(X_test, y_test))
confusion_matrix(y_test, svc_predict)

In [None]:
from sklearn.svm import SVC

# Same evaluation as the linear model, but with an RBF kernel and gamma=200.
# This rebinds `svc` and `svc_predict`.
svc = SVC(C=1, kernel='rbf', gamma=200)
svc.fit(X_train, y_train)

svc_predict = svc.predict(X_test)
print(svc.score(X_test, y_test))
confusion_matrix(y_test, svc_predict)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Five-nearest-neighbours classifier fitted on the training split.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Predict the test split, report the test score, then show the confusion matrix.
knn_predict = knn.predict(X_test)
print(knn.score(X_test, y_test))
confusion_matrix(y_test, knn_predict)

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with C=100, fitted on the training split.
logreg = LogisticRegression(C=100)
logreg.fit(X_train, y_train)

# Predict the test split, report the test score, then show the confusion matrix.
logreg_predict = logreg.predict(X_test)
print(logreg.score(X_test, y_test))
confusion_matrix(y_test, logreg_predict)

# Regression

In [None]:
# synthetic dataset for simple regression
from sklearn.datasets import make_regression

# Generate 100 noisy samples with a single informative feature and a bias of 150.
X_R1, y_R1 = make_regression(
    n_samples=100, n_features=1, n_informative=1,
    bias=150.0, noise=30, random_state=0,
)

# Scatter the single feature against the target.
fig, ax = plt.subplots()
ax.set_title('Sample regression problem with one input variable')
ax.scatter(X_R1, y_R1, marker='o', s=50)
plt.show()


In [None]:
from sklearn.linear_model import LinearRegression

# Split the synthetic regression data. Note that this rebinds X_train, X_test,
# y_train and y_test, which until this cell held the classification split.
X_train, X_test, y_train, y_test = train_test_split(X_R1, y_R1, random_state = 1)
lr = LinearRegression().fit(X_train, y_train)
print('R^2 score:', lr.score(X_test, y_test))

# Plot both splits and the fitted line. Title, axis labels and a legend are
# included so the red/green series can be told apart without reading the code.
plt.figure()
plt.title('Least-squares linear regression')
plt.scatter(X_train, y_train, marker='o', color='red', s=50, label='Training data')
plt.scatter(X_test, y_test, marker='o', color='green', s=50, label='Test data')
# lr.predict gives the same line as coef_ * x + intercept_.
plt.plot(X_train, lr.predict(X_train), label='Fitted line')
plt.xlabel('Feature value')
plt.ylabel('Target value')
plt.legend(loc='best')
plt.show()

In [None]:
# Fit a fresh min-max scaler on the current training inputs (rebinding `scaler`)
# and store the scaled copies of both splits under separate names.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled, X_test_scaled = (scaler.transform(part) for part in (X_train, X_test))

In [None]:
from sklearn.neighbors import KNeighborsRegressor

# 15-nearest-neighbours regressor trained on the scaled training inputs.
knr = KNeighborsRegressor(n_neighbors=15)
knr.fit(X_train_scaled, y_train)
predict_knr = knr.predict(X_test_scaled)

# Print every test target next to its prediction, then the overall R^2.
print('Actual\t\t\tPredicted')
for truth, estimate in zip(y_test, predict_knr):
    print(truth, '\t', estimate)

print('R^2:', knr.score(X_test_scaled, y_test))

# Polynomial features

In [None]:
# synthetic dataset for more complex regression
from sklearn.datasets import make_friedman1

# Friedman #1 data: 100 samples with seven input features.
X_F1, y_F1 = make_friedman1(n_samples = 100,
                           n_features = 7, random_state=0)

# Only the column at index 2 is drawn, so the title and axis labels say so
# instead of claiming the problem has a single input variable.
plt.figure()
plt.title('Complex regression problem: feature index 2 of 7 inputs')
plt.scatter(X_F1[:, 2], y_F1, marker= 'o', s=50)
plt.xlabel('Feature index 2')
plt.ylabel('Target value')
plt.show()

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the Friedman features into polynomial terms up to degree 4.
# NOTE(review): the transformer is named `poly2` but uses degree 4 -- confirm
# which degree was intended.
poly2 = PolynomialFeatures(degree = 4)
X_F1_poly = poly2.fit_transform(X_F1)

# Fixed seed: without it every run draws a different split, so the printed
# R^2 changes from run to run.
X_F1_train, X_F1_test, y_F1_train, y_F1_test = train_test_split(
    X_F1_poly, y_F1, random_state = 0)

# Scale with statistics learned from the training split only.
scaler = MinMaxScaler()
X_F1_train = scaler.fit_transform(X_F1_train)
X_F1_test = scaler.transform(X_F1_test)

# Ordinary least squares on the expanded features; prints the test-set R^2.
lr2 = LinearRegression().fit(X_F1_train, y_F1_train)
print(lr2.score(X_F1_test, y_F1_test))

In [None]:
# Polynomial features and ridge regression
from sklearn.linear_model import Ridge

# Expand the Friedman features into polynomial terms up to degree 4.
poly4 = PolynomialFeatures(degree = 4)
X_F1_poly = poly4.fit_transform(X_F1)

# Fixed seed: without it every run draws a different split, so the printed
# R^2 changes from run to run.
X_F1_train, X_F1_test, y_F1_train, y_F1_test = train_test_split(
    X_F1_poly, y_F1, random_state = 0)

# Scale with statistics learned from the training split only.
scaler = MinMaxScaler()
X_F1_train = scaler.fit_transform(X_F1_train)
X_F1_test = scaler.transform(X_F1_test)

# Ridge regression (alpha = 0.5) on the expanded features; prints the test-set R^2.
lr4 = Ridge(alpha = 0.5).fit(X_F1_train, y_F1_train)
print(lr4.score(X_F1_test, y_F1_test))

# Validation curve

In [None]:
from sklearn.model_selection import validation_curve
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

# Start from the raw features, selected by name. Scaling is done inside the
# pipeline so each cross-validation fold fits its own scaler on its training part.
X = df[['Age', 'EstimatedSalary']]
y = df['Purchased']

# SVC is not scale-invariant, so a gamma sweep on raw ages and salaries is
# dominated by the salary magnitudes. Scaling to [0, 1] first matches how the
# SVC models earlier in the notebook were trained.
svc_pipeline = make_pipeline(MinMaxScaler(), SVC())

param_range = np.logspace(-6, 2, 8)
train_scores, test_scores = validation_curve(
    svc_pipeline, X, y,
    param_name='svc__gamma',  # make_pipeline names the step 'svc'
    param_range=param_range, cv=5)

# Mean and spread across the CV folds (axis 1) for every gamma value.
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)

plt.figure()
plt.title('Validation Curve with SVM')
# Raw string: '\g' is not a valid escape sequence in a normal string literal.
plt.xlabel(r'$\gamma$ (gamma)')
plt.ylabel('Score')
plt.ylim(0.0, 1.1)
lw = 2

# Training curve with a +/- one standard deviation band.
plt.semilogx(param_range, train_scores_mean, label='Training score',
            color='darkorange', lw=lw)

plt.fill_between(param_range, train_scores_mean - train_scores_std,
                train_scores_mean + train_scores_std, alpha=0.2,
                color='darkorange', lw=lw)

# Cross-validation curve with a +/- one standard deviation band.
plt.semilogx(param_range, test_scores_mean, label='Cross-validation score',
            color='navy', lw=lw)

plt.fill_between(param_range, test_scores_mean - test_scores_std,
                test_scores_mean + test_scores_std, alpha=0.2,
                color='navy', lw=lw)

plt.legend(loc='best')
plt.show()


In [None]:
# Raw training scores returned by validation_curve; the plotting cell averages
# them along axis 1 to draw the training curve.
train_scores

In [None]:
# Raw cross-validation scores returned by validation_curve; the plotting cell
# averages them along axis 1 to draw the cross-validation curve.
test_scores