# Linear regression:

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression as LR
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

In [None]:
from mglearn.datasets import make_wave
from mglearn.datasets import make_forge
import mglearn

### Daten:

In [None]:
# Generate the 60-sample wave data set and plot the target against the feature.
X, y = make_wave(n_samples=60)
ax = plt.gca()
ax.plot(X, y, "o")
ax.set_xlabel("Feature")
ax.set_ylabel("Target")
ax.grid()

In [None]:
# Hold out part of the wave data for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0
)


In [None]:
# Fit a plain linear regression on the training split of the wave data.
lr = LR().fit(X=X_train, y=y_train)

In [None]:
# Learned weight(s) of the fitted model.
slope = lr.coef_
print(f"The weight is {slope}")

In [None]:
# Learned intercept of the fitted model.
offset = lr.intercept_
print(f"The offset is {offset}")

#### probably underfitting:

In [None]:
# Score the model on both splits so training and test performance can be compared.
train_score = lr.score(X=X_train, y=y_train)
test_score = lr.score(X=X_test, y=y_test)
print(f"The training score is {train_score}")
print(f"The test score is {test_score}")

#### probably overfitting:

In [None]:
# Load the breast-cancer data set and show which fields the returned object offers.
cancer  = load_breast_cancer()
cancer.keys()

In [None]:
# Dimensions of the feature matrix.
cancer["data"].shape

In [None]:
# Pull out features and labels, and wrap the features in a DataFrame with
# named columns for easier inspection.
data, target = cancer["data"], cancer["target"]
dataf = pd.DataFrame(data=data, columns=cancer["feature_names"])
dataf

In [None]:
# Stratified split: both parts keep the class proportions of `target`.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    random_state=42,
    stratify=target,
)


In [None]:
# Refit the linear regression, now on the breast-cancer training split.
lr = LR().fit(X=X_train, y=y_train)

In [None]:
# Learned weights of the model fitted on the cancer data.
weights = lr.coef_
print(f"The weight is {weights}")

In [None]:
# Learned intercept of the model fitted on the cancer data.
offset = lr.intercept_
print(f"The offset is {offset}")

In [None]:
# Compare training and test performance of the unregularized model.
train_score = lr.score(X=X_train, y=y_train)
test_score = lr.score(X=X_test, y=y_test)
print(f"The training score is {train_score}")
print(f"The test score is {test_score}")

# Ridge regression:

In [None]:
from sklearn.linear_model import Ridge

In [None]:
# Ridge regression with its default settings on the cancer training split.
ridge = Ridge().fit(X=X_train, y=y_train)

In [None]:
# Training vs. test performance of the default ridge model.
train_score = ridge.score(X=X_train, y=y_train)
test_score = ridge.score(X=X_test, y=y_test)
print(f"The training score is {train_score}")
print(f"The test score is {test_score}")

In [None]:
# The larger alpha is, the closer the weights 'w' are pushed to zero
# (the default is alpha == 1.0). Here alpha is set to 0.
ridge = Ridge(alpha=0).fit(X=X_train, y=y_train)

In [None]:
# Training vs. test performance of the ridge model with alpha=0.
train_score = ridge.score(X=X_train, y=y_train)
test_score = ridge.score(X=X_test, y=y_test)
print(f"The training score is {train_score}")
print(f"The test score is {test_score}")

In [None]:
# Sweep alpha over 0, 0.1, ..., 19.9 and record the score on each split.
# ridgetr collects training scores, ridgete the matching test scores.
ridgetr, ridgete = [], []
for alpha in np.arange(0, 20, 0.1):
    ridge = Ridge(alpha=alpha).fit(X=X_train, y=y_train)
    ridgetr.append(ridge.score(X_train, y_train))
    ridgete.append(ridge.score(X_test, y_test))

In [None]:
# One point per alpha from the sweep: training score on x, test score on y.
ax = plt.gca()
ax.plot(ridgetr, ridgete, "o")
ax.set_xlabel("R^2 training")
ax.set_ylabel("R^2 test")
ax.grid()

# Lasso:

In [None]:
from sklearn.linear_model import Lasso

In [None]:
# Lasso with its default settings on the cancer training split.
lasso = Lasso().fit(X=X_train, y=y_train)

In [None]:
# Report both scores and how many coefficients the lasso left non-zero.
train_score = lasso.score(X=X_train, y=y_train)
test_score = lasso.score(X=X_test, y=y_test)
n_used = (lasso.coef_ != 0).sum()
print(f"The training score is {train_score}")
print(f"The test score is {test_score}")
print(f"Number coef used {n_used}")

In [None]:
# Much weaker penalty than the default; the iteration cap is raised to 100000.
# NOTE(review): the name suggests alpha=0.01, but the model uses alpha=0.0001 —
# confirm which value is intended.
lasso001 = Lasso(alpha=0.0001, max_iter=100000).fit(X=X_train, y=y_train)

In [None]:
# Report both scores and how many coefficients remain non-zero with the weak penalty.
train_score = lasso001.score(X=X_train, y=y_train)
test_score = lasso001.score(X=X_test, y=y_test)
n_used = (lasso001.coef_ != 0).sum()
print(f"The training score is {train_score}")
print(f"The test score is {test_score}")
print(f"Number coef used {n_used}")

# Linear models for classification:

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Generate the forge data set and print its features followed by its labels.
X, y = make_forge()
print(X, y, sep="\n")

#### For C = 1 (the default):

In [None]:
# LinearSVC is only imported in a later cell of this notebook. Import it here
# as well so that this cell does not raise a NameError on a fresh kernel
# (Restart Kernel -> Run All).
from sklearn.svm import LinearSVC

# Both classifiers are created with their default regularization strength C.
clflr = LogisticRegression(max_iter=1000)
clfls = LinearSVC()

In [None]:
# Train on X_train / y_train, which still hold the breast-cancer split created
# earlier; the forge arrays X, y generated above are not used here.
clflr.fit(X_train,y_train)

In [None]:
# The test score is printed; the training score is shown as the cell output.
test_score = clflr.score(X=X_test, y=y_test)
print(test_score)
clflr.score(X=X_train, y=y_train)

In [None]:
# Keep the transposed coefficients of the C=1 model for the comparison plot.
coef0 = clflr.coef_.transpose()

#### For C =100 less regularization:

In [None]:
# Larger C means a weaker penalty on the coefficients.
clflr = LogisticRegression(max_iter=1000, C=100)
clflr.fit(X=X_train, y=y_train)

In [None]:
# The test score is printed; the training score is shown as the cell output.
test_score = clflr.score(X=X_test, y=y_test)
print(test_score)
clflr.score(X=X_train, y=y_train)

In [None]:
# Keep the transposed coefficients of the C=100 model for the comparison plot.
coef1 = clflr.coef_.transpose()

#### For C =0.01 more regularization:

In [None]:
# Smaller C means a stronger penalty on the coefficients.
clflr = LogisticRegression(max_iter=1000, C=0.01)
clflr.fit(X=X_train, y=y_train)

In [None]:
# The test score is printed; the training score is shown as the cell output.
test_score = clflr.score(X=X_test, y=y_test)
print(test_score)
clflr.score(X=X_train, y=y_train)

In [None]:
# Keep the transposed coefficients of the C=0.01 model for the comparison plot.
coef2 = clflr.coef_.transpose()

#### Plotting the coefficients:

In [None]:
# Compare the coefficients obtained with the three regularization strengths.
plt.plot(coef0, "o", label="C=1")
plt.plot(coef1, "o", label="C=100")
plt.plot(coef2, "o", label="C=0.01")
# Derive tick positions from the same array that supplies the tick labels so
# the two can never disagree in length.
feature_names = cancer["feature_names"]
plt.xticks(range(len(feature_names)), feature_names, rotation=90)
# The y-axis label used to be an empty string; name the plotted quantity.
plt.ylabel("Coefficient magnitude")
plt.grid()
plt.legend()


As expected, more regularization (a smaller C) forces the coefficients closer to zero.

## Linear models for multiclass classification:

In [None]:
from sklearn.datasets import make_blobs

In [None]:
# Generate the blob data set with a fixed seed and tabulate both feature
# columns next to the class label.
X, y = make_blobs(random_state=42)
rows = zip(X[:, 0], X[:, 1], y)
dfx = pd.DataFrame(rows, columns=["Feature 0", "Feature 1", "Class/ Target"])
dfx

In [None]:
# Scatter plot of the blob data, grouped by the class label y.
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")  # axis label was misspelled as "Feeature 1"
plt.legend(["Class 0", "Class 1", "Class 2"])

### LinearSVC classifier:

In [None]:
from sklearn.svm import LinearSVC

In [None]:
# Fit a linear SVM on the full blob data set. Every other stochastic call in
# this notebook is seeded, so seed this one too to keep the coefficients
# shown below reproducible across runs.
linear_svm = LinearSVC(random_state=42).fit(X, y)

In [None]:
# Show the learned coefficient matrix of the linear SVM.
linear_svm.coef_

In [None]:
# Dimensions of the coefficient matrix.
np.shape(linear_svm.coef_)

- There are three rows of _coefficients_, one for each class.
- There are two columns, one for each _feature_.
- $ \rightarrow $ shape(3,2)

In [None]:
# Dimensions of the intercept vector.
np.shape(linear_svm.intercept_)

- There is an _intercept/offset_ for each class.

In [None]:
# x-values along which the decision boundaries are drawn (50 points, the
# linspace default), plus short aliases for the fitted SVM parameters.
line = np.linspace(-15, 15, num=50)
coef, intercept = linear_svm.coef_, linear_svm.intercept_
intercept

In [None]:

# Scatter the blob data and overlay one decision boundary per class.
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")  # axis label was misspelled as "Feeature 1"
plt.legend(["Class 0", "Class 1", "Class 2"])
# Each boundary satisfies w[0]*x0 + w[1]*x1 + b = 0, solved here for x1.
# The loop body is indented with spaces (it used a tab before).
for w, b in zip(coef, intercept):
    plt.plot(line, -(line * w[0] + b) / w[1])
plt.grid()

- Everything over the blue line is _Class 0_, everything under the blue line is _rest_.
- Everything to the right of the orange line is _Class 1_, everything to the left of the orange line is _rest_.
- Everything under the green line is _Class 2_, everything over the green line is _rest_.
- In the triangle in the middle, the _Class_ is chosen by which line is closest.