In [None]:
% matplotlib inline


In [61]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler,PolynomialFeatures

from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline

In [None]:
# Load the housing table; the file has no header row.
# Fields are separated by runs of spaces, so split on whitespace instead of
# using read_fwf: read_fwf infers column boundaries from only the first 100
# rows by default and can clip wider values that first appear later in the file.
housing_data = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data",
    sep=r"\s+",
    header=None,
)

In [None]:
# Give the 14 unnamed columns readable names; the last one is the target.
housing_data.columns = [
    "crime_rate",
    "zoned_land",
    "industry",
    "bounds_river",
    "nox_conc",
    "rooms",
    "age",
    "distance",
    "highways",
    "tax",
    "pt_ratio",
    "b_estimator",
    "pop_status",
    "price",
]

In [None]:
# Display the loaded housing frame to eyeball the values and column names.
housing_data

In [None]:
# Per-column summary statistics; a quick sanity check on value ranges.
housing_data.describe()

In [None]:
# Column dtypes; non-numeric columns here would point to a parsing problem.
housing_data.dtypes

In [None]:
# Feature matrix: every column except the target "price".
housing_attributes = housing_data.drop("price", axis=1)
housing_attributes.shape

In [None]:
# Regression target: the "price" column.
housing_prices = housing_data["price"]

In [None]:
# Shape of the target; its length should match the rows of housing_attributes.
housing_prices.shape

In [None]:
# Preprocessing: min-max scale the attribute columns.
scaler = MinMaxScaler()
scaler.fit(housing_attributes)

# Apply the fitted scaling to the same attributes and show the result.
housing_attributes_scaled = scaler.transform(housing_attributes)
housing_attributes_scaled

In [None]:
# Baseline model: ordinary least-squares linear regression.
linear_regression = LinearRegression()

In [None]:
# Fit the baseline on the scaled attributes against the prices (all rows).
linear_regression.fit(housing_attributes_scaled,housing_prices)

In [None]:
# Fitted coefficients, one per attribute column, in column order.
linear_regression.coef_

In [None]:
# R^2 of the baseline, evaluated on the same data it was fit on (training score).
linear_regression.score(housing_attributes_scaled,housing_prices)

In [None]:
# Root-mean-squared error of the baseline on the data it was trained on.
predictions = linear_regression.predict(housing_attributes_scaled)
training_mse = mean_squared_error(housing_prices, predictions)
np.sqrt(training_mse)

In [None]:
# Robust regression: RANSAC fits the wrapped linear model on random subsets of
# the data and labels each sample as an inlier or an outlier.
ransac = RANSACRegressor(
    # Passed positionally because this parameter is named `base_estimator` in
    # older scikit-learn releases and `estimator` in newer ones.
    LinearRegression(),
    max_trials=200,
    # Each trial is fit on a random 25% of the samples.
    min_samples=0.25,
    # Fixed seed so the random subsets, and hence the inlier mask, repeat on every run.
    random_state=42,
)

In [None]:
# Fit RANSAC on the same scaled attributes and prices as the baseline.
ransac.fit(housing_attributes_scaled,housing_prices)

In [None]:
# Coefficients of the final linear model kept by RANSAC (compare with the baseline).
ransac.estimator_.coef_

In [None]:
# Fraction of samples RANSAC kept as inliers (mean of the boolean mask).
ransac.inlier_mask_.mean()

In [None]:
# Split the original (unscaled) rows by RANSAC's inlier/outlier labelling.
inlier_mask = ransac.inlier_mask_
inliers = housing_data.loc[inlier_mask]
outliers = housing_data.loc[~inlier_mask]

In [None]:
# Price against number of rooms, coloured by RANSAC's inlier/outlier label.
fig, ax = plt.subplots()
ax.scatter(inliers["rooms"], inliers["price"], label="inliers")
ax.scatter(outliers["rooms"], outliers["price"], label="outliers")

ax.set_xlabel('Rooms')
ax.set_ylabel('Price, $10^3$ USD')
ax.legend()

plt.show()

In [None]:
# R^2 of the RANSAC model over all samples, inliers and outliers together.
ransac.score(housing_attributes_scaled,housing_prices)

In [None]:
# R^2 restricted to the samples RANSAC marked as inliers. The aim is to see the
# highest score achievable once the outliers are left out.
ransac.score(housing_attributes_scaled[ransac.inlier_mask_],housing_prices[ransac.inlier_mask_])

In [None]:
# R^2 restricted to the samples RANSAC marked as outliers, for contrast.
ransac.score(housing_attributes_scaled[~ransac.inlier_mask_],housing_prices[~ransac.inlier_mask_])

In [None]:
# Expand the scaled attributes with interaction terms (products of distinct
# features) up to degree 5.
# NOTE(review): the result is named "..._cubed" although degree is 5; confirm
# which one is intended.
polynomial_features = PolynomialFeatures(
    degree=5,
    interaction_only=True,
)
housing_attributes_scaled_cubed = polynomial_features.fit_transform(
    housing_attributes_scaled
)

In [None]:
# Linear regression fitted on the expanded interaction features.
polynomial_regression = LinearRegression()
polynomial_regression.fit(housing_attributes_scaled_cubed,housing_prices)

In [None]:
# R^2 on the training data.
# NOTE(review): the model is scored on the rows it was fit on, so a high value
# here does not show that it generalises.
polynomial_regression.score(housing_attributes_scaled_cubed,housing_prices)

In [None]:
# Load the iris table; the file is comma-separated and has no header row.
iris_data = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
    header=None,
)


In [None]:
# Name the four measurement columns and the class label column.
iris_data.columns = [
    "sepal length",
    "sepal width",
    "petal length",
    "petal width",
    "iris_type",
]

In [None]:
# Display the loaded iris frame to check the values and column names.
iris_data

In [None]:
# Separate the measurements (features) from the class label (target).
iris_attributes = iris_data.drop("iris_type", axis=1)
iris_types = iris_data["iris_type"]

In [None]:
# A fresh min-max scaler for the iris measurements; this rebinds the name
# `scaler`, which was used for the housing data earlier.
scaler = MinMaxScaler()
iris_attributes_scaled = scaler.fit(iris_attributes).transform(iris_attributes)

In [None]:
# C is the inverse of the regularization strength, so a very large C leaves the
# classifier almost unregularized.
logistic_regression = LogisticRegression(C=1e9)

In [None]:
# Fit the classifier on the scaled measurements against the iris type labels.
logistic_regression.fit(iris_attributes_scaled,iris_types)

In [None]:
# Fitted coefficients: one row per class, one column per measurement.
logistic_regression.coef_

In [None]:
# is this a setosa? -> 0,1
# is this a virginica? -> 0,1
# is this a versicolor? -> 0,1

In [None]:
# Predicted class label for every training sample.
logistic_regression.predict(iris_attributes_scaled)

In [None]:
# Classification accuracy on the data the model was fit on (training accuracy).
logistic_regression.score(iris_attributes_scaled,iris_types)

In [None]:
# Per-class probabilities for the first 10 samples; each row sums to 1.
logistic_regression.predict_proba(iris_attributes_scaled[:10])

In [None]:
# Opposite extreme: a tiny C means very strong regularization, to see how far
# the training accuracy drops.
# NOTE: this rebinds `logistic_regression`, replacing the C=1e9 model from the
# earlier cell; re-running earlier cells after this one uses this model.
logistic_regression = LogisticRegression(C = 1e-9)
logistic_regression.fit(iris_attributes_scaled,iris_types)
logistic_regression.score(iris_attributes_scaled,iris_types)

In [75]:
# End-to-end classifier: degree-2 polynomial expansion, then min-max scaling,
# then logistic regression. verbose=True prints the time spent in each step.
log_reg_pipe = Pipeline(
    steps=[
        ("square_feat", PolynomialFeatures(degree=2, interaction_only=False)),
        ("min_max_scaler", MinMaxScaler()),
        ("logistic_regression", LogisticRegression(C=1)),
    ],
    verbose=True,
)

In [76]:
# Fit the whole pipeline on the raw (unscaled) measurements; scaling happens
# inside the pipeline.
log_reg_pipe.fit(iris_attributes,iris_types)

[Pipeline] ....... (step 1 of 3) Processing square_feat, total=   0.0s
[Pipeline] .... (step 2 of 3) Processing min_max_scaler, total=   0.0s
[Pipeline]  (step 3 of 3) Processing logistic_regression, total=   0.0s


Pipeline(steps=[('square_feat', PolynomialFeatures()),
                ('min_max_scaler', MinMaxScaler()),
                ('logistic_regression', LogisticRegression(C=1))],
         verbose=True)

In [77]:
# The (name, estimator) pairs that make up the pipeline, in execution order.
log_reg_pipe.steps

[('square_feat', PolynomialFeatures()),
 ('min_max_scaler', MinMaxScaler()),
 ('logistic_regression', LogisticRegression(C=1))]

In [78]:
# Accuracy of the pipeline on the data it was fit on (training accuracy).
log_reg_pipe.score(iris_attributes,iris_types)

0.96