In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [2]:
from sklearn.datasets._california_housing import fetch_california_housing

In [3]:
df = fetch_california_housing()

In [4]:
dfm = pd.DataFrame(df.data)

In [5]:
dfm.columns = df.feature_names

In [6]:
dfm

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25
...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32


In [7]:
dfm['target'] = df.target

In [8]:
dfm

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422
...,...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09,0.781
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21,0.771
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22,0.923
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32,0.847


In [9]:
dfm.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


# Dividing the dataset into independent and dependent features

In [69]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = dfm.iloc[:,:-1]
y = dfm.iloc[:,-1:]

In [11]:
X.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [12]:
y.head()

Unnamed: 0,target
0,4.526
1,3.585
2,3.521
3,3.413
4,3.422


# Linear Regression

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [76]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2, random_state=42)
X_scaled = scaler.fit_transform(X_train)
y_scaled = scaler.transform(X_test)

In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

In [71]:
lin_reg = LinearRegression()
neg_rmse = cross_val_score(lin_reg, X_scaled,y_train, scoring='neg_mean_squared_error',cv=5)

In [72]:
neg_mean_rmse = np.mean(neg_rmse)

In [73]:
neg_mean_rmse

-0.5192652011433679

In [74]:
lin = lin_reg.fit(X_scaled,y_train)

In [78]:
y_pred = lin.predict(y_scaled)

In [79]:
rmse = mean_squared_error(y_test,y_pred)

In [80]:
rmse

0.5558915986952443

In [81]:
from sklearn.metrics import r2_score
r2_score = r2_score(y_test,y_pred)

In [82]:
r2_score

0.5757877060324508

# Ridge Regression

In [83]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
ridge = Ridge()

In [84]:
params = [{'alpha':[1e-20,1e-15,1e-10,1e-5,1e-8,1e-3,1e-2,1,2,5,6,8,9,12,23,34,45,56]}]

In [85]:
ridge_model_grid_search = GridSearchCV(estimator=ridge,scoring='neg_mean_squared_error', param_grid=params, cv=5)

In [86]:
ridge_model_grid_search

In [87]:
ridge_model_grid_search.fit(X_scaled,y_train)

In [88]:
print(ridge_model_grid_search.best_params_)
print(ridge_model_grid_search.best_score_)

{'alpha': 0.01}
-0.5192652008456994


In [89]:
ridge_model_random_search = RandomizedSearchCV(estimator=ridge,scoring='neg_mean_squared_error', param_distributions=params, cv=5)

In [90]:
ridge_model_random_search.fit(X_scaled,y_train)

In [91]:
print(ridge_model_random_search.best_params_)
print(ridge_model_random_search.best_score_)

{'alpha': 0.01}
-0.5192652008456994


# Lasso Regression

In [92]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
lasso = Lasso()

In [93]:
params = [{'alpha':[1e-20,1e-15,1e-10,1e-5,1e-8,1e-3,1e-2,1,2,5,6,8,9,12,23,34,45,56]}]

In [94]:
lasso_model = GridSearchCV(estimator=lasso,scoring='neg_mean_squared_error', param_grid=params, cv=10)

In [95]:
lasso_model.fit(X_scaled,y_train)

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [96]:
lasso_model.best_score_

-0.5195895853819601

#  Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [None]:
log = LogisticRegression()

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
df1 = load_breast_cancer()

In [None]:
df = pd.DataFrame(df1.data)

In [None]:
df.columns =df1['feature_names']

In [None]:
df

In [None]:
df['target'] = df1.target

In [None]:
df

In [None]:
df.groupby('target')['target'].count()

In [None]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1:]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=42, test_size=0.2)

In [None]:
log.fit(X_train,y_train)

In [None]:
y_pred = log.predict(X_test)

In [None]:
confusion_matrix(y_test, y_pred)

In [None]:
accuracy_score(y_test,y_pred)

In [None]:
print(classification_report(y_test,y_pred))

In [97]:
X_scaled

array([[-0.326196  ,  0.34849025, -0.17491646, ...,  0.05137609,
        -1.3728112 ,  1.27258656],
       [-0.03584338,  1.61811813, -0.40283542, ..., -0.11736222,
        -0.87669601,  0.70916212],
       [ 0.14470145, -1.95271028,  0.08821601, ..., -0.03227969,
        -0.46014647, -0.44760309],
       ...,
       [-0.49697313,  0.58654547, -0.60675918, ...,  0.02030568,
        -0.75500738,  0.59946887],
       [ 0.96545045, -1.07984112,  0.40217517, ...,  0.00707608,
         0.90651045, -1.18553953],
       [-0.68544764,  1.85617335, -0.85144571, ..., -0.08535429,
         0.99543676, -1.41489815]])

In [99]:
X_train

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
14196,3.2596,33.0,5.017657,1.006421,2300.0,3.691814,32.71,-117.03
8267,3.8125,49.0,4.473545,1.041005,1314.0,1.738095,33.77,-118.16
17445,4.1563,4.0,5.645833,0.985119,915.0,2.723214,34.66,-120.48
14265,1.9425,36.0,4.002817,1.033803,1418.0,3.994366,32.69,-117.11
2271,3.5542,43.0,6.268421,1.134211,874.0,2.300000,36.78,-119.80
...,...,...,...,...,...,...,...,...
11284,6.3700,35.0,6.129032,0.926267,658.0,3.032258,33.78,-117.96
11964,3.0500,33.0,6.868597,1.269488,1753.0,3.904232,34.02,-117.43
5390,2.9344,36.0,3.986717,1.079696,1756.0,3.332068,34.03,-118.38
860,5.7192,15.0,6.395349,1.067979,1777.0,3.178891,37.58,-121.96
