In [85]:
import pandas as pd
import numpy as np
from timeit import default_timer as timer
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer, StandardScaler
from IPython.display import HTML

**Load Data**

In [47]:
data = pd.read_csv("housing.csv")

In [48]:
data.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


**Explore Data**

In [49]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   longitude           20640 non-null  float64
 1   latitude            20640 non-null  float64
 2   housing_median_age  20640 non-null  float64
 3   total_rooms         20640 non-null  float64
 4   total_bedrooms      20433 non-null  float64
 5   population          20640 non-null  float64
 6   households          20640 non-null  float64
 7   median_income       20640 non-null  float64
 8   median_house_value  20640 non-null  float64
 9   ocean_proximity     20640 non-null  object 
dtypes: float64(9), object(1)
memory usage: 1.6+ MB


In [50]:
data.describe()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
count,20640.0,20640.0,20640.0,20640.0,20433.0,20640.0,20640.0,20640.0,20640.0
mean,-119.569704,35.631861,28.639486,2635.763081,537.870553,1425.476744,499.53968,3.870671,206855.816909
std,2.003532,2.135952,12.585558,2181.615252,421.38507,1132.462122,382.329753,1.899822,115395.615874
min,-124.35,32.54,1.0,2.0,1.0,3.0,1.0,0.4999,14999.0
25%,-121.8,33.93,18.0,1447.75,296.0,787.0,280.0,2.5634,119600.0
50%,-118.49,34.26,29.0,2127.0,435.0,1166.0,409.0,3.5348,179700.0
75%,-118.01,37.71,37.0,3148.0,647.0,1725.0,605.0,4.74325,264725.0
max,-114.31,41.95,52.0,39320.0,6445.0,35682.0,6082.0,15.0001,500001.0


**Droping Null Values**

In [51]:
data = data.dropna(axis = 'rows')

In [52]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 20433 entries, 0 to 20639
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   longitude           20433 non-null  float64
 1   latitude            20433 non-null  float64
 2   housing_median_age  20433 non-null  float64
 3   total_rooms         20433 non-null  float64
 4   total_bedrooms      20433 non-null  float64
 5   population          20433 non-null  float64
 6   households          20433 non-null  float64
 7   median_income       20433 non-null  float64
 8   median_house_value  20433 non-null  float64
 9   ocean_proximity     20433 non-null  object 
dtypes: float64(9), object(1)
memory usage: 1.7+ MB


In [53]:
data['ocean_proximity'].value_counts()

ocean_proximity
<1H OCEAN     9034
INLAND        6496
NEAR OCEAN    2628
NEAR BAY      2270
ISLAND           5
Name: count, dtype: int64

In [54]:
data = data.replace('<1H OCEAN', 'NEAR OCEAN')

In [55]:
data = data.replace('NEAR BAY', 'NEAR OCEAN')

In [56]:
data = data.replace('ISLAND','INLAND')

In [57]:
data['ocean_proximity'].value_counts()

ocean_proximity
NEAR OCEAN    13932
INLAND         6501
Name: count, dtype: int64

**Label Encoding**

In [58]:
binary = LabelBinarizer()
data['ocean_proximity'] = binary.fit_transform(data['ocean_proximity'])

**Assighning Target and Features to Y and X Variable**

In [59]:
x = data.drop('median_house_value', axis = 1)
y = data['median_house_value']

**Splitting Data in Train And Test Set**

In [62]:
x_train, x_test, y_train, y_test = train_test_split(x, y , random_state=32,test_size = 0.2)

In [64]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(16346, 9)
(4087, 9)
(16346,)
(4087,)


**Scaling Data between range 0 to 1**

In [66]:
scale = StandardScaler()
x_train = scale.fit_transform(x_train)
x_test = scale.fit_transform(x_test)

In [92]:
y_train = np.array(y_train)
y_test = np.array(y_test)

In [93]:
y_train = scale.fit_transform(y_train.reshape(-1,1))
y_test = scale.fit_transform(y_test.reshape(-1,1))

**Intel Extension For Scikit Learn**

In [None]:
from sklearnex import patch_sklearn
patch_sklearn()

**Training Model**

In [94]:
from sklearn.linear_model import LinearRegression
start = timer()
model = LinearRegression()
model.fit(x_train, y_train)
time_patched = timer() - start
print("Intel sklearnex Extension for scikit learn:{}".format(time_patched))

Intel sklearnex Extension for scikit learn:0.010906487703323364


**Evaluating Model**

In [98]:
prediction = model.predict(x_test)
score_patched = r2_score(y_test, prediction)
print("Accuracy_score:{}".format(score))

Accuracy_score:0.6451295901881948


**Original Scikit Learn**

In [99]:
from sklearnex import unpatch_sklearn
unpatch_sklearn()

**Training Model**

In [106]:
from sklearn.linear_model import LinearRegression
start = timer()
model = LinearRegression()
model.fit(x_train, y_train)
time_upatched = timer() - start
print("Normal scikit learn:{}".format(time_upatched))

Normal scikit learn:0.011071167886257172


**Evaluating Model**

In [101]:
prediction = model.predict(x_test)
score_unpatched = r2_score(y_test, prediction)
print("Accuracy_score:{}".format(score))

Accuracy_score:0.6451295901881948


In [107]:
HTML(f"<h3>Compare Time of Excecution of patched Scikit-learn and original</h3>"
     f"time of Excecution of patched Scikit-learn: {time_patched} <br>"
     f"time of excecution of unpatched Scikit-learn: {time_upatched} <br>"
     f"Metrics ratio: {time_patched/time_upatched} <br>"
     f"<h3>With Scikit-learn-intelex patching you can:</h3>"
    )