<a href="https://colab.research.google.com/github/Arslonbekjon/Machine-learning-understanding-of-processes-and-analytical-approach/blob/main/Machine_Learning_prediction_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import sklearn


In [None]:
# Address of the housing CSV file hosted in the handson-ml2 GitHub repository.
URL = "https://github.com/ageron/handson-ml2/blob/master/datasets/housing/housing.csv?raw=true"
# Download the file and parse it into a DataFrame.
df = pd.read_csv(filepath_or_buffer=URL)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

# Training target: an independent copy of the column the models predict.
y = train_set["median_house_value"].copy()
# Training features: every column except the target.
X_train = train_set.drop(columns="median_house_value")

# Training features without the 'ocean_proximity' column.
X_num = X_train.drop(columns="ocean_proximity")

#Building the pipeline

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
# Positional indices of the columns the transformer reads from its input.
rooms_ix, bedrooms_ix, population_ix, households_ix = 3, 4, 5, 6


class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    """Append ratio features computed from existing columns.

    The columns are addressed by position through the module-level
    ``rooms_ix``, ``bedrooms_ix``, ``population_ix`` and ``households_ix``
    indices, so the input must keep that column order.

    Parameters
    ----------
    add_bedrooms_per_room : bool, default=True
        When true, a third ratio column (bedrooms / rooms) is appended
        in addition to the two per-household ratios.
    """

    def __init__(self, add_bedrooms_per_room=True):
        self.add_bedrooms_per_room = add_bedrooms_per_room

    def fit(self, X, y=None):
        """Return ``self`` unchanged; this transformer learns nothing from the data."""
        return self

    def transform(self, X):
        """Return ``X`` with the ratio columns appended on the right.

        ``X`` may be an ndarray or anything ``np.asarray`` can convert to a
        2-D array (for example a DataFrame), so the transformer also works
        outside a pipeline where the input has not yet become an ndarray.
        """
        # Positional slicing such as X[:, i] is not supported by DataFrames or
        # lists; converting first makes those inputs work. ndarrays pass through.
        X = np.asarray(X)
        rooms_per_household = X[:, rooms_ix] / X[:, households_ix]
        population_per_household = X[:, population_ix] / X[:, households_ix]
        if self.add_bedrooms_per_room:
            # The bedrooms_per_room column is optional.
            bedrooms_per_room = X[:, bedrooms_ix] / X[:, rooms_ix]
            return np.c_[X, rooms_per_household, population_per_household, bedrooms_per_room]
        return np.c_[X, rooms_per_household, population_per_household]


In [None]:
from sklearn.pipeline import Pipeline

from sklearn.impute import SimpleImputer

from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Preprocessing chain for the numeric columns; the steps run in the order listed:
#   1. 'imputer'       - SimpleImputer with the median strategy,
#   2. 'attribs_adder' - CombinedAttributesAdder with the bedrooms/rooms ratio enabled,
#   3. 'std_scaler'    - StandardScaler.
num_pipeline = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("attribs_adder", CombinedAttributesAdder(add_bedrooms_per_room=True)),
    ("std_scaler", StandardScaler()),
])

In [None]:
from sklearn.compose import ColumnTransformer

# Column names routed to each branch of the preprocessor.
cat_attribs = ["ocean_proximity"]
num_attribs = X_num.columns.tolist()

# The columns of X_num go through num_pipeline; 'ocean_proximity' is one-hot encoded.
full_pipeline = ColumnTransformer(transformers=[
    ("num", num_pipeline, num_attribs),
    ("cat", OneHotEncoder(), cat_attribs),
])

In [None]:
# Fit the preprocessing pipeline on the training features and transform them in one call.
X_prepared = full_pipeline.fit_transform(X_train)

In [None]:
# Notebook display of the prepared training features.
X_prepared

In [None]:
# Linear Regression: create an unfitted model with default settings.

from sklearn.linear_model import LinearRegression

LR_model = LinearRegression()

In [None]:
# Train the linear model on the prepared training features and their targets.
LR_model.fit(X_prepared,y)

In [None]:
# Draw 10 random rows from the training features for a quick spot check.
# NOTE(review): despite the name, these rows come from X_train, not from the held-out
# test_set, and no random_state is passed, so the selection differs between runs.
test_data=X_train.sample(10)

In [None]:
# Notebook display of the training features.
X_train

In [None]:
# Look up the true target values of the sampled rows through their index labels.
test_label = y.loc[test_data.index]
# Notebook display of those values.
test_label


In [None]:
# Apply the already-fitted preprocessing to the sampled rows (transform only, no refit).
test_data_prepared = full_pipeline.transform(test_data)

# Predict the target for each sampled row with the trained linear model.
predicted_labels = LR_model.predict(test_data_prepared)

In [None]:
# Notebook display of the predictions for the sampled rows.
predicted_labels

In [None]:
# Side-by-side table for the sampled rows: the 'Bashorat' column holds the model's
# predictions and the 'Asl qiymat' column holds the true target values.
pd.DataFrame({'Bashorat':predicted_labels, 'Asl qiymat':test_label})

#Next step is testing the model

In [None]:
# Notebook display of the held-out test split.
test_set

In [None]:
# Test features: every column of the held-out split except the target.
X_test = test_set.drop(columns="median_house_value")
# Notebook display of the test features.
X_test

In [None]:
# Test target: an independent copy of the target column of the held-out split.
y_test=test_set['median_house_value'].copy()
# Notebook display of the test target.
y_test


In [None]:
# Apply the already-fitted preprocessing to the test features (transform only, no refit).
X_test_prepared = full_pipeline.transform(X_test)

In [None]:
# Linear-model predictions for the held-out test features.
y_predicted = LR_model.predict(X_test_prepared)

In [None]:
# Notebook display of the test-set predictions.
y_predicted

In [None]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error between the true test targets and the current predictions.
MAE = mean_absolute_error(y_true=y_test, y_pred=y_predicted)

print("MAE=", MAE)

In [None]:
from sklearn.metrics import mean_squared_error
# Mean squared error on the test set; its square root (RMSE) is what gets reported.
MSE = mean_squared_error(y_true=y_test, y_pred=y_predicted)
print("RMSE=", np.sqrt(MSE))

#Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Seed the forest so the fitted model, and every score computed from it, is the same
# on each run. The seed matches the one used for the train/test split.
RF_model = RandomForestRegressor(random_state=42)
# Train on the prepared training features and their targets.
RF_model.fit(X_prepared, y)

In [None]:
# Random-forest predictions for the test features.
# NOTE: this rebinds y_predicted, which previously held the linear model's predictions.
y_predicted = RF_model.predict(X_test_prepared)

In [None]:
from sklearn.metrics import mean_squared_error
# Test-set MSE of the predictions currently held in y_predicted (the random forest's
# at this point); the square root is printed as RMSE.
MSE = mean_squared_error(y_test,y_predicted)
print("RMSE=",np.sqrt(MSE))

#Cross-Validation

In [None]:
# Cross-validation runs over the whole dataset, so rebuild features and target from df.
# NOTE: this rebinds `y` and `X_prepared`, which until now held the training split only.
y = df["median_house_value"].copy()
X = df.drop(columns="median_house_value")

# Reuse the preprocessing that was already fitted earlier (transform only, no refit).
X_prepared = full_pipeline.transform(X)

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of the linear model; each score is a negated MSE.
mse_scores=cross_val_score(LR_model, X_prepared,y,scoring="neg_mean_squared_error",cv=5)

In [None]:
def display_scores(scores):
    """Print the given scores followed by their mean and standard deviation.

    ``scores`` must provide ``mean()`` and ``std()`` methods (a NumPy array
    does). Nothing is returned; the three lines go to standard output.
    """
    print("Scores:", scores)
    # Each summary statistic is looked up by method name and printed on its own line.
    for label, method_name in (("Mean:", "mean"), ("Std.dev:", "std")):
        print(label, getattr(scores, method_name)())

In [None]:
# Negate the scores to recover MSE, take the square root for per-fold RMSE, and print a summary.
display_scores(np.sqrt(-mse_scores))

In [None]:
# 10-fold cross-validation of the random forest; each score is a negated MSE.
scores = cross_val_score(RF_model, X_prepared, y, scoring="neg_mean_squared_error", cv=10)
# These are the random forest's results, so they are stored under an RF_ name
# rather than the LR_ name used for the linear model.
RF_rmse_scores = np.sqrt(-scores)
display_scores(RF_rmse_scores)

#Pickle

In [None]:
import pickle
# Serialise the random-forest model to disk with pickle.
filename = 'RF_model_pkl'  # Any file name can be used here
with open(filename,'wb') as file:
  pickle.dump(RF_model,file)

In [None]:
# Read the pickled model back from the file named by `filename`.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
with open(filename,'rb') as file:
  model = pickle.load(file)

#Joblib

In [None]:
import joblib

# Serialise the linear model to disk with joblib.
# NOTE: this rebinds `filename`, which previously named the pickle file.
filename = 'LR_model.jbl' # Any file name can be used here
joblib.dump(LR_model, filename)

In [None]:
# Load the model stored in the file named by `filename`.
# NOTE: this rebinds `model`, which previously held the object loaded with pickle.
model = joblib.load(filename)

In [None]:
# 5-fold cross-validation of the reloaded model; each score is a negated MSE.
scores = cross_val_score(model, X_prepared,y, scoring='neg_mean_squared_error',cv=5)
# Negate and take the square root to get per-fold RMSE, then print a summary.
LR_rmse_scores = np.sqrt(-scores)
display_scores(LR_rmse_scores)