<a href="https://colab.research.google.com/github/Islomjon000/Practical-projects/blob/main/ML_yaratish.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
import pandas as pd
import numpy as np
import sklearn # scikit-learn kutubxonasi

In [23]:
# Location of the housing CSV; `?raw=true` asks GitHub for the raw file rather than the HTML page.
URL = "https://github.com/ageron/handson-ml2/blob/master/datasets/housing/housing.csv?raw=true"
# Load the whole dataset into a DataFrame (requires network access).
df = pd.read_csv(URL)

In [24]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
train_set, test_set = train_test_split(df, random_state=42, test_size=0.2)

# Training target (an independent copy) and training features (everything but the target).
y = train_set["median_house_value"].copy()
X_train = train_set.drop(columns="median_house_value")

# Numeric-only features: the training features without the categorical column.
X_num = X_train.drop(columns="ocean_proximity")

In [25]:
from sklearn.base import BaseEstimator, TransformerMixin
# Positional indices of the columns the transformer needs.
# NOTE(review): these assume the numeric matrix keeps the column order shown in the
# notebook (longitude, latitude, housing_median_age, total_rooms, total_bedrooms,
# population, households, ...) — confirm if the input columns ever change.
rooms_ix, bedrooms_ix, population_ix, households_ix = 3, 4, 5, 6

class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    """Append ratio features computed from fixed column positions.

    Adds ``rooms / households`` and ``population / households`` and, when
    ``add_bedrooms_per_room`` is true, ``bedrooms / rooms`` as extra columns
    on the right of the input matrix.

    Parameters
    ----------
    add_bedrooms_per_room : bool, default True
        Whether to append the optional bedrooms-per-room column.
    """

    def __init__(self, add_bedrooms_per_room = True):
        self.add_bedrooms_per_room = add_bedrooms_per_room

    def fit(self, X, y=None):
        """Learn nothing: this class is a pure transformer, so just return ``self``."""
        return self

    def transform(self, X):
        """Return ``X`` with the ratio columns appended.

        ``X`` must be 2-D with at least ``households_ix + 1`` columns. It may be
        an ndarray or anything ``np.asarray`` can convert (e.g. a DataFrame).
        """
        # Positional slicing (X[:, i]) only works on arrays; coerce so that
        # DataFrame input does not fail. ndarrays pass through without a copy.
        X = np.asarray(X)
        rooms_per_household = X[:, rooms_ix] / X[:, households_ix]
        population_per_household = X[:, population_ix] / X[:, households_ix]
        if self.add_bedrooms_per_room:  # the bedrooms_per_room column is optional
            bedrooms_per_room = X[:, bedrooms_ix] / X[:, rooms_ix]
            return np.c_[X, rooms_per_household, population_per_household, bedrooms_per_room]
        return np.c_[X, rooms_per_household, population_per_household]

In [26]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Numeric preprocessing chain, applied in this order:
#   imputer       -> fill missing values with the column median
#   attribs_adder -> append the derived ratio columns
#   std_scaler    -> standardize every column
num_pipeline = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("attribs_adder", CombinedAttributesAdder(add_bedrooms_per_room=True)),
    ("std_scaler", StandardScaler()),
])

In [27]:
from sklearn.compose import ColumnTransformer

# Column names routed to each branch of the preprocessing.
num_attribs = list(X_num)
cat_attribs = ['ocean_proximity']

# Numeric columns go through num_pipeline; the categorical column is one-hot encoded.
full_pipeline = ColumnTransformer([
    ('num', num_pipeline, num_attribs),
    # handle_unknown='ignore': a category that was absent when the encoder was
    # fitted is encoded as all zeros at transform time instead of raising.
    ('cat', OneHotEncoder(handle_unknown='ignore'), cat_attribs)
])

In [28]:
# Fit the preprocessing on the training features and transform them in one step.
X_prepared = full_pipeline.fit_transform(X_train)

In [29]:
# Peek at the first five prepared rows.
X_prepared[0:5,:]

array([[ 1.27258656, -1.3728112 ,  0.34849025,  0.22256942,  0.21122752,
         0.76827628,  0.32290591, -0.326196  , -0.17491646,  0.05137609,
        -0.2117846 ,  0.        ,  0.        ,  0.        ,  0.        ,
         1.        ],
       [ 0.70916212, -0.87669601,  1.61811813,  0.34029326,  0.59309419,
        -0.09890135,  0.6720272 , -0.03584338, -0.40283542, -0.11736222,
         0.34218528,  0.        ,  0.        ,  0.        ,  0.        ,
         1.        ],
       [-0.44760309, -0.46014647, -1.95271028, -0.34259695, -0.49522582,
        -0.44981806, -0.43046109,  0.14470145,  0.08821601, -0.03227969,
        -0.66165785,  0.        ,  0.        ,  0.        ,  0.        ,
         1.        ],
       [ 1.23269811, -1.38217186,  0.58654547, -0.56148971, -0.40930582,
        -0.00743434, -0.38058662, -1.01786438, -0.60001532,  0.07750687,
         0.78303162,  0.        ,  0.        ,  0.        ,  0.        ,
         1.        ],
       [-0.10855122,  0.5320839 ,  1

In [30]:
from sklearn.linear_model import LinearRegression

# Baseline model: ordinary linear regression with default settings.
LR_model = LinearRegression()

In [31]:
# Train the linear model on the prepared training features and their target values.
LR_model.fit(X_prepared, y)

In [32]:
# Take 5 random rows for a quick sanity check of the model.
# Seeded so that a re-run selects the same rows and the outputs below stay comparable.
# Note: despite the name, these rows are drawn from the training features (X_train).
test_data = X_train.sample(5, random_state=42)
test_data

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity
17272,-119.71,34.41,18.0,1225.0,317.0,694.0,306.0,3.6823,NEAR OCEAN
11990,-117.53,33.97,34.0,1293.0,215.0,774.0,217.0,3.8906,INLAND
11080,-117.84,33.8,35.0,1490.0,251.0,629.0,257.0,4.3661,<1H OCEAN
10354,-117.62,33.77,43.0,1911.0,439.0,930.0,433.0,4.6369,<1H OCEAN
8130,-118.18,33.8,15.0,2407.0,589.0,1591.0,506.0,3.0513,NEAR OCEAN


In [33]:
# Extract the prices that correspond to the rows above (these are exactly the values we need to predict).
test_label = y.loc[test_data.index]
test_label

17272    255000.0
11990    141000.0
11080    222100.0
10354    186400.0
8130     148100.0
Name: median_house_value, dtype: float64

In [34]:
# Apply the already-fitted preprocessing to the sampled rows (transform only, no refit).
test_data_prepared = full_pipeline.transform(test_data)
test_data_prepared

array([[-0.06367671, -0.57715477, -0.84178589, -0.65162202, -0.52863916,
        -0.64418546, -0.50921025, -0.10421725, -0.59982637, -0.07159779,
         0.79176854,  0.        ,  0.        ,  0.        ,  0.        ,
         1.        ],
       [ 1.02328371, -0.78308938,  0.42784199, -0.62035162, -0.77207916,
        -0.57382622, -0.74283277,  0.00517044,  0.21919732,  0.04058068,
        -0.80315719,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ],
       [ 0.86871595, -0.86265502,  0.50719373, -0.52975945, -0.68615916,
        -0.70135234, -0.63783389,  0.25487685,  0.15181584, -0.05609503,
        -0.76562882,  1.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [ 0.9784092 , -0.87669601,  1.14200767, -0.33615893, -0.23746582,
        -0.4366257 , -0.17583879,  0.3970861 , -0.42803125, -0.08197641,
         0.29080232,  1.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [ 0.69919001, -0.86265502, -1

In [38]:
# Predict prices for the sampled rows with the linear model.
predicted_data = LR_model.predict(test_data_prepared)
predicted_data

array([248970.57408056, 153888.8184677 , 234028.51093192, 271956.04690658,
       182332.6421528 ])

In [40]:
# Side-by-side table of the predicted prices and the actual prices for the sampled rows.
pd.DataFrame({'Prognoz': predicted_data, 'Real baxosi': test_label})

Unnamed: 0,Prognoz,Real baxosi
17272,248970.574081,255000.0
11990,153888.818468,141000.0
11080,234028.510932,222100.0
10354,271956.046907,186400.0
8130,182332.642153,148100.0


In [41]:
# Test features: every column of the held-out set except the target.
X_test = test_set.drop(columns='median_house_value')
X_test

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity
20046,-119.01,36.06,25.0,1505.0,,1392.0,359.0,1.6812,INLAND
3024,-119.46,35.14,30.0,2943.0,,1565.0,584.0,2.5313,INLAND
15663,-122.44,37.80,52.0,3830.0,,1310.0,963.0,3.4801,NEAR BAY
20484,-118.72,34.28,17.0,3051.0,,1705.0,495.0,5.7376,<1H OCEAN
9814,-121.93,36.62,34.0,2351.0,,1063.0,428.0,3.7250,NEAR OCEAN
...,...,...,...,...,...,...,...,...,...
15362,-117.22,33.36,16.0,3165.0,482.0,1351.0,452.0,4.6050,<1H OCEAN
16623,-120.83,35.36,28.0,4323.0,886.0,1650.0,705.0,2.7266,NEAR OCEAN
18086,-122.05,37.31,25.0,4111.0,538.0,1585.0,568.0,9.2298,<1H OCEAN
2144,-119.76,36.77,36.0,2507.0,466.0,1227.0,474.0,2.7850,INLAND


In [42]:
# Test target: an independent copy of the held-out house values.
y_test = test_set['median_house_value'].copy()
y_test

20046     47700.0
3024      45800.0
15663    500001.0
20484    218600.0
9814     278000.0
           ...   
15362    263300.0
16623    266800.0
18086    500001.0
2144      72300.0
3665     151500.0
Name: median_house_value, Length: 4128, dtype: float64

In [43]:
# Transform the test features with the preprocessing fitted on the training set (no refit).
X_test_prepared = full_pipeline.transform(X_test)

In [45]:
# Linear-model predictions for the whole test set.
y_predicted = LR_model.predict(X_test_prepared)

In [53]:
from sklearn.metrics import mean_absolute_error
# Mean absolute error of the linear model on the test set.
# (Stored as `lin_mae`: this value is an MAE, not an MSE/RMSE.)
lin_mae = mean_absolute_error(y_test, y_predicted)
print('Mae:',lin_mae)

Mae: 50898.7395349408


In [54]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the linear model on the test set ...
lin_mse = mean_squared_error(y_true=y_test, y_pred=y_predicted)
# ... and its square root, the RMSE, which is in the same units as the target.
lin_rmse = np.sqrt(lin_mse)
print('Rme:',lin_rmse)

Rme: 72701.32600762138


In [55]:
from sklearn.ensemble import RandomForestRegressor
# Random forest with default hyper-parameters, seeded so that re-running the
# notebook builds the same trees and therefore reports the same scores.
RF_model = RandomForestRegressor(random_state=42)
RF_model.fit(X_prepared, y)

In [56]:
# Evaluate the random forest on the test set.
# Results get their own `rf_*` names so the linear model's predictions and
# metrics (y_predicted, lin_mse, lin_rmse) are not silently overwritten.
rf_predicted = RF_model.predict(X_test_prepared)
rf_mse = mean_squared_error(y_test, rf_predicted)
# Compute the RMSE
rf_rmse = np.sqrt(rf_mse)
print(rf_rmse)

50033.95553846913


Cross-Validation usuli bilan baholash

In [57]:
# Build features and target from the FULL dataset (train and test rows together) for cross-validation.
# NOTE(review): this rebinds `y` and `X_prepared`, which earlier cells use for the training
# split only; re-running those earlier cells after this one would use the full-data values.
X = df.drop("median_house_value", axis=1)
y = df["median_house_value"].copy()

# NOTE(review): full_pipeline is only applied here (transform, no refit); it was fitted on
# X_train earlier, so imputer/scaler statistics are not re-estimated inside each CV fold.
# Confirm this is intended, or cross-validate a Pipeline that includes the preprocessing.
X_prepared = full_pipeline.transform(X)

In [59]:
def display_scores(scores):
    """Print the given scores, then their mean and their standard deviation.

    ``scores`` must provide ``.mean()`` and ``.std()`` (e.g. a NumPy array).
    """
    # Each statistic is computed lazily, right before its own line is printed.
    report = (
        ("Scores:", lambda s: s),
        ("Mean:", lambda s: s.mean()),
        ("Std.dev:", lambda s: s.std()),
    )
    for label, compute in report:
        print(label, compute(scores))

In [61]:
from sklearn.model_selection import cross_val_score

In [62]:
# 10-fold cross-validation of the linear model.
# The scorer returns NEGATIVE mean squared errors (higher is better by convention) ...
scores = cross_val_score(LR_model, X_prepared, y, scoring="neg_mean_squared_error", cv=10)
# ... so flip the sign before taking the square root to get one RMSE per fold.
LR_rmse_scores = np.sqrt(-scores)

In [65]:
# Report the per-fold RMSE of the linear model with its mean and standard deviation.
display_scores(LR_rmse_scores)

Scores: [84188.51219065 61197.24357613 86752.24346334 62289.14292385
 80540.40041898 68919.39949642 52503.82940087 90910.07884989
 77674.67507925 53941.60539478]
Mean: 71891.71307941683
Std.dev: 13249.525989444988


In [66]:
# 10-fold cross-validation of the random forest; the scorer returns negative MSEs,
# so negate before the square root to obtain one RMSE per fold.
scores = cross_val_score(RF_model, X_prepared, y, scoring="neg_mean_squared_error", cv=10)
# Stored under an RF-specific name so the linear model's scores (LR_rmse_scores) are kept.
RF_rmse_scores = np.sqrt(-scores)
display_scores(RF_rmse_scores)

Scores: [98706.36893493 47175.91620578 65128.32853844 56474.36411888
 61009.50970916 60001.52602697 47243.80199881 80213.2596984
 73910.93743646 49491.87332602]
Mean: 63935.58859938554
Std.dev: 15565.127307016357


In [71]:
import pickle

# Any file name can be used for the saved model.
filename = 'RF_model.uyni_pragnoz'
# Serialize the trained random forest to disk in binary mode.
# NOTE(review): only the model is saved; the fitted preprocessing (full_pipeline) is not,
# so whoever loads this file must prepare inputs the same way — confirm this is intended.
with open(filename, mode='wb') as model_file:
    pickle.dump(RF_model, model_file)