<a href="https://colab.research.google.com/github/Sanjar-prog/Telegram-JavaScript-Bot/blob/main/Machine_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import sklearn # scikit-learn kutubxonasi

In [2]:
from typing import DefaultDict
# URL of the online dataset (California housing CSV hosted on GitHub)
URL = "https://github.com/ageron/handson-ml2/blob/master/datasets/housing/housing.csv?raw=true"
# Read the CSV straight from the URL into a DataFrame
df = pd.read_csv(URL)

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

# Target: an independent copy of the label column from the training rows.
y = train_set["median_house_value"].copy()
# Predictors: every training column except the label.
x_train = train_set.drop(columns="median_house_value")
# Numeric-only view of the predictors (the `ocean_proximity` column is left out).
X_num = x_train.drop(columns="ocean_proximity")

In [6]:
from sklearn.base import BaseEstimator, TransformerMixin
# Positional indices of the columns we need; CombinedAttributesAdder.transform
# uses them to slice its input array by column position.
rooms_ix, bedrooms_ix, population_ix, households_ix = 3, 4, 5, 6

class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    """Append ratio features computed from existing numeric columns.

    Source columns are located by position through the module-level indices
    ``rooms_ix``, ``bedrooms_ix``, ``population_ix`` and ``households_ix``.

    Parameters
    ----------
    add_bedrooms_per_room : bool, default=True
        When true, a ``bedrooms_per_room`` column is appended in addition to
        ``rooms_per_household`` and ``population_per_household``.
    """

    def __init__(self, add_bedrooms_per_room=True):
        self.add_bedrooms_per_room = add_bedrooms_per_room

    def fit(self, X, y=None):
        """Return ``self`` unchanged; this class only transforms, it learns nothing."""
        return self

    def transform(self, X):
        """Return ``X`` with the ratio columns appended on the right.

        Parameters
        ----------
        X : array-like, two-dimensional
            Numeric data whose columns at the module-level indices hold the
            rooms, bedrooms, population and households counts.

        Returns
        -------
        numpy.ndarray
            ``X`` followed by ``rooms_per_household``,
            ``population_per_household`` and, if ``add_bedrooms_per_room`` is
            true, ``bedrooms_per_room``.

        Notes
        -----
        Denominators are not checked for zeros.
        """
        # Positional slicing (X[:, i]) only works on arrays, so coerce
        # DataFrames / nested lists first; ndarrays pass through unchanged.
        X = np.asarray(X)
        households = X[:, households_ix]
        rooms_per_household = X[:, rooms_ix] / households
        population_per_household = X[:, population_ix] / households
        if self.add_bedrooms_per_room:  # the bedrooms_per_room column is optional
            bedrooms_per_room = X[:, bedrooms_ix] / X[:, rooms_ix]
            return np.c_[X, rooms_per_household, population_per_household,
                         bedrooms_per_room]
        return np.c_[X, rooms_per_household, population_per_household]

In [7]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Preprocessing chain for the numeric columns; the steps run in the order listed.
num_pipeline = Pipeline([
          # replace missing values using the "median" strategy
          ('imputer', SimpleImputer(strategy="median")),
          # append the ratio columns computed by CombinedAttributesAdder
          ('attribs_adder', CombinedAttributesAdder()),
          # standardize the resulting features
          ('std_scaler', StandardScaler()),
])

In [8]:
from sklearn.compose import ColumnTransformer
# Column labels of the numeric features (list() over a DataFrame yields its column labels)
num_attribs = list(X_num)
# The column handled by the one-hot encoder
cat_attribs = ["ocean_proximity"]

# Route each group of columns to its own transformer:
# the numeric columns go through num_pipeline, `ocean_proximity` is one-hot encoded.
full_pipeline = ColumnTransformer([
          ("num", num_pipeline, num_attribs),
          ("cat", OneHotEncoder(), cat_attribs),
])

In [9]:
# Fit the preprocessing on the training features and transform them in one call
X_prepared = full_pipeline.fit_transform(x_train)

In [10]:
# Display the preprocessed training matrix
X_prepared

array([[ 1.27258656, -1.3728112 ,  0.34849025, ...,  0.        ,
         0.        ,  1.        ],
       [ 0.70916212, -0.87669601,  1.61811813, ...,  0.        ,
         0.        ,  1.        ],
       [-0.44760309, -0.46014647, -1.95271028, ...,  0.        ,
         0.        ,  1.        ],
       ...,
       [ 0.59946887, -0.75500738,  0.58654547, ...,  0.        ,
         0.        ,  0.        ],
       [-1.18553953,  0.90651045, -1.07984112, ...,  0.        ,
         0.        ,  0.        ],
       [-1.41489815,  0.99543676,  1.85617335, ...,  0.        ,
         1.        ,  0.        ]])

In [11]:
### Linear Regression

In [12]:
from sklearn.linear_model import LinearRegression

# Create an (unfitted) linear regression model with default settings
LR_model = LinearRegression()

In [14]:
# Train the model on the preprocessed training features and their target values
LR_model.fit(X_prepared, y)

In [15]:
# Draw 10 rows to spot-check the model's predictions.
# NOTE: these rows are sampled from the training features (x_train), so the
# model was fitted on them; this is a sanity check, not a measure of how well
# the model generalizes to the held-out `test_set`.
# A fixed seed makes the same rows come back on every run, so the cells below
# stay reproducible after Restart & Run All.
test_data = x_train.sample(10, random_state=42)
test_data

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity
1385,-122.11,37.99,10.0,2864.0,514.0,1300.0,507.0,4.3875,NEAR BAY
10782,-117.91,33.64,29.0,1652.0,310.0,832.0,326.0,4.8098,<1H OCEAN
8835,-118.37,34.09,24.0,630.0,172.0,257.0,147.0,5.5224,<1H OCEAN
4098,-118.36,34.14,30.0,1376.0,317.0,629.0,320.0,3.6823,<1H OCEAN
20329,-119.02,34.26,40.0,1498.0,292.0,707.0,249.0,3.7974,<1H OCEAN
3870,-118.46,34.16,26.0,2548.0,647.0,1098.0,540.0,4.3839,<1H OCEAN
14094,-117.1,32.75,17.0,871.0,379.0,955.0,351.0,1.4375,NEAR OCEAN
18012,-121.98,37.27,29.0,2658.0,484.0,1318.0,498.0,5.3561,<1H OCEAN
16530,-121.2,37.83,18.0,3415.0,580.0,1912.0,562.0,4.4423,INLAND
3330,-122.48,38.9,10.0,304.0,63.0,161.0,61.0,2.1964,INLAND


In [16]:
# Display the training features
x_train

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity
14196,-117.03,32.71,33.0,3126.0,627.0,2300.0,623.0,3.2596,NEAR OCEAN
8267,-118.16,33.77,49.0,3382.0,787.0,1314.0,756.0,3.8125,NEAR OCEAN
17445,-120.48,34.66,4.0,1897.0,331.0,915.0,336.0,4.1563,NEAR OCEAN
14265,-117.11,32.69,36.0,1421.0,367.0,1418.0,355.0,1.9425,NEAR OCEAN
2271,-119.80,36.78,43.0,2382.0,431.0,874.0,380.0,3.5542,INLAND
...,...,...,...,...,...,...,...,...,...
11284,-117.96,33.78,35.0,1330.0,201.0,658.0,217.0,6.3700,<1H OCEAN
11964,-117.43,34.02,33.0,3084.0,570.0,1753.0,449.0,3.0500,INLAND
5390,-118.38,34.03,36.0,2101.0,569.0,1756.0,527.0,2.9344,<1H OCEAN
860,-121.96,37.58,15.0,3575.0,597.0,1777.0,559.0,5.7192,<1H OCEAN


In [17]:
# Look up the true target values for the sampled rows by their index labels
test_label = y.loc[test_data.index]
test_label

Unnamed: 0,median_house_value
1385,287700.0
10782,325400.0
8835,400000.0
4098,295200.0
20329,228700.0
3870,299100.0
14094,96400.0
18012,298900.0
16530,161400.0
3330,112500.0


In [19]:
# Run the sampled rows through the already-fitted preprocessing
# (transform only, nothing is re-fitted here)
test_data_prepared = full_pipeline.transform(test_data)
test_data_prepared

array([[-1.26033038,  1.09840406, -1.47659983,  0.10208644, -0.05846582,
        -0.11121422,  0.01840914,  0.26611495,  0.08950695, -0.04602181,
        -0.57573043,  0.        ,  0.        ,  0.        ,  1.        ,
         0.        ],
       [ 0.83381355, -0.93754033,  0.03108328, -0.45526234, -0.54534582,
        -0.52281577, -0.45671081,  0.48788365, -0.1540443 , -0.04705437,
        -0.43464498,  1.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [ 0.60445493, -0.72692539, -0.36567544, -0.92523796, -0.87470583,
        -1.02852281, -0.92658082,  0.86210192, -0.48151447, -0.11648092,
         1.03731898,  1.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [ 0.60944098, -0.70352373,  0.11043502, -0.58218335, -0.52863916,
        -0.70135234, -0.47246064, -0.10421725, -0.47553043, -0.09771098,
         0.30210095,  1.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [ 0.28036122, -0.64735975,  0

In [20]:
# Predict target values for the sampled rows. `test_data_prepared` was already
# computed in the previous cell, so the transform is not repeated here.
predicted_labels = LR_model.predict(test_data_prepared)

In [21]:
# Display the model's predictions for the sampled rows
predicted_labels

array([219035.33028544, 255313.82352557, 302393.42756091, 225936.77165082,
       241903.71683916, 272459.23528869, 165787.6699681 , 298483.25847561,
       161314.78089043,  77563.4929291 ])

In [22]:
# Side-by-side comparison: 'Prognoz' holds the model's predictions,
# 'Real baxosi' holds the true values from test_label.
pd.DataFrame({'Prognoz':predicted_labels, 'Real baxosi':test_label})

Unnamed: 0,Prognoz,Real baxosi
1385,219035.330285,287700.0
10782,255313.823526,325400.0
8835,302393.427561,400000.0
4098,225936.771651,295200.0
20329,241903.716839,228700.0
3870,272459.235289,299100.0
14094,165787.669968,96400.0
18012,298483.258476,298900.0
16530,161314.78089,161400.0
3330,77563.492929,112500.0
