In [9]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [10]:
housing = fetch_california_housing()
X, y = housing.data, housing.target

In [11]:
# შევქმნათ პანდას დატაფრეიმი X და y-დან
data = pd.DataFrame(X, columns=housing.feature_names)
data['target'] = y

print(data.isnull().sum())

MedInc        0
HouseAge      0
AveRooms      0
AveBedrms     0
Population    0
AveOccup      0
Latitude      0
Longitude     0
target        0
dtype: int64


In [12]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [13]:
lr = LinearRegression()
lr.fit(X_train, y_train)

train_score = lr.score(X_train, y_train)
val_score = lr.score(X_val, y_val)

print(f"Linear Regression Train Score: {train_score:.3f}")
print(f"Linear Regression Validation Score: {val_score:.3f}")

Linear Regression Train Score: 0.613
Linear Regression Validation Score: 0.576


In [14]:
alphas = [0.1, 1, 10, 100, 1000]

for alpha in alphas:
    
    # Lasso Regression
    lasso = Lasso(alpha=alpha)
    lasso.fit(X_train, y_train)
    lasso_val_score = lasso.score(X_val, y_val)
    print(f"Lasso (alpha={alpha}) Validation Score: {lasso_val_score:.3f}")
    
    # Ridge Regression 
    ridge = Ridge(alpha=alpha)
    ridge.fit(X_train, y_train)
    ridge_val_score = ridge.score(X_val, y_val)
    print(f"Ridge (alpha={alpha}) Validation Score: {ridge_val_score:.3f}")
    
    # ElasticNet
    elastic = ElasticNet(alpha=alpha)
    elastic.fit(X_train, y_train) 
    elastic_val_score = elastic.score(X_val, y_val)
    print(f"ElasticNet (alpha={alpha}) Validation Score: {elastic_val_score:.3f}")
    print("-----------------------------")

Lasso (alpha=0.1) Validation Score: 0.481
Ridge (alpha=0.1) Validation Score: 0.576
ElasticNet (alpha=0.1) Validation Score: 0.515
-----------------------------
Lasso (alpha=1) Validation Score: -0.000
Ridge (alpha=1) Validation Score: 0.576
ElasticNet (alpha=1) Validation Score: 0.204
-----------------------------
Lasso (alpha=10) Validation Score: -0.000
Ridge (alpha=10) Validation Score: 0.576
ElasticNet (alpha=10) Validation Score: -0.000
-----------------------------
Lasso (alpha=100) Validation Score: -0.000
Ridge (alpha=100) Validation Score: 0.578
ElasticNet (alpha=100) Validation Score: -0.000
-----------------------------
Lasso (alpha=1000) Validation Score: -0.000
Ridge (alpha=1000) Validation Score: 0.568
ElasticNet (alpha=1000) Validation Score: -0.000
-----------------------------


In [17]:
lasso_models = [Lasso(alpha=alpha) for alpha in alphas]
ridge_models = [Ridge(alpha=alpha) for alpha in alphas]
elastic_models = [ElasticNet(alpha=alpha) for alpha in alphas]

for lasso, ridge, elastic in zip(lasso_models, ridge_models, elastic_models):
    lasso.fit(X_train, y_train)
    ridge.fit(X_train, y_train)
    elastic.fit(X_train, y_train)
    
    lasso_val_score = lasso.score(X_val, y_val)
    ridge_val_score = ridge.score(X_val, y_val)
    elastic_val_score = elastic.score(X_val, y_val)
    
    print(f"Lasso (alpha={lasso.alpha}) Validation Score: {lasso_val_score:.3f}")
    print(f"Ridge (alpha={ridge.alpha}) Validation Score: {ridge_val_score:.3f}") 
    print(f"ElasticNet (alpha={elastic.alpha}) Validation Score: {elastic_val_score:.3f}")
    print("-----------------------------")
    
best_lasso = max(lasso_models, key=lambda x: x.score(X_val, y_val))
best_ridge = max(ridge_models, key=lambda x: x.score(X_val, y_val))
best_elastic = max(elastic_models, key=lambda x: x.score(X_val, y_val))

print(f"Best Lasso Score: {best_lasso.score(X_val, y_val):.3f}")
print(f"Best Ridge Score: {best_ridge.score(X_val, y_val):.3f}")
print(f"Best ElasticNet Score: {best_elastic.score(X_val, y_val):.3f}")

Lasso (alpha=0.1) Validation Score: 0.481
Ridge (alpha=0.1) Validation Score: 0.576
ElasticNet (alpha=0.1) Validation Score: 0.515
-----------------------------
Lasso (alpha=1) Validation Score: -0.000
Ridge (alpha=1) Validation Score: 0.576
ElasticNet (alpha=1) Validation Score: 0.204
-----------------------------
Lasso (alpha=10) Validation Score: -0.000
Ridge (alpha=10) Validation Score: 0.576
ElasticNet (alpha=10) Validation Score: -0.000
-----------------------------
Lasso (alpha=100) Validation Score: -0.000
Ridge (alpha=100) Validation Score: 0.578
ElasticNet (alpha=100) Validation Score: -0.000
-----------------------------
Lasso (alpha=1000) Validation Score: -0.000
Ridge (alpha=1000) Validation Score: 0.568
ElasticNet (alpha=1000) Validation Score: -0.000
-----------------------------
Best Lasso Score: 0.481
Best Ridge Score: 0.578
Best ElasticNet Score: 0.515


In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [19]:
# ჩამოტვირთეთ მონაცემები Kaggle-დან და შეინახეთ train.csv ფაილში
data = pd.read_csv("train.csv")

In [39]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [27]:
# გამოვყოფთ დამოკიდებულ და დამოუკიდებელ ცვლადებს
X = data.drop('SalePrice', axis=1)
y = data['SalePrice']

# დავმუშავებთ დაკარგულ მნიშვნელობებს
numeric_transformer = SimpleImputer(strategy='mean')
categorical_transformer = SimpleImputer(strategy='most_frequent')

# დავმუშავებთ კატეგორიულ ცვლადებს one-hot encoding-ით
categorical_cols = X.select_dtypes(include=['object']).columns
numeric_cols = X.select_dtypes(exclude=['object']).columns

preprocessor = ColumnTransformer(transformers=[
    ('numeric', numeric_transformer, numeric_cols),
    ('categorical', OneHotEncoder(handle_unknown='ignore'), categorical_cols)])

# შევქმნათ pipeline მონაცემების დასამუშავებლად და მოდელის დასატრენინგებლად
regression_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('scaler', StandardScaler())])

# გავყოფთ მონაცემებს სატრენინგო და ვალიდაციურ ნაწილებად
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [40]:
# გამოვყოფთ დამოკიდებულ და დამოუკიდებელ ცვლადებს
X = data.drop('SalePrice', axis=1)
y = data['SalePrice']

# დავმუშავებთ დაკარგულ მნიშვნელობებს
numeric_transformer = SimpleImputer(strategy='mean')
categorical_transformer = SimpleImputer(strategy='most_frequent')

# დავმუშავებთ კატეგორიულ ცვლადებს one-hot encoding-ით
categorical_cols = X.select_dtypes(include=['object']).columns
numeric_cols = X.select_dtypes(exclude=['object']).columns

preprocessor = ColumnTransformer(transformers=[
    ('numeric', numeric_transformer, numeric_cols),
    ('categorical', OneHotEncoder(handle_unknown='ignore'), categorical_cols)])

# შევქმნათ pipeline მონაცემების დასამუშავებლად და მოდელის დასატრენინგებლად
regression_pipeline = Pdipeline(steps=[
    ('preprocessor', preprocessor)])

# გავყოფთ მონაცემებს სატრენინგო და ვალიდაციურ ნაწილებად
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Linear Regression
lr = LinearRegression()
lr_pipeline = regression_pipeline.fit(X_train, y_train)
lr_pipeline.steps.append(('model', lr))




AttributeError: 'LinearRegression' object has no attribute 'transform'

In [38]:
# y_pred = lr_pipeline.predict(X_val)
# lr_val_score = r2_score(y_val, y_pred)
# print(f"Linear Regression Validation Score: {lr_val_score:.3f}")

NotFittedError: This LinearRegression instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.