In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import Ridge
import pickle

In [2]:
# Read the CSV named by file_path (resolved relative to the working directory)
# into a DataFrame.
file_path = 'HousingCleanData.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Discard the 'Unnamed: 0' column (presumably a stray index column written by
# an earlier to_csv call -- confirm against the raw file) and save the result.
# errors='ignore' keeps this cell safe to re-run: `data` is rebound here, so a
# second execution, or an input file that lacks the column, would otherwise
# raise KeyError.
data = data.drop(columns=['Unnamed: 0'], errors='ignore')
# index=False stops the row index from being written back as an extra column.
data.to_csv('HousingCleanData_cleaned.csv', index=False)

In [4]:
# Print the first five rows as plain text to sanity-check the loaded columns.
print(data.head(n=5))

              location  total_sqft  bath  price  bhk
0  1st Block Jayanagar      2850.0   4.0  428.0    4
1  1st Block Jayanagar      1630.0   3.0  194.0    3
2  1st Block Jayanagar      1875.0   2.0  235.0    3
3  1st Block Jayanagar      1200.0   2.0  130.0    3
4  1st Block Jayanagar      1235.0   2.0  148.0    2


In [5]:
# X holds the four predictor columns; y holds the 'price' column to predict.
X = data.loc[:, ['location', 'total_sqft', 'bath', 'bhk']]
y = data.loc[:, 'price']

In [6]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Scaler for the numeric columns, and the list of columns it is applied to.
numerical_transformer = StandardScaler()
numerical_features = ['total_sqft', 'bath', 'bhk']

In [8]:
# One-hot encoder for the categorical column(s). handle_unknown='ignore' means
# a category not seen during fit is encoded as all zeros instead of raising.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
categorical_features = ['location']

In [9]:
# Route each group of columns to its own transformer: numeric columns go to
# the scaler, categorical columns go to the one-hot encoder.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

In [10]:
# Chain the preprocessing step with a Ridge regressor (default settings) so
# that fitting and predicting both apply the same transformations.
model_pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', Ridge()),
    ]
)

In [11]:
# Fit the preprocessing steps and the regressor on the training split only.
model_pipeline.fit(X=X_train, y=y_train)

In [12]:
# Serialize the fitted pipeline (preprocessing + regressor) to disk.
# Only unpickle this file from a trusted source: loading a pickle can execute
# arbitrary code.
model_filename = 'RidgeModel02.pkl'
with open(model_filename, mode='wb') as model_file:
    pickle.dump(model_pipeline, model_file)

print(f"Model saved to {model_filename}")

Model saved to RidgeModel02.pkl
