In [13]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# ---------------------------------------------------------------------------
# 1. Load the data
# ---------------------------------------------------------------------------
# The CSV path is relative to the notebook's working directory.
data = pd.read_csv('PreprocessedHousing.csv')

# Headers may carry stray leading/trailing whitespace; normalise them so the
# by-name column selection below is reliable.
data.columns = data.columns.str.strip()

# ---------------------------------------------------------------------------
# 2. Separate target from features and hold out 20% for evaluation
# ---------------------------------------------------------------------------
y = data['price']
X = data.drop(columns='price')

# random_state is fixed so the split (and therefore the reported score) is
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ---------------------------------------------------------------------------
# 3. Preprocessing
# ---------------------------------------------------------------------------
# Columns that get standardised.
numeric_features = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']

# Columns that get one-hot encoded.
# NOTE(review): the names suggest these are already 0/1 dummy columns - confirm
# against the CSV; if so the encoder only re-expresses each flag as two columns.
categorical_features = [
    'basement_yes',
    'airconditioning_yes',
    'prefarea_yes',
    'furnishingstatus_semi-furnished',
    'furnishingstatus_unfurnished',
]

# No `remainder` is passed, so ColumnTransformer's default handling applies to
# every column of X that is not named in the two lists above.
scale_numeric = ('num', StandardScaler(), numeric_features)
encode_categorical = (
    'cat',
    OneHotEncoder(handle_unknown='ignore'),
    categorical_features,
)
preprocessor = ColumnTransformer(transformers=[scale_numeric, encode_categorical])

# ---------------------------------------------------------------------------
# 4. Model: preprocessing followed by ordinary least squares
# ---------------------------------------------------------------------------
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
])
model.fit(X_train, y_train)

# ---------------------------------------------------------------------------
# 5. Evaluate on the held-out rows
# ---------------------------------------------------------------------------
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f'R-squared: {r2}')

# ---------------------------------------------------------------------------
# 6. Predict a single hand-written example
# ---------------------------------------------------------------------------
# Values are listed in the same order as numeric_features followed by
# categorical_features, which is also the column order given to the frame.
sample_row = [7420, 4.0, 3.0, 2.0, 2.0, 1, 0, 0, 1, 0]
new_data = pd.DataFrame(
    [sample_row],
    columns=numeric_features + categorical_features,
)

predicted_price = model.predict(new_data)
print(f'Predicted Price: {predicted_price[0]}')



R-squared: 0.6404607338473809
Predicted Price: 7772856.120207834


In [14]:
# Importing the pickle library to save and load models
import pickle
# Path of the pickle file used to persist the model.
# The spelling is kept as-is: the loading cell opens this exact file name.
filename = "Finilised_Model.sav"

In [15]:
# Serialise the trained pipeline (`model`) to the path held in `filename`.
# The `with` block closes the file handle as soon as the dump finishes, so the
# bytes are flushed to disk before any later cell tries to read the file back
# (a bare `open(...)` passed inline is never closed explicitly).
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [16]:
import pickle
import pandas as pd

# Load the saved model from the file. The `with` block guarantees the handle is
# closed once unpickling is done.
# NOTE: pickle.load can execute arbitrary code embedded in the file - only load
# model files that you produced yourself or otherwise trust.
with open("Finilised_Model.sav", 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# `new_data` is the one-row DataFrame built in the training cell, so that cell
# must have been run in this kernel first. The pipeline's ColumnTransformer
# picks its inputs by column name, so the frame has to carry those named
# columns (the 10 columns `new_data` was created with).
result = loaded_model.predict(new_data)

print(f'Predicted Price: {result[0]}')


Predicted Price: 7772856.120207834
