In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline 

In [2]:
# Reading of dataset into dataframe
df = pd.read_csv('lagos_houses.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Location,Price,Bedroom,Bathrooms,Toilet,Location Area,State
0,0,Agege,800000,2,5,2,Off Ekoro,Lagos
1,1,Agege,800000,2,5,2,Off Ekoro,Lagos
2,2,Agege,800000,2,5,2,Off Ekoro,Lagos
3,3,Agege,800000,2,5,2,Off Ekoro,Lagos
4,4,Agege,800000,2,5,2,Off Ekoro,Lagos


In [3]:
# Removal of unwanted columns
df = df.drop(columns=['Unnamed: 0', 'State'])
df.head()

Unnamed: 0,Location,Price,Bedroom,Bathrooms,Toilet,Location Area
0,Agege,800000,2,5,2,Off Ekoro
1,Agege,800000,2,5,2,Off Ekoro
2,Agege,800000,2,5,2,Off Ekoro
3,Agege,800000,2,5,2,Off Ekoro
4,Agege,800000,2,5,2,Off Ekoro


In [36]:
# Splitting data into X and y
X = df[['Location', 'Bedroom', 'Bathrooms', 'Toilet', 'Location Area']]
y = df['Price']

In [37]:
categorical_features = ['Location', 'Location Area']
# Transformer for categorical variables
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(drop='first'), categorical_features)
    ],
    remainder='passthrough'  # keep numeric columns as they are
)

In [38]:
# Create and Train Model
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [39]:
# Splitting dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [40]:
model.fit(X_train, y_train)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [42]:
# Evaluate model
from sklearn.metrics import mean_squared_error, r2_score
y_pred = model.predict(X_test)

In [43]:
# Evaluate
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")

Mean Squared Error: 794213090273.67
R² Score: 1.00


In [44]:
# Putting criteria for prediction price of an apartment into dataframe
new_data = pd.DataFrame({
    'Location': ['Lekki'],
    'Bedroom': ['3'],
    'Bathrooms': ['4'],
    'Toilet': ['4'],
    'Location Area': ['Osapa']
    })

In [48]:
# Prediction of price with the above criteria
predicted_price = model.predict(new_data)
print("Predicted Price:", round(predicted_price[0], 2))

Predicted Price: 107588099.51
