In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor

In [2]:
import pandas as pd

In [3]:
# Hand-entered sample of ten property listings. Each key becomes a DataFrame
# column, and every list holds one value per listing in the same row order.
data = dict(
    city=[
        "lapaz", "east_legon", "buduburam", "dansoman", "teshie",
        "kasoa", "awoshie", "cantonments", "osu", "gomoa",
    ],
    land_size=[1, 2, 2, 1.5, 3, 4, 4, 2, 2, 4],
    house_type=[
        "2 bedrooms",
        "pent house",
        "condo",
        "2 storey",
        "3 bedrooms detached",
        "2 bedrooms semi-detached",
        "5 bedrooms",
        "4 storey",
        "2 bedrooms",
        "2 bedrooms detached",
    ],
    property_price=[
        40000, 60000, 32000, 54000, 13000,
        27000, 28000, 65500, 39800, 15500,
    ],
    # The two macro indicators carry the same value for every listing here.
    inflation_rate=[28] * 10,
    exchange_rate=[15.6] * 10,
    proximity_to_amenities=[
        "yes", "yes", "no", "no", "no",
        "yes", "yes", "yes", "no", "yes",
    ],
    age_of_property=[1, 2, 3, 3, 2, 1, 1, 5, 1, 3],
    property_condition=[
        "excellent", "good", "fair", "fair", "good",
        "excellent", "excellent", "poor", "excellent", "fair",
    ],
)

In [4]:
# Turn the column-oriented dict into a table: one row per listing.
df = pd.DataFrame(data=data)

In [5]:
# Show the whole frame (it only has ten rows) to eyeball the columns and values.
df

Unnamed: 0,city,land_size,house_type,property_price,inflation_rate,exchange_rate,proximity_to_amenities,age_of_property,property_condition
0,lapaz,1.0,2 bedrooms,40000,28,15.6,yes,1,excellent
1,east_legon,2.0,pent house,60000,28,15.6,yes,2,good
2,buduburam,2.0,condo,32000,28,15.6,no,3,fair
3,dansoman,1.5,2 storey,54000,28,15.6,no,3,fair
4,teshie,3.0,3 bedrooms detached,13000,28,15.6,no,2,good
5,kasoa,4.0,2 bedrooms semi-detached,27000,28,15.6,yes,1,excellent
6,awoshie,4.0,5 bedrooms,28000,28,15.6,yes,1,excellent
7,cantonments,2.0,4 storey,65500,28,15.6,yes,5,poor
8,osu,2.0,2 bedrooms,39800,28,15.6,no,1,excellent
9,gomoa,4.0,2 bedrooms detached,15500,28,15.6,yes,3,fair


In [6]:
# Separate the value being predicted from the columns used to predict it.
y = df["property_price"]
X = df.drop(columns="property_price")

In [7]:
# Column groups for preprocessing: the first list is routed to the numeric
# transformer, the second to the categorical one.
numeric_features = [
    "land_size",
    "age_of_property",
    "inflation_rate",
    "exchange_rate",
]
categorical_features = [
    "city",
    "house_type",
    "proximity_to_amenities",
    "property_condition",
]

In [8]:
# Categorical columns are one-hot encoded. handle_unknown="ignore" matters here:
# every city in the sample is unique, so any held-out row carries a city the
# encoder never saw during fit and must not raise on it.
categorical_transformer = OneHotEncoder(handle_unknown="ignore")
# Numeric columns are standardized.
numeric_transformer = StandardScaler()

In [9]:
# Apply each transformer to its own column group and concatenate the results.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ],
)

In [10]:
# Chain preprocessing and the regressor so fit/predict run both steps together.
# The forest is seeded (same seed as the train/test split) so that a fresh
# Restart & Run All rebuilds the same trees and reports the same error; an
# unseeded RandomForestRegressor gives a different result on every run.
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", RandomForestRegressor(random_state=42)),
    ]
)

In [11]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Fit the preprocessing steps and the regressor on the training rows only.
model.fit(X=X_train, y=y_train)

In [13]:
# Score the held-out rows and report the mean squared error of the predictions.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 280694725.0


In [14]:
import pickle

In [15]:
# Persist the fitted pipeline (preprocessing + regressor) to 'model_pkl' in the
# working directory. Only unpickle this file from a source you trust.
with open('model_pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)