In [11]:

# Function to clean and convert
def clean_and_convert(price_str):
    # Remove any non-numeric characters (including Arabic commas)
    cleaned_price = ''.join(c for c in price_str if c.isdigit() or c == '.')

    # Convert to float
    try:
        return float(cleaned_price)
    except ValueError:
        # Handle cases where the conversion fails
        return None  # or another appropriate value


In [12]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer  # Import the imputer

# Load your dataset
land = pd.read_csv('land.csv')

land.head()
# Apply the cleaning function to 'the price' column
land['the price'] = land['the price'].apply(clean_and_convert)
land['Space'] = land['Space'].apply(clean_and_convert)  # Apply the appropriate cleaning function for 'space'

features = land.drop('the price', axis=1)
features['Space'] = land['Space']
target = land['the price']

# Convert categorical columns to numerical using Label Encoding
label_encoder = LabelEncoder()
for column in features.select_dtypes(include=['object']).columns:
    features[column] = label_encoder.fit_transform(features[column])

# Impute missing values using SimpleImputer
imputer = SimpleImputer(strategy='mean')
features_imputed = pd.DataFrame(imputer.fit_transform(features), columns=features.columns)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features_imputed, target, test_size=0.1, random_state=42)

# Create a Random Forest Regressor
regressor = RandomForestRegressor(random_state=42)

# Train the regressor on the training data
regressor.fit(X_train, y_train)

# Make predictions on the test set
predictions = regressor.predict(X_test)

# Evaluate model performance for regression
mse = mean_squared_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')




Mean Squared Error: 4392506126844.7803
R-squared: 0.5863619906552313
