# In [7]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Load the training and test tables from CSV files in the working directory
train_data, test_data = (
    pd.read_csv(csv_name, delimiter=',') for csv_name in ('train.csv', 'test.csv')
)

# Columns used as model inputs (the order here is the column order of `data`)
selected_features = [
    'LandContour', 'GarageYrBlt', 'FullBath', '1stFlrSF',
    'BsmtFinSF1', 'TotRmsAbvGrd', 'ExterQual', 'OpenPorchSF',
    'Heating', 'Condition2', 'OverallQual', 'GarageCars',
    'KitchenQual', 'KitchenAbvGr', 'GrLivArea', '2ndFlrSF',
    'CentralAir', 'BsmtQual', 'TotalBsmtSF', 'Fireplaces',
]

# Stack the feature columns of both tables: training rows first, test rows after.
# The original row labels of each table are kept (no index reset).
data = pd.concat([frame[selected_features] for frame in (train_data, test_data)])


# Ordinal scale for the quality columns: a larger number means a better rating,
# and 0 stands for "no rating".
qual_mapping = {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1, 'NA': 0}
quality_features = ('ExterQual', 'KitchenQual', 'BsmtQual')

# Converting every text (object-dtype) column of `data` into a numeric column
le = LabelEncoder()
for feature in data.select_dtypes(include='object'):
    if feature in quality_features:
        # pd.read_csv turns the literal text 'NA' into NaN by default, so the
        # 'NA' key above never matches and .map() leaves those rows as NaN.
        # Fill them with 0 explicitly so a missing rating ranks below 'Po',
        # and keep the mapped values as-is to preserve the ordinal scale.
        # Any other label absent from qual_mapping also ends up as 0.
        data[feature] = data[feature].map(qual_mapping).fillna(0).astype(int)
    else:
        # Nominal column: arbitrary integer code per distinct label
        data[feature] = le.fit_transform(data[feature])

# Splitting the data back into training and test set.
# `data` holds the training rows first, followed by the test rows, so a
# positional slice at the training row count separates them again.
n_train = train_data.shape[0]
X = data[:n_train]
y = train_data['SalePrice']
final_testing_data = data[n_train:]

# Hold out 44% of the labelled rows to estimate how well the model generalises
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.44, random_state=42)

# NOTE(review): numeric feature columns are passed through unchanged and may
# still contain NaN; RandomForestRegressor is documented to accept missing
# values only from scikit-learn 1.4 on -- confirm the installed version or
# impute beforehand.

# Random forest model training on the training part of the split
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)

# Report the score on the held-out rows (R^2 for regressors); previously the
# hold-out set was created but never looked at.
print(f'Hold-out R^2: {rf.score(X_test, y_test):.4f}')

# Refit on all labelled rows so the final predictions are not based on only
# 56% of the available training data.
rf.fit(X, y)

# Prediction on a trained model
final_pred = rf.predict(final_testing_data)

# Creating DataFrame for submission.csv
# final_pred follows the row order of test_data, so the identifiers shipped
# with the test set can be paired with the predictions directly instead of
# fabricating 1..N (which is wrong whenever the test ids do not start at 1).
# NOTE(review): assumes the 'Id' column, when present, is the row identifier
# expected in the submission -- confirm against the data description.
if 'Id' in test_data.columns:
    submission_ids = test_data['Id'].to_numpy()
else:
    # Fallback: original behaviour, consecutive ids starting at 1
    submission_ids = range(1, 1 + len(final_pred))

df = pd.DataFrame({'Id': submission_ids, 'SalePrice': final_pred})
df.to_csv('submission.csv', index=False)


